From 7f3b74c052f0473dab30677214e187d676876237 Mon Sep 17 00:00:00 2001
From: Frances Perry
Date: Sat, 13 Dec 2014 10:56:14 -0800
Subject: [PATCH 0001/1541] Welcome to Dataflow!

---
 README | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 README

diff --git a/README b/README
new file mode 100644
index 0000000000000..2da8ea5ba93c4
--- /dev/null
+++ b/README
@@ -0,0 +1,19 @@
+Greetings! Welcome to the (Alpha) Google Cloud Dataflow Java SDK.
+
+Dataflow provides a simple, powerful model for building both batch and
+streaming parallel data processing Pipelines.
+
+To use the Google Cloud Dataflow SDK, you build a Pipeline which manages a
+graph of PTransforms and PCollections that the PTransforms consume and produce.
+
+You then use a PipelineRunner to specify where and how the pipeline should
+execute. Currently there are two runners:
+  1. The DirectPipelineRunner runs the pipeline on your local machine.
+  2. The [Blocking]DataflowPipelineRunner runs the pipeline on the Dataflow
+     Service using the Google Cloud Platform. The Dataflow Service is
+     currently in the Alpha phase of development and access is limited to
+     whitelisted users.
+
+For more about both the Dataflow SDK and the Dataflow Service, visit:
+  http://cloud.google.com/dataflow
+

From ec739e8bac4ac2b21fe9e3e16f9900cc0a4e26e1 Mon Sep 17 00:00:00 2001
From: Davor Bonaci
Date: Sat, 13 Dec 2014 12:20:10 -0800
Subject: [PATCH 0002/1541] Let's get this party started.

---
 .gitignore | 1 +
 LICENSE | 202 ++
 checkstyle.xml | 385 ++
 examples/pom.xml | 223 ++
 .../dataflow/examples/BigQueryTornadoes.java | 149 +++
 .../dataflow/examples/DatastoreWordCount.java | 198 ++++
 .../google/cloud/dataflow/examples/TfIdf.java | 425 +++++++
 .../examples/TopWikipediaSessions.java | 208 ++++
 .../cloud/dataflow/examples/WordCount.java | 174 +++
 .../examples/BigQueryTornadoesTest.java | 80 ++
 .../cloud/dataflow/examples/TfIdfTest.java | 63 +
 .../examples/TopWikipediaSessionsTest.java | 62 +
 .../dataflow/examples/WordCountTest.java | 81 ++
 pom.xml | 202 ++++
 sdk/pom.xml | 315 +++++
 .../google/cloud/dataflow/sdk/Pipeline.java | 395 ++++++
 .../cloud/dataflow/sdk/PipelineResult.java | 27 +
 .../dataflow/sdk/coders/AtomicCoder.java | 42 +
 .../cloud/dataflow/sdk/coders/AvroCoder.java | 202 ++++
 .../sdk/coders/BigEndianIntegerCoder.java | 88 ++
 .../sdk/coders/BigEndianLongCoder.java | 87 ++
 .../dataflow/sdk/coders/ByteArrayCoder.java | 103 ++
 .../cloud/dataflow/sdk/coders/Coder.java | 154 +++
 .../dataflow/sdk/coders/CoderException.java | 37 +
 .../dataflow/sdk/coders/CoderRegistry.java | 701 +++++++++
 .../dataflow/sdk/coders/CollectionCoder.java | 63 +
 .../dataflow/sdk/coders/CustomCoder.java | 83 ++
 .../dataflow/sdk/coders/DefaultCoder.java | 68 ++
 .../dataflow/sdk/coders/DoubleCoder.java | 92 ++
 .../dataflow/sdk/coders/EntityCoder.java | 82 ++
 .../dataflow/sdk/coders/InstantCoder.java | 60 +
 .../dataflow/sdk/coders/IterableCoder.java | 72 ++
 .../sdk/coders/IterableLikeCoder.java | 227 ++++
 .../cloud/dataflow/sdk/coders/KvCoder.java | 142 +++
 .../dataflow/sdk/coders/KvCoderBase.java | 53 +
 .../cloud/dataflow/sdk/coders/ListCoder.java | 70 ++
 .../cloud/dataflow/sdk/coders/MapCoder.java | 149 +++
 .../dataflow/sdk/coders/MapCoderBase.java | 52 +
 .../sdk/coders/SerializableCoder.java | 126 ++
 .../cloud/dataflow/sdk/coders/SetCoder.java | 124 ++
 .../dataflow/sdk/coders/StandardCoder.java | 143 +++
 .../dataflow/sdk/coders/StringUtf8Coder.java | 124 ++
 .../sdk/coders/TableRowJsonCoder.java | 80 ++
 .../sdk/coders/TextualIntegerCoder.java | 
73 ++ .../cloud/dataflow/sdk/coders/URICoder.java | 77 ++ .../dataflow/sdk/coders/VarIntCoder.java | 90 ++ .../dataflow/sdk/coders/VarLongCoder.java | 90 ++ .../cloud/dataflow/sdk/coders/VoidCoder.java | 69 ++ .../dataflow/sdk/coders/package-info.java | 44 + .../google/cloud/dataflow/sdk/io/AvroIO.java | 678 +++++++++++ .../cloud/dataflow/sdk/io/BigQueryIO.java | 937 +++++++++++++++ .../cloud/dataflow/sdk/io/DatastoreIO.java | 603 ++++++++++ .../dataflow/sdk/io/DatastoreIterator.java | 141 +++ .../cloud/dataflow/sdk/io/PubsubIO.java | 331 ++++++ .../dataflow/sdk/io/ShardNameTemplate.java | 75 ++ .../google/cloud/dataflow/sdk/io/TextIO.java | 567 +++++++++ .../cloud/dataflow/sdk/io/package-info.java | 37 + .../sdk/options/ApplicationNameOptions.java | 33 + .../dataflow/sdk/options/BigQueryOptions.java | 29 + .../BlockingDataflowPipelineOptions.java | 46 + .../options/DataflowPipelineDebugOptions.java | 67 ++ .../sdk/options/DataflowPipelineOptions.java | 128 ++ .../DataflowPipelineShuffleOptions.java | 58 + .../DataflowPipelineWorkerPoolOptions.java | 116 ++ .../options/DataflowWorkerHarnessOptions.java | 35 + .../cloud/dataflow/sdk/options/Default.java | 130 ++ .../sdk/options/DefaultValueFactory.java | 38 + .../dataflow/sdk/options/Description.java | 31 + .../sdk/options/DirectPipelineOptions.java | 28 + .../dataflow/sdk/options/GcpOptions.java | 150 +++ .../dataflow/sdk/options/GcsOptions.java | 77 ++ .../dataflow/sdk/options/PipelineOptions.java | 62 + .../sdk/options/PipelineOptionsFactory.java | 862 ++++++++++++++ .../sdk/options/PipelineOptionsValidator.java | 59 + .../sdk/options/ProxyInvocationHandler.java | 390 ++++++ .../sdk/options/StreamingOptions.java | 38 + .../dataflow/sdk/options/Validation.java | 39 + .../dataflow/sdk/options/package-info.java | 25 + .../cloud/dataflow/sdk/package-info.java | 35 + .../BlockingDataflowPipelineRunner.java | 136 +++ .../sdk/runners/DataflowPipeline.java | 49 + .../sdk/runners/DataflowPipelineJob.java | 169 +++ .../sdk/runners/DataflowPipelineRunner.java | 315 +++++ .../runners/DataflowPipelineRunnerHooks.java | 40 + .../runners/DataflowPipelineTranslator.java | 963 +++++++++++++++ .../dataflow/sdk/runners/DirectPipeline.java | 50 + .../sdk/runners/DirectPipelineRunner.java | 844 +++++++++++++ .../dataflow/sdk/runners/PipelineRunner.java | 76 ++ .../sdk/runners/RecordingPipelineVisitor.java | 53 + .../sdk/runners/TransformHierarchy.java | 111 ++ .../sdk/runners/TransformTreeNode.java | 237 ++++ .../runners/dataflow/AvroIOTranslator.java | 113 ++ .../dataflow/BigQueryIOTranslator.java | 200 ++++ .../dataflow/DatastoreIOTranslator.java | 41 + .../runners/dataflow/PubsubIOTranslator.java | 91 ++ .../runners/dataflow/TextIOTranslator.java | 129 ++ .../sdk/runners/dataflow/package-info.java | 20 + .../dataflow/sdk/runners/package-info.java | 33 + .../worker/ApplianceShuffleReader.java | 63 + .../worker/ApplianceShuffleWriter.java | 66 ++ .../runners/worker/AssignWindowsParDoFn.java | 86 ++ .../sdk/runners/worker/AvroByteSink.java | 83 ++ .../sdk/runners/worker/AvroByteSource.java | 95 ++ .../dataflow/sdk/runners/worker/AvroSink.java | 140 +++ .../sdk/runners/worker/AvroSinkFactory.java | 61 + .../sdk/runners/worker/AvroSource.java | 203 ++++ .../sdk/runners/worker/AvroSourceFactory.java | 65 + .../sdk/runners/worker/BigQuerySource.java | 114 ++ .../runners/worker/BigQuerySourceFactory.java | 46 + .../worker/ByteArrayShufflePosition.java | 95 ++ .../worker/ChunkingShuffleBatchReader.java | 97 ++ .../worker/ChunkingShuffleEntryWriter.java | 87 ++ 
.../sdk/runners/worker/CombineValuesFn.java | 219 ++++ .../worker/CopyableSeekableByteChannel.java | 270 +++++ .../worker/CustomSourceFormatFactory.java | 47 + .../worker/DataflowWorkProgressUpdater.java | 121 ++ .../sdk/runners/worker/DataflowWorker.java | 330 ++++++ .../runners/worker/DataflowWorkerHarness.java | 231 ++++ .../sdk/runners/worker/FileBasedSource.java | 259 ++++ .../worker/GroupAlsoByWindowsParDoFn.java | 119 ++ .../runners/worker/GroupingShuffleSource.java | 368 ++++++ .../worker/GroupingShuffleSourceFactory.java | 62 + .../sdk/runners/worker/InMemorySource.java | 163 +++ .../runners/worker/InMemorySourceFactory.java | 54 + .../worker/LazyMultiSourceIterator.java | 87 ++ .../worker/MapTaskExecutorFactory.java | 413 +++++++ .../sdk/runners/worker/NormalParDoFn.java | 214 ++++ .../sdk/runners/worker/OrderedCode.java | 678 +++++++++++ .../sdk/runners/worker/ParDoFnFactory.java | 115 ++ .../worker/PartitioningShuffleSource.java | 128 ++ .../PartitioningShuffleSourceFactory.java | 50 + .../runners/worker/ShuffleEntryWriter.java | 39 + .../sdk/runners/worker/ShuffleLibrary.java | 44 + .../sdk/runners/worker/ShuffleReader.java | 48 + .../sdk/runners/worker/ShuffleSink.java | 248 ++++ .../runners/worker/ShuffleSinkFactory.java | 55 + .../sdk/runners/worker/ShuffleWriter.java | 37 + .../sdk/runners/worker/SideInputUtils.java | 211 ++++ .../sdk/runners/worker/SinkFactory.java | 94 ++ .../sdk/runners/worker/SourceFactory.java | 113 ++ .../worker/SourceOperationExecutor.java | 72 ++ .../SourceOperationExecutorFactory.java | 31 + .../worker/SourceTranslationUtils.java | 189 +++ .../dataflow/sdk/runners/worker/TextSink.java | 285 +++++ .../sdk/runners/worker/TextSinkFactory.java | 55 + .../sdk/runners/worker/TextSource.java | 383 ++++++ .../sdk/runners/worker/TextSourceFactory.java | 74 ++ .../worker/UngroupedShuffleSource.java | 96 ++ .../worker/UngroupedShuffleSourceFactory.java | 56 + .../DataflowWorkerLoggingFormatter.java | 77 ++ .../DataflowWorkerLoggingInitializer.java | 88 ++ .../sdk/runners/worker/package-info.java | 24 + .../dataflow/sdk/testing/DataflowAssert.java | 374 ++++++ .../sdk/testing/RunnableOnService.java | 29 + .../testing/TestDataflowPipelineOptions.java | 26 + .../testing/TestDataflowPipelineRunner.java | 45 + .../dataflow/sdk/testing/TestPipeline.java | 164 +++ .../sdk/testing/WindowingFnTestUtils.java | 185 +++ .../dataflow/sdk/testing/package-info.java | 21 + .../dataflow/sdk/transforms/Aggregator.java | 64 + .../sdk/transforms/ApproximateQuantiles.java | 723 +++++++++++ .../sdk/transforms/ApproximateUnique.java | 426 +++++++ .../dataflow/sdk/transforms/Combine.java | 1045 ++++++++++++++++ .../cloud/dataflow/sdk/transforms/Count.java | 163 +++ .../cloud/dataflow/sdk/transforms/Create.java | 314 +++++ .../cloud/dataflow/sdk/transforms/DoFn.java | 330 ++++++ .../dataflow/sdk/transforms/DoFnTester.java | 357 ++++++ .../cloud/dataflow/sdk/transforms/First.java | 106 ++ .../dataflow/sdk/transforms/Flatten.java | 206 ++++ .../dataflow/sdk/transforms/GroupByKey.java | 517 ++++++++ .../cloud/dataflow/sdk/transforms/Keys.java | 68 ++ .../cloud/dataflow/sdk/transforms/KvSwap.java | 73 ++ .../cloud/dataflow/sdk/transforms/Max.java | 196 +++ .../cloud/dataflow/sdk/transforms/Mean.java | 143 +++ .../cloud/dataflow/sdk/transforms/Min.java | 196 +++ .../dataflow/sdk/transforms/PTransform.java | 400 +++++++ .../cloud/dataflow/sdk/transforms/ParDo.java | 1054 +++++++++++++++++ .../dataflow/sdk/transforms/Partition.java | 173 +++ .../dataflow/sdk/transforms/RateLimiting.java | 
336 ++++++ .../sdk/transforms/RemoveDuplicates.java | 89 ++ .../cloud/dataflow/sdk/transforms/Sample.java | 154 +++ .../transforms/SerializableComparator.java | 28 + .../sdk/transforms/SerializableFunction.java | 31 + .../cloud/dataflow/sdk/transforms/Sum.java | 179 +++ .../cloud/dataflow/sdk/transforms/Top.java | 489 ++++++++ .../cloud/dataflow/sdk/transforms/Values.java | 68 ++ .../cloud/dataflow/sdk/transforms/View.java | 211 ++++ .../dataflow/sdk/transforms/WithKeys.java | 116 ++ .../sdk/transforms/join/CoGbkResult.java | 367 ++++++ .../transforms/join/CoGbkResultSchema.java | 133 +++ .../sdk/transforms/join/CoGroupByKey.java | 208 ++++ .../join/KeyedPCollectionTuple.java | 217 ++++ .../sdk/transforms/join/RawUnionValue.java | 51 + .../sdk/transforms/join/UnionCoder.java | 149 +++ .../sdk/transforms/join/package-info.java | 21 + .../dataflow/sdk/transforms/package-info.java | 43 + .../transforms/windowing/BoundedWindow.java | 37 + .../transforms/windowing/CalendarWindows.java | 300 +++++ .../transforms/windowing/FixedWindows.java | 93 ++ .../transforms/windowing/GlobalWindow.java | 84 ++ .../transforms/windowing/IntervalWindow.java | 257 ++++ .../windowing/InvalidWindowingFn.java | 75 ++ .../MergeOverlappingIntervalWindows.java | 86 ++ .../windowing/NonMergingWindowingFn.java | 31 + .../windowing/PartitioningWindowingFn.java | 42 + .../sdk/transforms/windowing/Sessions.java | 81 ++ .../transforms/windowing/SlidingWindows.java | 131 ++ .../sdk/transforms/windowing/Window.java | 321 +++++ .../sdk/transforms/windowing/WindowingFn.java | 117 ++ .../dataflow/sdk/util/AbstractWindowSet.java | 170 +++ .../dataflow/sdk/util/AggregatorImpl.java | 111 ++ .../dataflow/sdk/util/ApiErrorExtractor.java | 104 ++ .../sdk/util/AppEngineEnvironment.java | 61 + .../dataflow/sdk/util/AssignWindowsDoFn.java | 64 + .../AttemptBoundedExponentialBackOff.java | 82 ++ .../cloud/dataflow/sdk/util/Base64Utils.java | 30 + .../sdk/util/BatchModeExecutionContext.java | 157 +++ .../sdk/util/BigQueryTableInserter.java | 240 ++++ .../sdk/util/BigQueryTableRowIterator.java | 201 ++++ .../dataflow/sdk/util/BufferingWindowSet.java | 193 +++ .../dataflow/sdk/util/CloudCounterUtils.java | 104 ++ .../dataflow/sdk/util/CloudKnownType.java | 138 +++ .../dataflow/sdk/util/CloudMetricUtils.java | 73 ++ .../cloud/dataflow/sdk/util/CloudObject.java | 184 +++ .../dataflow/sdk/util/CloudSourceUtils.java | 80 ++ .../cloud/dataflow/sdk/util/CoderUtils.java | 202 ++++ .../cloud/dataflow/sdk/util/Credentials.java | 244 ++++ .../sdk/util/DataflowReleaseInfo.java | 87 ++ .../sdk/util/DirectModeExecutionContext.java | 68 ++ .../cloud/dataflow/sdk/util/DoFnContext.java | 193 +++ .../dataflow/sdk/util/DoFnProcessContext.java | 136 +++ .../cloud/dataflow/sdk/util/DoFnRunner.java | 147 +++ .../dataflow/sdk/util/ExecutionContext.java | 168 +++ .../sdk/util/FileIOChannelFactory.java | 91 ++ .../dataflow/sdk/util/GCloudCredential.java | 113 ++ .../sdk/util/GcsIOChannelFactory.java | 75 ++ .../cloud/dataflow/sdk/util/GcsUtil.java | 277 +++++ .../sdk/util/GroupAlsoByWindowsDoFn.java | 359 ++++++ .../dataflow/sdk/util/IOChannelFactory.java | 69 ++ .../dataflow/sdk/util/IOChannelUtils.java | 179 +++ .../dataflow/sdk/util/InstanceBuilder.java | 259 ++++ .../cloud/dataflow/sdk/util/MimeTypes.java | 23 + .../dataflow/sdk/util/MonitoringUtil.java | 230 ++++ .../dataflow/sdk/util/OutputReference.java | 42 + .../cloud/dataflow/sdk/util/PTuple.java | 152 +++ .../cloud/dataflow/sdk/util/PackageUtil.java | 307 +++++ 
.../sdk/util/PartitionBufferingWindowSet.java | 87 ++ .../dataflow/sdk/util/PropertyNames.java | 87 ++ .../sdk/util/RetryHttpRequestInitializer.java | 165 +++ .../dataflow/sdk/util/SerializableUtils.java | 145 +++ .../cloud/dataflow/sdk/util/Serializer.java | 152 +++ .../sdk/util/ShardingWritableByteChannel.java | 118 ++ .../util/StreamingGroupAlsoByWindowsDoFn.java | 133 +++ .../cloud/dataflow/sdk/util/StringUtils.java | 146 +++ .../cloud/dataflow/sdk/util/Structs.java | 345 ++++++ .../dataflow/sdk/util/TestCredential.java | 49 + .../cloud/dataflow/sdk/util/TimeUtil.java | 164 +++ .../dataflow/sdk/util/TimerOrElement.java | 195 +++ .../cloud/dataflow/sdk/util/Transport.java | 141 +++ .../dataflow/sdk/util/UserCodeException.java | 132 +++ .../cloud/dataflow/sdk/util/Values.java | 88 ++ .../cloud/dataflow/sdk/util/VarInt.java | 115 ++ .../cloud/dataflow/sdk/util/WindowUtils.java | 62 + .../dataflow/sdk/util/WindowedValue.java | 368 ++++++ .../dataflow/sdk/util/common/Counter.java | 730 ++++++++++++ .../dataflow/sdk/util/common/CounterSet.java | 152 +++ .../common/ElementByteSizeObservable.java | 41 + .../ElementByteSizeObservableIterable.java | 63 + .../ElementByteSizeObservableIterator.java | 36 + .../util/common/ElementByteSizeObserver.java | 84 ++ .../sdk/util/common/ForwardingReiterator.java | 83 ++ .../dataflow/sdk/util/common/Metric.java | 45 + .../sdk/util/common/PeekingReiterator.java | 98 ++ .../dataflow/sdk/util/common/Reiterable.java | 27 + .../dataflow/sdk/util/common/Reiterator.java | 39 + .../sdk/util/common/package-info.java | 18 + .../worker/BatchingShuffleEntryReader.java | 148 +++ .../worker/CachingShuffleBatchReader.java | 228 ++++ .../common/worker/CustomSourceFormat.java | 61 + .../util/common/worker/FlattenOperation.java | 54 + .../worker/GroupingShuffleEntryIterator.java | 216 ++++ .../worker/KeyGroupedShuffleEntries.java | 35 + .../util/common/worker/MapTaskExecutor.java | 116 ++ .../sdk/util/common/worker/Operation.java | 132 +++ .../util/common/worker/OutputReceiver.java | 207 ++++ .../sdk/util/common/worker/ParDoFn.java | 28 + .../util/common/worker/ParDoOperation.java | 65 + .../worker/PartialGroupByKeyOperation.java | 521 ++++++++ .../util/common/worker/ProgressTracker.java | 38 + .../common/worker/ProgressTrackerGroup.java | 71 ++ .../worker/ProgressTrackingReiterator.java | 57 + .../sdk/util/common/worker/ReadOperation.java | 233 ++++ .../sdk/util/common/worker/Receiver.java | 27 + .../common/worker/ReceivingOperation.java | 45 + .../common/worker/ShuffleBatchReader.java | 61 + .../sdk/util/common/worker/ShuffleEntry.java | 110 ++ .../common/worker/ShuffleEntryReader.java | 50 + .../util/common/worker/ShufflePosition.java | 23 + .../dataflow/sdk/util/common/worker/Sink.java | 47 + .../sdk/util/common/worker/Source.java | 157 +++ .../sdk/util/common/worker/StateSampler.java | 279 +++++ .../sdk/util/common/worker/WorkExecutor.java | 99 ++ .../common/worker/WorkProgressUpdater.java | 239 ++++ .../util/common/worker/WriteOperation.java | 105 ++ .../sdk/util/common/worker/package-info.java | 18 + .../dataflow/sdk/util/gcsfs/GcsPath.java | 617 ++++++++++ .../dataflow/sdk/util/gcsfs/package-info.java | 18 + .../sdk/util/gcsio/ClientRequestHelper.java | 40 + .../gcsio/GoogleCloudStorageExceptions.java | 82 ++ .../gcsio/GoogleCloudStorageReadChannel.java | 538 +++++++++ .../gcsio/GoogleCloudStorageWriteChannel.java | 379 ++++++ ...gingMediaHttpUploaderProgressListener.java | 91 ++ .../sdk/util/gcsio/StorageResourceId.java | 165 +++ 
.../cloud/dataflow/sdk/util/package-info.java | 18 + .../dataflow/sdk/values/CodedTupleTag.java | 72 ++ .../dataflow/sdk/values/CodedTupleTagMap.java | 59 + .../google/cloud/dataflow/sdk/values/KV.java | 117 ++ .../cloud/dataflow/sdk/values/PBegin.java | 77 ++ .../dataflow/sdk/values/PCollection.java | 240 ++++ .../dataflow/sdk/values/PCollectionList.java | 227 ++++ .../dataflow/sdk/values/PCollectionTuple.java | 252 ++++ .../dataflow/sdk/values/PCollectionView.java | 45 + .../cloud/dataflow/sdk/values/PDone.java | 36 + .../cloud/dataflow/sdk/values/PInput.java | 57 + .../cloud/dataflow/sdk/values/POutput.java | 72 ++ .../dataflow/sdk/values/POutputValueBase.java | 83 ++ .../cloud/dataflow/sdk/values/PValue.java | 37 + .../cloud/dataflow/sdk/values/PValueBase.java | 190 +++ .../dataflow/sdk/values/TimestampedValue.java | 133 +++ .../cloud/dataflow/sdk/values/TupleTag.java | 170 +++ .../dataflow/sdk/values/TupleTagList.java | 146 +++ .../dataflow/sdk/values/TypedPValue.java | 168 +++ .../dataflow/sdk/values/package-info.java | 42 + .../google/cloud/dataflow/sdk/sdk.properties | 5 + .../cloud/dataflow/sdk/PipelineTest.java | 105 ++ .../google/cloud/dataflow/sdk/TestUtils.java | 231 ++++ .../dataflow/sdk/coders/AvroCoderTest.java | 189 +++ .../sdk/coders/ByteArrayCoderTest.java | 70 ++ .../dataflow/sdk/coders/CoderProperties.java | 73 ++ .../sdk/coders/CoderRegistryTest.java | 230 ++++ .../dataflow/sdk/coders/CustomCoderTest.java | 83 ++ .../dataflow/sdk/coders/DefaultCoderTest.java | 93 ++ .../dataflow/sdk/coders/InstantCoderTest.java | 67 ++ .../sdk/coders/IterableCoderTest.java | 46 + .../dataflow/sdk/coders/ListCoderTest.java | 46 + .../dataflow/sdk/coders/MapCoderTest.java | 49 + .../sdk/coders/SerializableCoderTest.java | 182 +++ .../dataflow/sdk/coders/URICoderTest.java | 68 ++ .../cloud/dataflow/sdk/io/AvroIOTest.java | 365 ++++++ .../cloud/dataflow/sdk/io/BigQueryIOTest.java | 307 +++++ .../dataflow/sdk/io/DatastoreIOTest.java | 126 ++ .../cloud/dataflow/sdk/io/TextIOTest.java | 413 +++++++ .../google/cloud/dataflow/sdk/io/user.avsc | 10 + .../options/DataflowPipelineOptionsTest.java | 94 ++ .../options/PipelineOptionsFactoryTest.java | 502 ++++++++ .../sdk/options/PipelineOptionsTest.java | 45 + .../options/PipelineOptionsValidatorTest.java | 86 ++ .../options/ProxyInvocationHandlerTest.java | 625 ++++++++++ .../BlockingDataflowPipelineRunnerTest.java | 137 +++ .../sdk/runners/DataflowPipelineJobTest.java | 66 ++ .../runners/DataflowPipelineRunnerTest.java | 501 ++++++++ .../DataflowPipelineTranslatorTest.java | 582 +++++++++ .../sdk/runners/PipelineRunnerTest.java | 84 ++ .../sdk/runners/TransformTreeTest.java | 179 +++ .../sdk/runners/worker/AvroByteSinkTest.java | 114 ++ .../runners/worker/AvroByteSourceTest.java | 200 ++++ .../runners/worker/AvroSinkFactoryTest.java | 83 ++ .../sdk/runners/worker/AvroSinkTest.java | 104 ++ .../runners/worker/AvroSourceFactoryTest.java | 115 ++ .../sdk/runners/worker/AvroSourceTest.java | 196 +++ .../worker/BigQuerySourceFactoryTest.java | 78 ++ .../runners/worker/BigQuerySourceTest.java | 183 +++ .../runners/worker/CombineValuesFnTest.java | 337 ++++++ .../CopyableSeekableByteChannelTest.java | 152 +++ .../DataflowWorkProgressUpdaterTest.java | 438 +++++++ .../worker/DataflowWorkerHarnessTest.java | 243 ++++ .../runners/worker/DataflowWorkerTest.java | 85 ++ .../worker/GroupingShuffleSourceTest.java | 499 ++++++++ .../worker/InMemorySourceFactoryTest.java | 110 ++ .../runners/worker/InMemorySourceTest.java | 236 ++++ 
.../worker/MapTaskExecutorFactoryTest.java | 567 +++++++++ .../sdk/runners/worker/NormalParDoFnTest.java | 331 ++++++ .../sdk/runners/worker/OrderedCodeTest.java | 504 ++++++++ .../runners/worker/ParDoFnFactoryTest.java | 125 ++ .../worker/PartitioningShuffleSourceTest.java | 137 +++ .../worker/ShuffleSinkFactoryTest.java | 187 +++ .../sdk/runners/worker/ShuffleSinkTest.java | 236 ++++ .../worker/ShuffleSourceFactoryTest.java | 230 ++++ .../runners/worker/SideInputUtilsTest.java | 145 +++ .../sdk/runners/worker/SinkFactoryTest.java | 119 ++ .../sdk/runners/worker/SourceFactoryTest.java | 124 ++ .../sdk/runners/worker/TestShuffleReader.java | 177 +++ .../runners/worker/TestShuffleReaderTest.java | 139 +++ .../sdk/runners/worker/TestShuffleWriter.java | 69 ++ .../runners/worker/TextSinkFactoryTest.java | 98 ++ .../sdk/runners/worker/TextSinkTest.java | 144 +++ .../runners/worker/TextSourceFactoryTest.java | 98 ++ .../sdk/runners/worker/TextSourceTest.java | 581 +++++++++ .../worker/UngroupedShuffleSourceTest.java | 112 ++ .../DataflowWorkerLoggingFormatterTest.java | 134 +++ .../DataflowWorkerLoggingInitializerTest.java | 109 ++ .../dataflow/sdk/testing/ExpectedLogs.java | 240 ++++ .../sdk/testing/ExpectedLogsTest.java | 102 ++ .../sdk/testing/FastNanoClockAndSleeper.java | 47 + .../testing/FastNanoClockAndSleeperTest.java | 47 + .../sdk/testing/ResetDateTimeProvider.java | 41 + .../testing/ResetDateTimeProviderTest.java | 55 + .../RestoreMappedDiagnosticContext.java | 47 + .../RestoreMappedDiagnosticContextTest.java | 51 + .../sdk/testing/RestoreSystemProperties.java | 51 + .../testing/RestoreSystemPropertiesTest.java | 50 + .../sdk/testing/TestPipelineTest.java | 76 ++ .../transforms/ApproximateQuantilesTest.java | 287 +++++ .../sdk/transforms/ApproximateUniqueTest.java | 302 +++++ .../dataflow/sdk/transforms/CombineTest.java | 527 +++++++++ .../dataflow/sdk/transforms/CountTest.java | 112 ++ .../dataflow/sdk/transforms/CreateTest.java | 189 +++ .../dataflow/sdk/transforms/FirstTest.java | 140 +++ .../dataflow/sdk/transforms/FlattenTest.java | 244 ++++ .../sdk/transforms/GroupByKeyTest.java | 280 +++++ .../dataflow/sdk/transforms/KeysTest.java | 100 ++ .../dataflow/sdk/transforms/KvSwapTest.java | 112 ++ .../dataflow/sdk/transforms/ParDoTest.java | 986 +++++++++++++++ .../sdk/transforms/PartitionTest.java | 141 +++ .../sdk/transforms/RateLimitingTest.java | 225 ++++ .../sdk/transforms/RemoveDuplicatesTest.java | 82 ++ .../dataflow/sdk/transforms/SampleTest.java | 175 +++ .../sdk/transforms/SimpleStatsFnsTest.java | 130 ++ .../dataflow/sdk/transforms/TopTest.java | 244 ++++ .../dataflow/sdk/transforms/ValuesTest.java | 103 ++ .../dataflow/sdk/transforms/ViewTest.java | 159 +++ .../dataflow/sdk/transforms/WithKeysTest.java | 122 ++ .../transforms/join/CoGbkResultCoderTest.java | 55 + .../sdk/transforms/join/CoGroupByKeyTest.java | 348 ++++++ .../sdk/transforms/join/UnionCoderTest.java | 48 + .../windowing/CalendarWindowsTest.java | 260 ++++ .../windowing/FixedWindowsTest.java | 114 ++ .../transforms/windowing/SessionsTest.java | 100 ++ .../windowing/SlidingWindowsTest.java | 127 ++ .../transforms/windowing/WindowingTest.java | 277 +++++ .../dataflow/sdk/util/AggregatorImplTest.java | 194 +++ .../AttemptBoundedExponentialBackOffTest.java | 71 ++ .../dataflow/sdk/util/Base64UtilsTest.java | 53 + .../dataflow/sdk/util/BigQueryUtilTest.java | 306 +++++ .../sdk/util/CloudMetricUtilsTest.java | 66 ++ .../sdk/util/CloudSourceUtilsTest.java | 83 ++ .../dataflow/sdk/util/CoderUtilsTest.java | 158 +++ 
.../cloud/dataflow/sdk/util/GcsUtilTest.java | 105 ++ .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 231 ++++ .../dataflow/sdk/util/IOChannelUtilsTest.java | 76 ++ .../dataflow/sdk/util/IOFactoryTest.java | 99 ++ .../sdk/util/InstanceBuilderTest.java | 114 ++ .../dataflow/sdk/util/MonitoringUtilTest.java | 90 ++ .../cloud/dataflow/sdk/util/PTupleTest.java | 40 + .../dataflow/sdk/util/PackageUtilTest.java | 342 ++++++ .../util/RetryHttpRequestInitializerTest.java | 234 ++++ .../sdk/util/SerializableUtilsTest.java | 75 ++ .../dataflow/sdk/util/SerializerTest.java | 163 +++ .../StreamingGroupAlsoByWindowsDoFnTest.java | 282 +++++ .../dataflow/sdk/util/StringUtilsTest.java | 88 ++ .../cloud/dataflow/sdk/util/StructsTest.java | 177 +++ .../cloud/dataflow/sdk/util/TimeUtilTest.java | 73 ++ .../cloud/dataflow/sdk/util/VarIntTest.java | 281 +++++ .../dataflow/sdk/util/WindowedValueTest.java | 55 + .../sdk/util/common/CounterSetTest.java | 75 ++ .../dataflow/sdk/util/common/CounterTest.java | 743 ++++++++++++ .../sdk/util/common/CounterTestUtils.java | 123 ++ .../dataflow/sdk/util/common/MetricTest.java | 40 + .../BatchingShuffleEntryReaderTest.java | 138 +++ .../worker/CachingShuffleBatchReaderTest.java | 95 ++ .../util/common/worker/ExecutorTestUtils.java | 238 ++++ .../common/worker/FlattenOperationTest.java | 79 ++ .../common/worker/MapTaskExecutorTest.java | 290 +++++ .../common/worker/OutputReceiverTest.java | 135 +++ .../common/worker/ParDoOperationTest.java | 116 ++ .../PartialGroupByKeyOperationTest.java | 397 +++++++ .../util/common/worker/ReadOperationTest.java | 303 +++++ .../util/common/worker/ShuffleEntryTest.java | 145 +++ .../util/common/worker/StateSamplerTest.java | 139 +++ .../util/common/worker/WorkExecutorTest.java | 58 + .../common/worker/WriteOperationTest.java | 73 ++ .../dataflow/sdk/util/gcsfs/GcsPathTest.java | 334 ++++++ ...MediaHttpUploaderProgressListenerTest.java | 83 ++ .../cloud/dataflow/sdk/values/KVTest.java | 73 ++ .../sdk/values/PCollectionListTest.java | 47 + .../cloud/dataflow/sdk/values/PDoneTest.java | 98 ++ 486 files changed, 80297 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 checkstyle.xml create mode 100644 examples/pom.xml create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/BigQueryTornadoesTest.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/TopWikipediaSessionsTest.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java create mode 100644 pom.xml create mode 100644 sdk/pom.xml create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java 
create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleWriter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ByteArrayShufflePosition.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleBatchReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleEntryWriter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannel.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCode.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleEntryWriter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleLibrary.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleWriter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java create 
mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/First.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AggregatorImpl.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiErrorExtractor.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Base64Utils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Metric.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/KeyGroupedShuffleEntries.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTracker.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackerGroup.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackingReiterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Receiver.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReceivingOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleBatchReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShufflePosition.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java create mode 100644 
sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/ClientRequestHelper.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageExceptions.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/StorageResourceId.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java create mode 100644 sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java create mode 100644 
sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/BigQueryIOTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/user.avsc create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJobTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannelTest.java create mode 100644 
sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCodeTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReader.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReaderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleWriter.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeper.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeperTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProvider.java create mode 100644 
sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProviderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemProperties.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemPropertiesTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java create mode 100644 
sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/Base64UtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOChannelUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MonitoringUtilTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StringUtilsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StructsTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TimeUtilTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/VarIntTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/WindowedValueTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterSetTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/MetricTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReaderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReaderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java create mode 100644 
sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutorTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPathTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PCollectionListTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000..2f7896d1d1365 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000..d645695673349 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/checkstyle.xml b/checkstyle.xml new file mode 100644 index 0000000000000..08df965ae6bb1 --- /dev/null +++ b/checkstyle.xml @@ -0,0 +1,385 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/pom.xml b/examples/pom.xml new file mode 100644 index 0000000000000..fcb52fcdbf8d9 --- /dev/null +++ b/examples/pom.xml @@ -0,0 +1,223 @@ + + + + 4.0.0 + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-parent + manual_build + + + com.google.cloud.dataflow + google-cloud-dataflow-java-examples-all + Google Cloud Dataflow Java Examples - All + Google Cloud Dataflow Java SDK provides a simple, Java-based + interface for processing virtually any size data using Google cloud + resources. This artifact includes all Dataflow Java SDK + examples. + http://cloud.google.com/dataflow + + manual_build + + jar + + + + DataflowPipelineTests + + true + com.google.cloud.dataflow.sdk.testing.RunnableOnService + both + + + + + + + + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-dependency-plugin + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 2.12 + + ../checkstyle.xml + true + true + true + + + + + check + + + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + attach-sources + compile + + jar + + + + attach-test-sources + test-compile + + test-jar + + + + + + + org.apache.felix + maven-bundle-plugin + 2.4.0 + true + + ${project.artifactId}-bundled-${project.version} + + + *;scope=compile|runtime;artifactId=!google-cloud-dataflow-java-sdk-all;inline=true + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + dataflow-examples-compile + compile + + jar + + + + dataflow-examples-test-compile + test-compile + + test-jar + + + + + + + + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + ${project.version} + + + + com.google.apis + google-api-services-storage + v1-rev11-1.19.0 + + + + com.google.apis + google-api-services-bigquery + v2-rev167-1.19.0 + + + + com.google.guava + guava-jdk5 + + + + + + com.google.http-client + google-http-client-jackson2 + 1.19.0 + + + + com.fasterxml.jackson.core + jackson-core + 2.4.2 + + + + com.fasterxml.jackson.core + jackson-annotations + 2.4.2 + + + + + org.slf4j + slf4j-api + 1.7.7 + + + + org.slf4j + slf4j-jdk14 + 1.7.7 + + + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + ${project.version} + test-jar + test + + + + org.hamcrest + hamcrest-all + 1.3 + test + + + + junit + junit + 4.11 + test + + + diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java new file mode 100644 index 0000000000000..43e94c08633b0 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import java.util.ArrayList; +import java.util.List; + +/** + * An example that reads the public samples of weather data from BigQuery, counts the number of + * tornadoes that occur in each month, and writes the results to BigQuery. + */ +public class BigQueryTornadoes { + // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod. + private static final String WEATHER_SAMPLES_TABLE = + "clouddataflow-readonly:samples.weather_stations"; + + /** + * Examines each row in the input table. If a tornado was recorded in that sample, the month in + * which it occurred is output. + */ + static class ExtractTornadoesFn extends DoFn { + @Override + public void processElement(ProcessContext c){ + TableRow row = c.element(); + if ((Boolean) row.get("tornado")) { + c.output(Integer.parseInt((String) row.get("month"))); + } + } + } + + /** + * Prepares the data for writing to BigQuery by building a TableRow object containing an + * integer representation of month and the number of tornadoes that occurred in each month. + */ + static class FormatCountsFn extends DoFn, TableRow> { + @Override + public void processElement(ProcessContext c) { + TableRow row = new TableRow() + .set("month", c.element().getKey().intValue()) + .set("tornado_count", c.element().getValue().longValue()); + c.output(row); + } + } + + /** + * Takes rows from a table and generates a table of counts. + * + * The input schema is described by + * https://developers.google.com/bigquery/docs/dataset-gsod . + * The output contains the total number of tornadoes found in each month in + * the following schema: + *
+ * <ul>
+ *   <li>month: integer</li>
+ *   <li>tornado_count: integer</li>
+ * </ul>
+ */ + static class CountTornadoes + extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection rows) { + + // row... => month... + PCollection tornadoes = rows.apply( + ParDo.of(new ExtractTornadoesFn())); + + // month... => ... + PCollection> tornadoCounts = + tornadoes.apply(Count.perElement()); + + // ... => row... + PCollection results = tornadoCounts.apply( + ParDo.of(new FormatCountsFn())); + + return results; + } + } + + /** + * Options supported by {@link BigQueryTornadoes}. + *

+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Table to read from, specified as " + + ":.") + @Default.String(WEATHER_SAMPLES_TABLE) + String getInput(); + void setInput(String value); + + @Description("Table to write to, specified as " + + ":.") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + + Pipeline p = Pipeline.create(options); + + // Build the table schema for the output table. + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER")); + TableSchema schema = new TableSchema().setFields(fields); + + p.apply(BigQueryIO.Read.from(options.getInput())) + .apply(new CountTornadoes()) + .apply(BigQueryIO.Write + .to(options.getOutput()) + .withSchema(schema) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); + + p.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java new file mode 100644 index 0000000000000..1e00589281aad --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.datastore.DatastoreV1.Entity; +import com.google.api.services.datastore.DatastoreV1.Key; +import com.google.api.services.datastore.DatastoreV1.Property; +import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.api.services.datastore.DatastoreV1.Value; +import com.google.api.services.datastore.client.DatastoreHelper; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.DatastoreIO; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; + +import java.util.Map; + +/** + * A WordCount example using DatastoreIO. + * + *

This example shows how to use DatastoreIO to read from Datastore and + * write the results to Cloud Storage. Note that this example will write + * data to Datastore, which may incur charges for Datastore operations. + + *

To run this example, users need to set up the environment and use gcloud + * to obtain credentials for Datastore: + *

+ * $ export CLOUDSDK_EXTRA_SCOPES=https://www.googleapis.com/auth/datastore
+ * $ gcloud auth login
+ * 
+ * + *

Note that the environment variable CLOUDSDK_EXTRA_SCOPES must be set + * to the same value when executing a Datastore pipeline, as the local auth + * cache is keyed by the requested scopes. + * + *

To run this pipeline locally, the following options must be provided: + *

{@code
+ *   --project=<project id>
+ *   --dataset=<dataset id>
+ *   --output=[<local file> | gs://<output path>]
+ * }
+ * + *
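+ * A concrete local invocation might look like the following sketch (the
+ * project and dataset values are placeholders, not real resources; the
+ * output here is a local file, which works with the DirectPipelineRunner):
+ * {@code
+ *   java com.google.cloud.dataflow.examples.DatastoreWordCount \
+ *     --project=<project id> \
+ *     --dataset=<dataset id> \
+ *     --output=/tmp/datastore-wordcount
+ * }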

To run this example using Dataflow service, you must additionally + * provide either {@literal --stagingLocation} or {@literal --tempLocation}, and + * select one of the Dataflow pipeline runners, eg + * {@literal --runner=BlockingDataflowPipelineRunner}. + */ +public class DatastoreWordCount { + + /** + * A DoFn that gets the content of an entity (one line in a + * Shakespeare play) and converts it to a string. + */ + static class GetContentFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + Map props = DatastoreHelper.getPropertyMap(c.element()); + c.output(DatastoreHelper.getString(props.get("content"))); + } + } + + /** + * A DoFn that creates entity for every line in Shakespeare. + */ + static class CreateEntityFn extends DoFn { + private String kind; + + CreateEntityFn(String kind) { + this.kind = kind; + } + + public Entity makeEntity(String content) { + Entity.Builder entityBuilder = Entity.newBuilder(); + // Create entities with same ancestor Key. + Key ancestorKey = DatastoreHelper.makeKey(kind, "root").build(); + Key key = DatastoreHelper.makeKey(ancestorKey, kind).build(); + entityBuilder.setKey(key); + entityBuilder.addProperty(Property.newBuilder() + .setName("content") + .setValue(Value.newBuilder().setStringValue(content))); + return entityBuilder.build(); + } + + @Override + public void processElement(ProcessContext c) { + c.output(makeEntity(c.element())); + } + } + + /** + * Options supported by {@link DatastoreWordCount}. + *

+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Path of the file to read from and store to Datastore") + @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt") + String getInput(); + void setInput(String value); + + @Description("Path of the file to write to") + @Validation.Required + String getOutput(); + void setOutput(String value); + + @Description("Dataset ID to read from datastore") + @Validation.Required + String getDataset(); + void setDataset(String value); + + @Description("Dataset entity kind") + @Default.String("shakespeare-demo") + String getKind(); + void setKind(String value); + + @Description("Read an existing dataset, do not write first") + boolean isReadOnly(); + void setReadOnly(boolean value); + } + + /** + * An example which creates a pipeline to populate DatastoreIO from a + * text input. Forces use of DirectPipelineRunner for local execution mode. + */ + public static void writeDataToDatastore(Options options) { + // Runs locally via DirectPiplineRunner, as writing is not yet implemented + // for the other runners which is why we just create a PipelineOptions with defaults. + Pipeline p = Pipeline.create(PipelineOptionsFactory.create()); + p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) + .apply(ParDo.of(new CreateEntityFn(options.getKind()))) + .apply(DatastoreIO.Write.to(options.getDataset())); + + p.run(); + } + + /** + * An example which creates a pipeline to do DatastoreIO.Read from Datastore. + */ + public static void readDataFromDatastore(Options options) { + // Build a query: read all entities of the specified kind. + Query.Builder q = Query.newBuilder(); + q.addKindBuilder().setName(options.getKind()); + Query query = q.build(); + + Pipeline p = Pipeline.create(options); + p.apply(DatastoreIO.Read.named("ReadShakespeareFromDatastore") + .from(options.getDataset(), query)) + .apply(ParDo.of(new GetContentFn())) + .apply(new WordCount.CountWords()) + .apply(TextIO.Write.named("WriteLines").to(options.getOutput())); + + p.run(); + } + + /** + * Main function. + * An example to demo how to use DatastoreIO. The runner here is + * customizable, which means users could pass either DirectPipelineRunner + * or DataflowPipelineRunner in PipelineOptions. + */ + public static void main(String args[]) { + // The options are used in two places, for Dataflow service, and + // building DatastoreIO.Read object + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + + if (!options.isReadOnly()) { + // First example: write data to Datastore for reading later. + // Note: this will insert new entries with the given kind. Existing entries + // should be cleared first, or the final counts will contain duplicates. + // The Datastore Admin tool in the AppEngine console can be used to erase + // all entries with a particular kind. + DatastoreWordCount.writeDataToDatastore(options); + } + + // Second example: do parallel read from Datastore. + DatastoreWordCount.readDataFromDatastore(options); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java new file mode 100644 index 0000000000000..a6bd4f27fd61d --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java @@ -0,0 +1,425 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.URICoder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.Keys; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; +import com.google.cloud.dataflow.sdk.transforms.Values; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.transforms.WithKeys; +import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; +import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey; +import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashSet; +import java.util.Set; + +/** + * An example that computes a basic TF-IDF search table for a directory or GCS prefix. + * + *

Command-line usage for this example: + * + *

+ *     java com.google.cloud.dataflow.examples.TfIdf \
+ *       --runner=<runner> \
+ *       --input=<input directory or GCS prefix> \
+ *       --output=<output location>
+ * 
+ * + *

For example, to execute this pipeline locally to index a local directory: + * + *

+ *     java com.google.cloud.dataflow.examples.TfIdf \
+ *       --runner=DirectPipelineRunner \
+ *       --input=<local input directory> \
+ *       --output=<local output file prefix>
+ * 
+ * + *

To execute this pipeline using the Dataflow service + * to index the works of Shakespeare and write the results to a GCS bucket: + * (For execution via the Dataflow service, only GCS locations are supported) + * + *

+ *     java com.google.cloud.dataflow.examples.TfIdf \
+ *       --project=<project id> \
+ *       --stagingLocation=gs://<staging location> \
+ *       --runner=BlockingDataflowPipelineRunner \
+ *       [--input=gs://<input GCS prefix>] \
+ *       --output=gs://<output prefix>
+ * 
+ * + *

The default input is gs://dataflow-samples/shakespeare/ + */ +public class TfIdf { + /** + * Options supported by {@link TfIdf}. + *

+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Path to the directory or GCS prefix containing files to read from") + @Default.String("gs://dataflow-samples/shakespeare/") + String getInput(); + void setInput(String value); + + @Description("Prefix of output URI to write to") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + /** + * Lists documents contained beneath the {@code options.input} prefix/directory. + */ + public static Set listInputDocuments(Options options) + throws URISyntaxException, IOException { + URI baseUri = new URI(options.getInput()); + + // List all documents in the directory or GCS prefix. + URI absoluteUri; + if (baseUri.getScheme() != null) { + absoluteUri = baseUri; + } else { + absoluteUri = new URI( + "file", + baseUri.getAuthority(), + baseUri.getPath(), + baseUri.getQuery(), + baseUri.getFragment()); + } + + Set uris = new HashSet<>(); + if (absoluteUri.getScheme().equals("file")) { + File directory = new File(absoluteUri); + for (String entry : directory.list()) { + File path = new File(directory, entry); + uris.add(path.toURI()); + } + } else if (absoluteUri.getScheme().equals("gs")) { + GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil(); + URI gcsUriGlob = new URI( + absoluteUri.getScheme(), + absoluteUri.getAuthority(), + absoluteUri.getPath() + "*", + absoluteUri.getQuery(), + absoluteUri.getFragment()); + for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) { + uris.add(entry.toUri()); + } + } + + return uris; + } + + /** + * Reads the documents at the provided uris and returns all lines + * from the documents tagged with which document they are from. + */ + public static class ReadDocuments + extends PTransform>> { + + private Iterable uris; + + public ReadDocuments(Iterable uris) { + this.uris = uris; + } + + @Override + public Coder getDefaultOutputCoder() { + return KvCoder.of(URICoder.of(), StringUtf8Coder.of()); + } + + @Override + public PCollection> apply(PInput input) { + Pipeline pipeline = getPipeline(); + + // Create one TextIO.Read transform for each document + // and add its output to a PCollectionList + PCollectionList> urisToLines = + PCollectionList.empty(pipeline); + + // TextIO.Read supports: + // - file: URIs and paths locally + // - gs: URIs on the service + for (final URI uri : uris) { + String uriString; + if (uri.getScheme().equals("file")) { + uriString = new File(uri).getPath(); + } else { + uriString = uri.toString(); + } + + PCollection> oneUriToLines = pipeline + .apply(TextIO.Read.from(uriString) + .named("TextIO.Read(" + uriString + ")")) + .apply(WithKeys.of(uri)); + + urisToLines = urisToLines.and(oneUriToLines); + } + + return urisToLines.apply(Flatten.>create()); + } + } + + /** + * A transform containing a basic TF-IDF pipeline. The input consists of KV objects + * where the key is the document's URI and the value is a piece + * of the document's content. The output is mapping from terms to + * scores for each document URI. + */ + public static class ComputeTfIdf + extends PTransform>, PCollection>>> { + + public ComputeTfIdf() { } + + @Override + public PCollection>> apply( + PCollection> uriToContent) { + + // Compute the total number of documents, and + // prepare this singleton PCollectionView for + // use as a side input. 
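+      // (This singleton view is consumed further below via
+      // ParDo.withSideInputs(totalDocuments) when the per-word document
+      // frequency is computed, so every worker observes the same total.)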
+ final PCollectionView totalDocuments = + uriToContent + .apply(Keys.create()) + .apply(RemoveDuplicates.create()) + .apply(Count.globally()) + .apply(View.asSingleton()); + + // Create a collection of pairs mapping a URI to each + // of the words in the document associated with that that URI. + PCollection> uriToWords = uriToContent + .apply(ParDo.named("SplitWords").of( + new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + URI uri = c.element().getKey(); + String line = c.element().getValue(); + for (String word : line.split("\\W+")) { + if (!word.isEmpty()) { + c.output(KV.of(uri, word.toLowerCase())); + } + } + } + })); + + // Compute a mapping from each word to the total + // number of documents in which it appears. + PCollection> wordToDocCount = uriToWords + .apply(RemoveDuplicates.>create()) + .apply(Values.create()) + .apply(Count.perElement()); + + // Compute a mapping from each URI to the total + // number of words in the document associated with that URI. + PCollection> uriToWordTotal = uriToWords + .apply(Keys.create()) + .apply(Count.perElement()); + + // Count, for each (URI, word) pair, the number of + // occurrences of that word in the document associated + // with the URI. + PCollection, Long>> uriAndWordToCount = uriToWords + .apply(Count.>perElement()); + + // Adjust the above collection to a mapping from + // (URI, word) pairs to counts into an isomorphic mapping + // from URI to (word, count) pairs, to prepare for a join + // by the URI key. + PCollection>> uriToWordAndCount = uriAndWordToCount + .apply(ParDo.of(new DoFn, Long>, KV>>() { + @Override + public void processElement(ProcessContext c) { + URI uri = c.element().getKey().getKey(); + String word = c.element().getKey().getValue(); + Long occurrences = c.element().getValue(); + c.output(KV.of(uri, KV.of(word, occurrences))); + } + })); + + // Prepare to join the mapping of URI to (word, count) pairs with + // the mapping of URI to total word counts, by associating + // each of the input PCollection> with + // a tuple tag. Each input must have the same key type, URI + // in this case. The type parameter of the tuple tag matches + // the types of the values for each collection. + final TupleTag wordTotalsTag = new TupleTag(); + final TupleTag> wordCountsTag = new TupleTag>(); + KeyedPCollectionTuple coGbkInput = KeyedPCollectionTuple + .of(wordTotalsTag, uriToWordTotal) + .and(wordCountsTag, uriToWordAndCount); + + // Perform a CoGroupByKey (a sort of pre-join) on the prepared + // inputs. This yields a mapping from URI to a CoGbkResult + // (CoGroupByKey Result). The CoGbkResult is a mapping + // from the above tuple tags to the values in each input + // associated with a particular URI. In this case, each + // KV group a URI with the total number of + // words in that document as well as all the (word, count) + // pairs for particular words. + PCollection> uriToWordAndCountAndTotal = coGbkInput + .apply(CoGroupByKey.create().withName("CoGroupByURI")); + + // Compute a mapping from each word to a (URI, term frequency) + // pair for each URI. A word's term frequency for a document + // is simply the number of times that word occurs in the document + // divided by the total number of words in the document. 
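+      // For example, a word that appears 3 times in a document containing
+      // 300 words has a term frequency of 3 / 300 = 0.01 for that document.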
+ PCollection>> wordToUriAndTf = uriToWordAndCountAndTotal + .apply(ParDo.of(new DoFn, KV>>() { + @Override + public void processElement(ProcessContext c) { + URI uri = c.element().getKey(); + Long wordTotal = c.element().getValue().getOnly(wordTotalsTag); + + for (KV wordAndCount : c.element().getValue().getAll(wordCountsTag)) { + String word = wordAndCount.getKey(); + Long wordCount = wordAndCount.getValue(); + Double termFrequency = wordCount.doubleValue() / wordTotal.doubleValue(); + c.output(KV.of(word, KV.of(uri, termFrequency))); + } + } + })); + + // Compute a mapping from each word to its document frequency. + // A word's document frequency in a corpus is the number of + // documents in which the word appears divided by the total + // number of documents in the corpus. Note how the total number of + // documents is passed as a side input; the same value is + // presented to each invocation of the DoFn. + PCollection> wordToDf = wordToDocCount + .apply(ParDo + .withSideInputs(totalDocuments) + .of(new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + String word = c.element().getKey(); + Long documentCount = c.element().getValue(); + Long documentTotal = c.sideInput(totalDocuments); + Double documentFrequency = documentCount.doubleValue() + / documentTotal.doubleValue(); + + c.output(KV.of(word, documentFrequency)); + } + })); + + // Join the term frequency and document frequency + // collections, each keyed on the word. + final TupleTag> tfTag = new TupleTag>(); + final TupleTag dfTag = new TupleTag(); + PCollection> wordToUriAndTfAndDf = KeyedPCollectionTuple + .of(tfTag, wordToUriAndTf) + .and(dfTag, wordToDf) + .apply(CoGroupByKey.create()); + + // Compute a mapping from each word to a (URI, TF-IDF) score + // for each URI. There are a variety of definitions of TF-IDF + // ("term frequency - inverse document frequency") score; + // here we use a basic version which is the term frequency + // divided by the log of the document frequency. + PCollection>> wordToUriAndTfIdf = wordToUriAndTfAndDf + .apply(ParDo.of(new DoFn, KV>>() { + @Override + public void processElement(ProcessContext c) { + String word = c.element().getKey(); + Double df = c.element().getValue().getOnly(dfTag); + + for (KV uriAndTf : c.element().getValue().getAll(tfTag)) { + URI uri = uriAndTf.getKey(); + Double tf = uriAndTf.getValue(); + Double tfIdf = tf * Math.log(1 / df); + c.output(KV.of(word, KV.of(uri, tfIdf))); + } + } + })); + + return wordToUriAndTfIdf; + } + } + + /** + * A {@link PTransform} to write, in CSV format, a mapping from term and URI + * to score. 
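+ * Each output line contains the word, the document URI, and the TF-IDF score,
+ * separated by a comma and a tab (see the {@code String.format} call below).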
+ */ + public static class WriteTfIdf + extends PTransform>>, PDone> { + + private String output; + + public WriteTfIdf(String output) { + this.output = output; + } + + @Override + public PDone apply(PCollection>> wordToUriAndTfIdf) { + return wordToUriAndTfIdf + .apply(ParDo.of(new DoFn>, String>() { + @Override + public void processElement(ProcessContext c) { + c.output(String.format("%s,\t%s,\t%f", + c.element().getKey(), + c.element().getValue().getKey(), + c.element().getValue().getValue())); + } + })) + .apply(TextIO.Write + .to(output) + .withSuffix(".csv")); + } + } + + public static void main(String[] args) throws Exception { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline pipeline = Pipeline.create(options); + + pipeline + .apply(new ReadDocuments(listInputDocuments(options))) + .apply(new ComputeTfIdf()) + .apply(new WriteTfIdf(options.getOutput())); + + pipeline.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java new file mode 100644 index 0000000000000..baa520ea0447f --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableComparator; +import com.google.cloud.dataflow.sdk.transforms.Top; +import com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +import java.util.List; + +/** + * Pipeline that reads Wikipedia edit data from BigQuery and computes the user with + * the longest string of edits separated by no more than an hour within each month. + * + *

This pipeline demonstrates how the Windowing API can be used to perform + * various time-based aggregations of data. + * + *
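As a rough sketch of the two windowing strategies this pipeline combines later in the file (the element types here are assumptions based on the transforms below), edits are first grouped into per-user sessions separated by gaps of at least an hour, and the per-session counts are then re-windowed into calendar months:

    // Sketch only: userEdits is assumed to be a PCollection<String> of user names
    // whose elements carry the edit time as their implicit timestamp.
    PCollection<KV<String, Long>> editsPerSession = userEdits
        .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardHours(1))))
        .apply(Count.perElement());

    // The per-session counts are then assigned to month-long calendar windows.
    PCollection<KV<String, Long>> countsByMonth = editsPerSession
        .apply(Window.<KV<String, Long>>into(CalendarWindows.months(1)));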

To run this pipeline, the following options must be provided: + *

{@code
+ *   --project=<PROJECT_ID>
+ *   --output=gs://<OUTPUT_PATH>
+ *   --stagingLocation=gs://<STAGING_LOCATION>
+ *   --runner=(Blocking)DataflowPipelineRunner
+ * }
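For reference, these flags are parsed into the strongly typed Options interface declared near the end of this file; a minimal sketch of that wiring, mirroring the main method below:

    // Sketch: parse and validate the command-line flags, then build the pipeline.
    Options options = PipelineOptionsFactory.fromArgs(args)
        .withValidation()
        .as(Options.class);
    Pipeline p = Pipeline.create(options.as(DataflowPipelineOptions.class));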
+ * + *

To run this example using Dataflow service, you must additionally + * provide either {@literal --stagingLocation} or {@literal --tempLocation}, and + * select one of the Dataflow pipeline runners, eg + * {@literal --runner=BlockingDataflowPipelineRunner}. + */ +public class TopWikipediaSessions { + private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json"; + + /** + * Extracts user and timestamp from a TableRow representing a Wikipedia edit + */ + static class ExtractUserAndTimestamp extends DoFn { + @Override + public void processElement(ProcessContext c) { + TableRow row = c.element(); + int timestamp = (Integer) row.get("timestamp"); + String userName = (String) row.get("contributor_username"); + if (userName != null) { + // Sets the implicit timestamp field to be used in windowing. + c.outputWithTimestamp(userName, new Instant(timestamp * 1000L)); + } + } + } + + /** + * Computes the number of edits in each user session. A session is defined as + * a string of edits where each is separated from the next by less than an hour. + */ + static class ComputeSessions + extends PTransform, PCollection>> { + @Override + public PCollection> apply(PCollection actions) { + return actions + .apply(Window.into(Sessions.withGapDuration(Duration.standardHours(1)))) + + .apply(Count.perElement()); + } + } + + /** + * Computes the longest session ending in each month. + */ + private static class TopPerMonth + extends PTransform>, PCollection>>> { + @Override + public PCollection>> apply(PCollection> sessions) { + return sessions + .apply(Window.>into(CalendarWindows.months(1))) + + .apply(Top.of(1, new SerializableComparator>() { + @Override + public int compare(KV o1, KV o2) { + return Long.compare(o1.getValue(), o2.getValue()); + } + })); + } + } + + static class ComputeTopSessions extends PTransform, PCollection> { + private final double samplingThreshold; + + public ComputeTopSessions(double samplingThreshold) { + this.samplingThreshold = samplingThreshold; + } + + @Override + public PCollection apply(PCollection input) { + return input + .apply(ParDo.of(new ExtractUserAndTimestamp())) + + .apply(ParDo.named("SampleUsers").of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (Math.abs(c.element().hashCode()) <= Integer.MAX_VALUE * samplingThreshold) { + c.output(c.element()); + } + } + })) + + .apply(new ComputeSessions()) + + .apply(ParDo.named("SessionsToStrings").of( + new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of( + c.element().getKey() + " : " + + c.windows().iterator().next(), c.element().getValue())); + } + })) + + .apply(new TopPerMonth()) + + .apply(ParDo.named("FormatOutput").of( + new DoFn>, String>() { + @Override + public void processElement(ProcessContext c) { + for (KV item : c.element()) { + String session = item.getKey(); + long count = item.getValue(); + c.output( + session + " : " + count + " : " + + ((IntervalWindow) c.windows().iterator().next()).start()); + } + } + })); + } + } + + /** + * Options supported by this class. + * + *

Inherits standard Dataflow configuration options. + */ + private static interface Options extends PipelineOptions { + @Description( + "Input specified as a GCS path containing a BigQuery table exported as json") + @Default.String(EXPORTED_WIKI_TABLE) + String getInput(); + void setInput(String value); + + @Description("File to output results to") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(Options.class); + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + + Pipeline p = Pipeline.create(dataflowOptions); + + double samplingThreshold = 0.1; + + p.apply(TextIO.Read + .from(options.getInput()) + .withCoder(TableRowJsonCoder.of())) + .apply(new ComputeTopSessions(samplingThreshold)) + .apply(TextIO.Write.named("Write").withoutSharding().to(options.getOutput())); + + p.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java new file mode 100644 index 0000000000000..96893b909bc7a --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.DefaultValueFactory; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * An example that counts words in Shakespeare. 
For a detailed walkthrough of this + * example see: + * https://developers.google.com/cloud-dataflow/java-sdk/wordcount-example + * + * To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and example configuration: + * --output=[ | gs://] + * + * To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and example configuration: + * --output=gs:// + * + * The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be + * overridden with --input. + */ +public class WordCount { + + /** A DoFn that tokenizes lines of text into individual words. */ + static class ExtractWordsFn extends DoFn { + private Aggregator emptyLines; + + @Override + public void startBundle(Context c) { + emptyLines = c.createAggregator("emptyLines", new Sum.SumLongFn()); + } + + @Override + public void processElement(ProcessContext c) { + // Split the line into words. + String[] words = c.element().split("[^a-zA-Z']+"); + + // Keep track of the number of lines without any words encountered while tokenizing. + // This aggregator is visible in the monitoring UI when run using DataflowPipelineRunner. + if (words.length == 0) { + emptyLines.addValue(1L); + } + + // Output each word encountered into the output PCollection. + for (String word : words) { + if (!word.isEmpty()) { + c.output(word); + } + } + } + } + + /** A DoFn that converts a Word and Count into a printable string. */ + static class FormatCountsFn extends DoFn, String> { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().getKey() + ": " + c.element().getValue()); + } + } + + /** + * A PTransform that converts a PCollection containing lines of text into a PCollection of + * formatted word counts. + *

+ * Although this pipeline fragment could be inlined, bundling it as a PTransform allows for easy + * reuse, modular testing, and an improved monitoring experience. + */ + public static class CountWords extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection lines) { + + // Convert lines of text into individual words. + PCollection words = lines.apply( + ParDo.of(new ExtractWordsFn())); + + // Count the number of times each word occurs. + PCollection> wordCounts = + words.apply(Count.perElement()); + + // Format each word and count into a printable string. + PCollection results = wordCounts.apply( + ParDo.of(new FormatCountsFn())); + + return results; + } + } + + /** + * Options supported by {@link WordCount}. + *

+ * Inherits standard configuration options. + */ + public static interface Options extends PipelineOptions { + @Description("Path of the file to read from") + @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt") + String getInput(); + void setInput(String value); + + @Description("Path of the file to write to") + @Default.InstanceFactory(OutputFactory.class) + String getOutput(); + void setOutput(String value); + + /** Returns gs://${STAGING_LOCATION}/"counts.txt" */ + public static class OutputFactory implements DefaultValueFactory { + @Override + public String create(PipelineOptions options) { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + if (dataflowOptions.getStagingLocation() != null) { + return GcsPath.fromUri(dataflowOptions.getStagingLocation()) + .resolve("counts.txt").toString(); + } else { + throw new IllegalArgumentException("Must specify --output or --stagingLocation"); + } + } + } + + /** + * By default (numShards == 0), the system will choose the shard count. + * Most programs will not need this option. + */ + @Description("Number of output shards (0 if the system should choose automatically)") + int getNumShards(); + void setNumShards(int value); + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) + .apply(new CountWords()) + .apply(TextIO.Write.named("WriteCounts") + .to(options.getOutput()) + .withNumShards(options.getNumShards())); + + p.run(); + } +} + diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/BigQueryTornadoesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/BigQueryTornadoesTest.java new file mode 100644 index 0000000000000..6dafef7036481 --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/BigQueryTornadoesTest.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.BigQueryTornadoes.ExtractTornadoesFn; +import com.google.cloud.dataflow.examples.BigQueryTornadoes.FormatCountsFn; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** + * Test case for {@link BigQueryTornadoes}. 
+ */ +@RunWith(JUnit4.class) +public class BigQueryTornadoesTest { + + @Test + public void testExtractTornadoes() throws Exception { + TableRow row = new TableRow() + .set("month", "6") + .set("tornado", true); + DoFnTester extractWordsFn = + DoFnTester.of(new ExtractTornadoesFn()); + Assert.assertThat(extractWordsFn.processBatch(row), + CoreMatchers.hasItems(6)); + } + + @Test + public void testNoTornadoes() throws Exception { + TableRow row = new TableRow() + .set("month", 6) + .set("tornado", false); + DoFnTester extractWordsFn = + DoFnTester.of(new ExtractTornadoesFn()); + Assert.assertTrue(extractWordsFn.processBatch(row).isEmpty()); + } + + @Test + @SuppressWarnings({"rawtypes", "unchecked"}) + public void testFormatCounts() throws Exception { + DoFnTester, TableRow> formatCountsFn = + DoFnTester.of(new FormatCountsFn()); + KV empty[] = {}; + List results = formatCountsFn.processBatch(empty); + Assert.assertTrue(results.size() == 0); + KV input[] = { KV.of(3, 0L), + KV.of(4, Long.MAX_VALUE), + KV.of(5, Long.MIN_VALUE) }; + results = formatCountsFn.processBatch(input); + Assert.assertEquals(results.size(), 3); + Assert.assertEquals(results.get(0).get("month"), 3); + Assert.assertEquals(results.get(0).get("tornado_count"), 0L); + Assert.assertEquals(results.get(1).get("month"), 4); + Assert.assertEquals(results.get(1).get("tornado_count"), Long.MAX_VALUE); + Assert.assertEquals(results.get(2).get("month"), 5); + Assert.assertEquals(results.get(2).get("tornado_count"), Long.MIN_VALUE); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java new file mode 100644 index 0000000000000..341fd80c25b2b --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.Keys; +import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.net.URI; +import java.util.Arrays; + +/** + * Tests of TfIdf + */ +@RunWith(JUnit4.class) +public class TfIdfTest { + + /** Test that the example runs */ + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testTfIdf() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection>> wordToUriAndTfIdf = pipeline + .apply(Create.of( + KV.of(new URI("x"), "a b c d"), + KV.of(new URI("y"), "a b c"), + KV.of(new URI("z"), "a m n"))) + .apply(new TfIdf.ComputeTfIdf()); + + PCollection words = wordToUriAndTfIdf + .apply(Keys.create()) + .apply(RemoveDuplicates.create()); + + DataflowAssert.that(words).containsInAnyOrder(Arrays.asList("a", "m", "n", "b", "c", "d")); + + pipeline.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/TopWikipediaSessionsTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/TopWikipediaSessionsTest.java new file mode 100644 index 0000000000000..ce43ae9930a4b --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/TopWikipediaSessionsTest.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** Unit tests for {@link TopWikipediaSessions}. 
*/ +@RunWith(JUnit4.class) +public class TopWikipediaSessionsTest { + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testComputeTopUsers() { + Pipeline p = TestPipeline.create(); + + PCollection output = + p.apply(Create.of(Arrays.asList( + new TableRow().set("timestamp", 0).set("contributor_username", "user1"), + new TableRow().set("timestamp", 1).set("contributor_username", "user1"), + new TableRow().set("timestamp", 2).set("contributor_username", "user1"), + new TableRow().set("timestamp", 0).set("contributor_username", "user2"), + new TableRow().set("timestamp", 1).set("contributor_username", "user2"), + new TableRow().set("timestamp", 3601).set("contributor_username", "user2"), + new TableRow().set("timestamp", 3602).set("contributor_username", "user2"), + new TableRow().set("timestamp", 35 * 24 * 3600).set("contributor_username", "user3")))) + .apply(new TopWikipediaSessions.ComputeTopSessions(1.0)); + + DataflowAssert.that(output).containsInAnyOrder(Arrays.asList( + "user1 : [1970-01-01T00:00:00.000Z..1970-01-01T01:00:02.000Z)" + + " : 3 : 1970-01-01T00:00:00.000Z", + "user3 : [1970-02-05T00:00:00.000Z..1970-02-05T01:00:00.000Z)" + + " : 1 : 1970-02-01T00:00:00.000Z")); + + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java new file mode 100644 index 0000000000000..36efec738ddc5 --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.examples.WordCount.CountWords; +import com.google.cloud.dataflow.examples.WordCount.ExtractWordsFn; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests of WordCount. + */ +@RunWith(JUnit4.class) +public class WordCountTest { + + /** Example test that tests a specific DoFn. 
*/ + @Test + public void testExtractWordsFn() { + DoFnTester extractWordsFn = + DoFnTester.of(new ExtractWordsFn()); + + Assert.assertThat(extractWordsFn.processBatch(" some input words "), + CoreMatchers.hasItems("some", "input", "words")); + Assert.assertThat(extractWordsFn.processBatch(" "), + CoreMatchers.hasItems()); + Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"), + CoreMatchers.hasItems("some", "input", "words")); + } + + static final String[] WORDS_ARRAY = new String[] { + "hi there", "hi", "hi sue bob", + "hi sue", "", "bob hi"}; + + static final List WORDS = Arrays.asList(WORDS_ARRAY); + + static final String[] COUNTS_ARRAY = new String[] { + "hi: 5", "there: 1", "sue: 2", "bob: 2"}; + + /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */ + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCountWords() throws Exception { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of()); + + PCollection output = input.apply(new CountWords()); + + DataflowAssert.that(output).containsInAnyOrder(COUNTS_ARRAY); + p.run(); + } +} diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000000000..fd5b04376e43e --- /dev/null +++ b/pom.xml @@ -0,0 +1,202 @@ + + + + 4.0.0 + + + com.google + google + 5 + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-parent + Google Cloud Dataflow Java SDK - Parent + Google Cloud Dataflow Java SDK provides a simple, Java-based + interface for processing virtually any size data using Google cloud + resources. This artifact includes the parent POM for other Dataflow + artifacts. + http://cloud.google.com/dataflow + 2013 + + manual_build + + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + + Google Inc. 
+ http://www.google.com + + + + + scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git + scm:git:git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git + git@github.com:GoogleCloudPlatform/DataflowJavaSDK.git + + + + 3.0.3 + + + + UTF-8 + + + pom + + sdk + examples + + + + + + + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + -Xlint:all + true + true + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.5 + + + + org.codehaus.mojo + versions-maven-plugin + 2.1 + + + + org.codehaus.mojo + exec-maven-plugin + 1.1 + + + verify + + java + + + + + + + java.util.logging.config.file + logging.properties + + + + + + + org.apache.felix + maven-bundle-plugin + 2.4.0 + + + + + org.jacoco + jacoco-maven-plugin + 0.7.1.201405082137 + + + + prepare-agent + + + file + true + + + + report + prepare-package + + report + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.15 + + ${testParallelValue} + 4 + + ${project.build.directory}/${project.artifactId}-${project.version}.jar + ${project.build.directory}/${project.artifactId}-${project.version}-tests.jar + + ${testGroups} + + ${runIntegrationTestOnService} + ${dataflowProjectName} + + false + + + + org.apache.maven.surefire + surefire-junit47 + 2.7.2 + + + + + + + + + + + org.codehaus.mojo + versions-maven-plugin + 2.1 + + + + dependency-updates-report + plugin-updates-report + + + + + + + diff --git a/sdk/pom.xml b/sdk/pom.xml new file mode 100644 index 0000000000000..93a8f277a8370 --- /dev/null +++ b/sdk/pom.xml @@ -0,0 +1,315 @@ + + + + 4.0.0 + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-parent + manual_build + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + Google Cloud Dataflow Java SDK - All + Google Cloud Dataflow Java SDK provides a simple, Java-based + interface for processing virtually any size data using Google cloud + resources. This artifact includes entire Dataflow Java SDK. 
+ http://cloud.google.com/dataflow + + manual_build + + jar + + + ${maven.build.timestamp} + yyyy-MM-dd HH:mm + com.google.cloud.dataflow + false + none + + + + + + + DataflowPipelineTests + + true + com.google.cloud.dataflow.sdk.testing.RunnableOnService + both + + + + + + + + src/main/resources + true + + + + + + maven-compiler-plugin + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 2.12 + + ../checkstyle.xml + true + true + false + true + + + + + check + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + dataflow-sdk-compile + compile + + jar + + + + dataflow-sdk-test-compile + test-compile + + test-jar + + + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + attach-sources + compile + + jar + + + + attach-test-sources + test-compile + + test-jar + + + + + + + org.apache.felix + maven-bundle-plugin + true + + ${project.artifactId}-bundled-${project.version} + + + !${dataflow}.sdk.runners.worker.*, + !${dataflow}.sdk.streaming.*, + !${dataflow}.sdk.util.gcsio, + ${dataflow}.* + + true + *;scope=compile|runtime;inline=true + + + + + + + org.jacoco + jacoco-maven-plugin + + + + + org.apache.avro + avro-maven-plugin + 1.7.7 + + + schemas + generate-test-sources + + schema + + + ${project.basedir}/src/test/ + ${project.build.directory}/generated-test-sources/java + + + + + + + + + + com.google.apis + google-api-services-dataflow + v1beta3-rev1-1.19.0 + + + + com.google.guava + guava-jdk5 + + + + + + com.google.apis + google-api-services-bigquery + v2-rev167-1.19.0 + + + + com.google.apis + google-api-services-compute + v1-rev34-1.19.0 + + + + com.google.apis + google-api-services-pubsub + v1beta1-rev9-1.19.0 + + + + com.google.apis + google-api-services-storage + v1-rev11-1.19.0 + + + + com.google.http-client + google-http-client-jackson2 + 1.19.0 + + + + com.google.oauth-client + google-oauth-client-java6 + 1.19.0 + + + + com.google.apis + google-api-services-datastore-protobuf + v1beta2-rev1-2.1.0 + + + + com.google.guava + guava + 18.0 + + + + com.fasterxml.jackson.core + jackson-core + 2.4.2 + + + + com.fasterxml.jackson.core + jackson-annotations + 2.4.2 + + + + com.fasterxml.jackson.core + jackson-databind + 2.4.2 + + + + + org.slf4j + slf4j-api + 1.7.7 + + + + org.slf4j + slf4j-jdk14 + 1.7.7 + + + + org.apache.avro + avro + 1.7.7 + + + + joda-time + joda-time + 2.4 + + + + + org.hamcrest + hamcrest-all + 1.3 + test + + + + junit + junit + 4.11 + test + + + + org.mockito + mockito-all + 1.9.5 + test + + + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java new file mode 100644 index 0000000000000..ec67fd7aabc3f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java @@ -0,0 +1,395 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk; + +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; +import com.google.cloud.dataflow.sdk.runners.TransformHierarchy; +import com.google.cloud.dataflow.sdk.runners.TransformTreeNode; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.UserCodeException; +import com.google.cloud.dataflow.sdk.values.PBegin; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * A Pipeline manages a DAG of PTransforms, and the PCollections + * that the PTransforms consume and produce. + * + *

After a {@code Pipeline} has been constructed, it can be executed, + * using a default or an explicit {@link PipelineRunner}. + * + *

Multiple {@code Pipeline}s can be constructed and executed independently + * and concurrently. + * + *

Each {@code Pipeline} is self-contained and isolated from any other + * {@code Pipeline}. The {@link PValues} that are inputs and outputs of each of a + * {@code Pipeline}'s {@link PTransform}s are also owned by that {@code Pipeline}. + * A {@code PValue} owned by one {@code Pipeline} can be read only by {@code PTransform}s + * also owned by that {@code Pipeline}. + * + *

Here's a typical example of use: + *

 {@code
+ * // Start by defining the options for the pipeline.
+ * PipelineOptions options = PipelineOptionsFactory.create();
+ * // Then create the pipeline.
+ * Pipeline p = Pipeline.create(options);
+ *
+ * // A root PTransform, like TextIO.Read or Create, gets added
+ * // to the Pipeline by being applied:
+ * PCollection<String> lines =
+ *     p.apply(TextIO.Read.from("gs://bucket/dir/file*.txt"));
+ *
+ * // A Pipeline can have multiple root transforms:
+ * PCollection<String> moreLines =
+ *     p.apply(TextIO.Read.from("gs://bucket/other/dir/file*.txt"));
+ * PCollection<String> yetMoreLines =
+ *     p.apply(Create.of("yet", "more", "lines")).setCoder(StringUtf8Coder.of());
+ *
+ * // Further PTransforms can be applied, in an arbitrary (acyclic) graph.
+ * // Subsequent PTransforms (and intermediate PCollections etc.) are
+ * // implicitly part of the same Pipeline.
+ * PCollection<String> allLines =
+ *     PCollectionList.of(lines).and(moreLines).and(yetMoreLines)
+ *     .apply(new Flatten<String>());
+ * PCollection<KV<String, Long>> wordCounts =
+ *     allLines
+ *     .apply(ParDo.of(new ExtractWords()))
+ *     .apply(new Count<String>());
+ * PCollection<String> formattedWordCounts =
+ *     wordCounts.apply(ParDo.of(new FormatCounts()));
+ * formattedWordCounts.apply(TextIO.Write.to("gs://bucket/dir/counts.txt"));
+ *
+ * // PTransforms aren't executed when they're applied, rather they're
+ * // just added to the Pipeline.  Once the whole Pipeline of PTransforms
+ * // is constructed, the Pipeline's PTransforms can be run using a
+ * // PipelineRunner.  The default PipelineRunner executes the Pipeline
+ * // directly, sequentially, in this one process, which is useful for
+ * // unit tests and simple experiments:
+ * p.run();
+ *
+ * } 
+ */ +public class Pipeline { + private static final Logger LOG = LoggerFactory.getLogger(Pipeline.class); + + ///////////////////////////////////////////////////////////////////////////// + // Public operations. + + /** + * Constructs a pipeline from the provided options. + * + * @return The newly created pipeline. + */ + public static Pipeline create(PipelineOptions options) { + Pipeline pipeline = new Pipeline(PipelineRunner.fromOptions(options), options); + LOG.debug("Creating {}", pipeline); + return pipeline; + } + + /** + * Returns a {@link PBegin} owned by this Pipeline. This is useful + * as the input of a root PTransform such as {@code TextIO.Read} or + * {@link com.google.cloud.dataflow.sdk.transforms.Create}. + */ + public PBegin begin() { + return PBegin.in(this); + } + + /** + * Starts using this pipeline with a root PTransform such as + * {@code TextIO.Read} or + * {@link com.google.cloud.dataflow.sdk.transforms.Create}. + * + *

+ * Alias for {@code begin().apply(root)}. + */ + public Output apply( + PTransform root) { + return begin().apply(root); + } + + /** + * Runs the Pipeline. + */ + public PipelineResult run() { + LOG.debug("Running {} via {}", this, runner); + try { + return runner.run(this); + } catch (UserCodeException e) { + // This serves to replace the stack with one that ends here and + // is caused by the caught UserCodeException, thereby splicing + // out all the stack frames in between the PipelineRunner itself + // and where the worker calls into the user's code. + throw new RuntimeException(e.getCause()); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + // Below here are operations that aren't normally called by users. + + /** + * Returns the {@link CoderRegistry} that this Pipeline uses. + */ + public CoderRegistry getCoderRegistry() { + if (coderRegistry == null) { + coderRegistry = new CoderRegistry(); + coderRegistry.registerStandardCoders(); + } + return coderRegistry; + } + + /** + * Sets the {@link CoderRegistry} that this Pipeline uses. + */ + public void setCoderRegistry(CoderRegistry coderRegistry) { + this.coderRegistry = coderRegistry; + } + + /** + * A PipelineVisitor can be passed into + * {@link Pipeline#traverseTopologically} to be called for each of the + * transforms and values in the Pipeline. + */ + public interface PipelineVisitor { + public void enterCompositeTransform(TransformTreeNode node); + public void leaveCompositeTransform(TransformTreeNode node); + public void visitTransform(TransformTreeNode node); + public void visitValue(PValue value, TransformTreeNode producer); + } + + /** + * Invokes the PipelineVisitor's + * {@link PipelineVisitor#visitTransform} and + * {@link PipelineVisitor#visitValue} operations on each of this + * Pipeline's PTransforms and PValues, in forward + * topological order. + * + *

Traversal of the pipeline causes PTransform and PValue instances to + * be marked as finished, at which point they may no longer be modified. + * + *
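As an illustrative sketch of how this hook can be used (the counting logic is made up for the example; the callbacks are those of the PipelineVisitor interface above):

    // Count the primitive (non-composite) transforms in a fully constructed pipeline.
    final int[] primitiveTransforms = {0};
    pipeline.traverseTopologically(new Pipeline.PipelineVisitor() {
      @Override public void enterCompositeTransform(TransformTreeNode node) {}
      @Override public void leaveCompositeTransform(TransformTreeNode node) {}
      @Override public void visitTransform(TransformTreeNode node) { primitiveTransforms[0]++; }
      @Override public void visitValue(PValue value, TransformTreeNode producer) {}
    });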

Typically invoked by {@link PipelineRunner} subclasses. + */ + public void traverseTopologically(PipelineVisitor visitor) { + Set visitedValues = new HashSet<>(); + // Visit all the transforms, which should implicitly visit all the values. + transforms.visit(visitor, visitedValues); + if (!visitedValues.containsAll(values)) { + throw new RuntimeException( + "internal error: should have visited all the values " + + "after visiting all the transforms"); + } + } + + /** + * Applies the given PTransform to the given Input, + * and returns its Output. + * + *

Called by PInput subclasses in their {@code apply} methods. + */ + public static + Output applyTransform(Input input, + PTransform transform) { + return input.getPipeline().applyInternal(input, transform); + } + + ///////////////////////////////////////////////////////////////////////////// + // Below here are internal operations, never called by users. + + private final PipelineRunner runner; + private final PipelineOptions options; + private final TransformHierarchy transforms = new TransformHierarchy(); + private Collection values = new ArrayList<>(); + private Set usedFullNames = new HashSet<>(); + private CoderRegistry coderRegistry; + + @Deprecated + protected Pipeline(PipelineRunner runner) { + this(runner, PipelineOptionsFactory.create()); + } + + protected Pipeline(PipelineRunner runner, PipelineOptions options) { + this.runner = runner; + this.options = options; + } + + @Override + public String toString() { return "Pipeline#" + hashCode(); } + + /** + * Applies a transformation to the given input. + * + * @see Pipeline#apply + */ + private + Output applyInternal(Input input, + PTransform transform) { + input.finishSpecifying(); + + TransformTreeNode parent = transforms.getCurrent(); + String namePrefix = parent.getFullName(); + String fullName = uniquifyInternal(namePrefix, transform.getName()); + TransformTreeNode child = new TransformTreeNode(parent, transform, fullName, input); + parent.addComposite(child); + + transforms.addInput(child, input); + + transform.setPipeline(this); + LOG.debug("Adding {} to {}", transform, this); + try { + transforms.pushNode(child); + Output output = runner.apply(transform, input); + transforms.setOutput(child, output); + + // recordAsOutput is a NOOP if already called; + output.recordAsOutput(this, child.getTransform()); + verifyOutputState(output, child); + return output; + } finally { + transforms.popNode(); + } + } + + /** + * Returns all producing transforms for the {@link PValue}s contained + * in {@code output}. + */ + private List getProducingTransforms(POutput output) { + List producingTransforms = new ArrayList<>(); + for (PValue value : output.expand()) { + PTransform transform = value.getProducingTransformInternal(); + if (transform != null) { + producingTransforms.add(transform); + } + } + return producingTransforms; + } + + /** + * Verifies that the output of a PTransform is correctly defined. + * + *

A non-composite transform must have all + * of its outputs registered as produced by the transform. + */ + private void verifyOutputState(POutput output, TransformTreeNode node) { + if (!node.isCompositeNode()) { + PTransform thisTransform = node.getTransform(); + List producingTransforms = getProducingTransforms(output); + for (PTransform producingTransform : producingTransforms) { + if (thisTransform != producingTransform) { + throw new IllegalArgumentException("Output of non-composite transform " + + thisTransform + " is registered as being produced by" + + " a different transform: " + producingTransform); + } + } + } + } + + /** + * Returns the configured pipeline runner. + */ + public PipelineRunner getRunner() { + return runner; + } + + /** + * Returns the configured pipeline options. + */ + public PipelineOptions getOptions() { + return options; + } + + /** + * Returns the output associated with a transform. + * + * @throws IllegalStateException if the transform has not been applied to the pipeline. + */ + public POutput getOutput(PTransform transform) { + TransformTreeNode node = transforms.getNode(transform); + Preconditions.checkState(node != null, + "Unknown transform: " + transform); + return node.getOutput(); + } + + /** + * Returns the input associated with a transform. + * + * @throws IllegalStateException if the transform has not been applied to the pipeline. + */ + public PInput getInput(PTransform transform) { + TransformTreeNode node = transforms.getNode(transform); + Preconditions.checkState(node != null, + "Unknown transform: " + transform); + return node.getInput(); + } + + /** + * Returns the fully qualified name of a transform. + * + * @throws IllegalStateException if the transform has not been applied to the pipeline. + */ + public String getFullName(PTransform transform) { + TransformTreeNode node = transforms.getNode(transform); + Preconditions.checkState(node != null, + "Unknown transform: " + transform); + return node.getFullName(); + } + + /** + * Returns a unique name for a transform with the given prefix (from + * enclosing transforms) and initial name. + * + *

For internal use only. + */ + private String uniquifyInternal(String namePrefix, String origName) { + String name = origName; + int suffixNum = 2; + while (true) { + String candidate = namePrefix.isEmpty() ? name : namePrefix + "/" + name; + if (usedFullNames.add(candidate)) { + return candidate; + } + // A duplicate! Retry. + name = origName + suffixNum++; + } + } + + /** + * Adds the given PValue to this Pipeline. + * + *

For internal use only. + */ + public void addValueInternal(PValue value) { + this.values.add(value); + value.setPipelineInternal(this); + LOG.debug("Adding {} to {}", value, this); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java new file mode 100644 index 0000000000000..7ab3845724f29 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk; + +/** + * Result of {@link com.google.cloud.dataflow.sdk.Pipeline#run()}. + */ +public interface PipelineResult { + + // TODO: method to ask if pipeline is running / finished. + // TODO: method to retrieve error messages. + +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java new file mode 100644 index 0000000000000..6d032371207f1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import java.util.Collections; +import java.util.List; + +/** + * An AtomicCoder is one that has no component Coders or other state. + * All instances of its class are equal. + * + * @param the type of the values being transcoded + */ +public abstract class AtomicCoder extends StandardCoder { + protected AtomicCoder() {} + + @Override + public List> getCoderArguments() { return null; } + + /** + * Returns a list of values contained in the provided example + * value, one per type parameter. If there are no type parameters, + * returns the empty list. + */ + public static List getInstanceComponents(T exampleValue) { + return Collections.emptyList(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java new file mode 100644 index 0000000000000..5ea631a970a79 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.util.CloudObject; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.reflect.ReflectDatumWriter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; + +/** + * An encoder using Avro binary format. + *

+ * The Avro schema is generated using reflection on the element type, using + * Avro's + * org.apache.avro.reflect.ReflectData, + * and encoded as part of the {@code Coder} instance. + *

+ * For complete details about schema generation and how it can be controlled please see + * the + * org.apache.avro.reflect package. + * Only concrete classes with a no-argument constructor can be mapped to Avro records. + * All inherited fields that are not static or transient are used. Fields are not permitted to be + * null unless annotated by + * + * org.apache.avro.reflect.Nullable or a + * + * org.apache.avro.reflect.Union containing null. + *

+ * To use, specify the {@code Coder} type on a PCollection: + *

+ * {@code
+ * PCollection<MyCustomElement> records =
+ *     input.apply(...)
+ *          .setCoder(AvroCoder.of(MyCustomElement.class));
+ * }
+ * 
+ *

+ * or annotate the element class using {@code @DefaultCoder}. + *


+ * {@literal @}DefaultCoder(AvroCoder.class)
+ * public class MyCustomElement {
+ *   ...
+ * }
+ * 
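A minimal sketch of an element class that satisfies the reflection constraints described above; the field names and types are illustrative only:

    @DefaultCoder(AvroCoder.class)
    public class MyCustomElement {
      // Avro reflection requires a no-argument constructor.
      public MyCustomElement() {}

      // Non-static, non-transient fields become fields of the Avro record.
      public String id = "";
      public long count;

      // Fields that may be null must be annotated accordingly.
      @org.apache.avro.reflect.Nullable
      public String note;
    }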
+ * + * @param the type of elements handled by this coder + */ +public class AvroCoder extends StandardCoder { + + /** + * Returns an {@code AvroCoder} instance for the provided element type. + * @param the element type + */ + public static AvroCoder of(Class type) { + return new AvroCoder<>(type, ReflectData.get().getSchema(type)); + } + + /** + * Returns an {@code AvroCoder} instance for the Avro schema. The implicit + * type is GenericRecord. + */ + public static AvroCoder of(Schema schema) { + return new AvroCoder<>(GenericRecord.class, schema); + } + + /** + * Returns an {@code AvroCoder} instance for the provided element type + * using the provided Avro schema. + * + *

If the type argument is GenericRecord, the schema may be arbitrary. + * Otherwise, the schema must correspond to the type provided. + * + * @param the element type + */ + public static AvroCoder of(Class type, Schema schema) { + return new AvroCoder<>(type, schema); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + @JsonCreator + public static AvroCoder of( + @JsonProperty("type") String classType, + @JsonProperty("schema") String schema) throws ClassNotFoundException { + Schema.Parser parser = new Schema.Parser(); + return new AvroCoder(Class.forName(classType), parser.parse(schema)); + } + + private final Class type; + private final Schema schema; + private final DatumWriter writer; + private final DatumReader reader; + private final EncoderFactory encoderFactory = new EncoderFactory(); + private final DecoderFactory decoderFactory = new DecoderFactory(); + + protected AvroCoder(Class type, Schema schema) { + this.type = type; + this.schema = schema; + this.reader = createDatumReader(); + this.writer = createDatumWriter(); + } + + @Override + public void encode(T value, OutputStream outStream, Context context) + throws IOException { + BinaryEncoder encoder = encoderFactory.directBinaryEncoder(outStream, null); + writer.write(value, encoder); + encoder.flush(); + } + + @Override + public T decode(InputStream inStream, Context context) throws IOException { + BinaryDecoder decoder = decoderFactory.directBinaryDecoder(inStream, null); + return reader.read(null, decoder); + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addString(result, "type", type.getName()); + addString(result, "schema", schema.toString()); + return result; + } + + /** + * Depends upon the structure being serialized. + */ + @Override + public boolean isDeterministic() { + return false; + } + + /** + * Returns a new DatumReader that can be used to read from + * an Avro file directly. + */ + public DatumReader createDatumReader() { + if (type.equals(GenericRecord.class)) { + return new GenericDatumReader<>(schema); + } else { + return new ReflectDatumReader<>(schema); + } + } + + /** + * Returns a new DatumWriter that can be used to write to + * an Avro file directly. + */ + public DatumWriter createDatumWriter() { + if (type.equals(GenericRecord.class)) { + return new GenericDatumWriter<>(schema); + } else { + return new ReflectDatumWriter<>(schema); + } + } + + /** + * Returns the schema used by this coder. + */ + public Schema getSchema() { + return schema; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java new file mode 100644 index 0000000000000..6af2d6f5ac4ea --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A BigEndianIntegerCoder encodes Integers in 4 bytes, big-endian. + */ +public class BigEndianIntegerCoder extends AtomicCoder { + @JsonCreator + public static BigEndianIntegerCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final BigEndianIntegerCoder INSTANCE = + new BigEndianIntegerCoder(); + + private BigEndianIntegerCoder() {} + + @Override + public void encode(Integer value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + new DataOutputStream(outStream).writeInt(value); + } + + @Override + public Integer decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + return new DataInputStream(inStream).readInt(); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(Integer value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Integer value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + return 4; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java new file mode 100644 index 0000000000000..43ee9cab34be5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A BigEndianLongCoder encodes Longs in 8 bytes, big-endian. 
+ */ +public class BigEndianLongCoder extends AtomicCoder { + @JsonCreator + public static BigEndianLongCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final BigEndianLongCoder INSTANCE = new BigEndianLongCoder(); + + private BigEndianLongCoder() {} + + @Override + public void encode(Long value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Long"); + } + new DataOutputStream(outStream).writeLong(value); + } + + @Override + public Long decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + return new DataInputStream(inStream).readLong(); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(Long value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Long value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Long"); + } + return 8; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java new file mode 100644 index 0000000000000..c750d932dd066 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.VarInt; +import com.google.common.io.ByteStreams; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A ByteArrayCoder encodes byte[] objects. + * + * If in a nested context, prefixes the encoded array with a VarInt encoding + * of the length. 
+ */ +public class ByteArrayCoder extends AtomicCoder { + @JsonCreator + public static ByteArrayCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final ByteArrayCoder INSTANCE = new ByteArrayCoder(); + + private ByteArrayCoder() {} + + @Override + public void encode(byte[] value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null byte[]"); + } + if (!context.isWholeStream) { + VarInt.encode(value.length, outStream); + } + outStream.write(value); + } + + @Override + public byte[] decode(InputStream inStream, Context context) + throws IOException, CoderException { + if (context.isWholeStream) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + ByteStreams.copy(inStream, outStream); + return outStream.toByteArray(); + } else { + int length = VarInt.decodeInt(inStream); + if (length < 0) { + throw new IOException("invalid length " + length); + } + byte[] value = new byte[length]; + ByteStreams.readFully(inStream, value); + return value; + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(byte[] value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(byte[] value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null byte[]"); + } + long size = 0; + if (!context.isWholeStream) { + size += VarInt.getLength(value.length); + } + return size + value.length; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java new file mode 100644 index 0000000000000..3760cb82003bb --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.List; + +/** + * A {@code Coder} defines how to encode and decode values of type {@code T} into byte streams. + * + *

+ * <p> All methods of a {@code Coder} are required to be thread safe.
+ *
+ * <p> {@code Coder}s are serialized during job creation and deserialized
+ * before use, via JSON serialization.
+ *
+ * <p> See {@link SerializableCoder} for an example of a {@code Coder} that adds
+ * a custom field to the {@code Coder} serialization.  It provides a
+ * constructor annotated with {@link
+ * com.fasterxml.jackson.annotation.JsonCreator}, which is a factory method
+ * used when deserializing a {@code Coder} instance.
+ *
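+ * <p> A minimal, non-normative round trip using one of the concrete coders in
+ * this package:
+ *
+ * <pre> {@code
+ * ByteArrayOutputStream out = new ByteArrayOutputStream();
+ * StringUtf8Coder.of().encode("hi", out, Coder.Context.OUTER);
+ * String roundTripped = StringUtf8Coder.of().decode(
+ *     new ByteArrayInputStream(out.toByteArray()), Coder.Context.OUTER);
+ * // roundTripped.equals("hi")
+ * } </pre>
+ *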

See {@link KvCoder} for an example of a nested {@code Coder} type. + * + * @param the type of the values being transcoded + */ +public interface Coder extends Serializable { + /** The context in which encoding or decoding is being done. */ + public static class Context { + /** + * The outer context. The value being encoded or decoded takes + * up the remainder of the whole record/stream contents. + */ + public static final Context OUTER = new Context(true); + + /** + * The nested context. The value being encoded or decoded is + * (potentially) a part of a larger record/stream contents, and + * may have other parts encoded or decoded after it. + */ + public static final Context NESTED = new Context(false); + + /** + * Whether the encoded or decoded value fills the remainder of the + * output or input (resp.) record/stream contents. If so, then + * the size of the decoded value can be determined from the + * remaining size of the record/stream contents, and so explicit + * lengths aren't required. + */ + public final boolean isWholeStream; + + public Context(boolean isWholeStream) { + this.isWholeStream = isWholeStream; + } + + public Context nested() { + return NESTED; + } + } + + /** + * Encodes the given value of type {@code T} onto the given output stream + * in the given context. + * + * @throws IOException if writing to the {@code OutputStream} fails + * for some reason + * @throws CoderException if the value could not be encoded for some reason + */ + public void encode(T value, OutputStream outStream, Context context) + throws CoderException, IOException; + + /** + * Decodes a value of type {@code T} from the given input stream in + * the given context. Returns the decoded value. + * + * @throws IOException if reading from the {@code InputStream} fails + * for some reason + * @throws CoderException if the value could not be decoded for some reason + */ + public T decode(InputStream inStream, Context context) + throws CoderException, IOException; + + /** + * If this is a {@code Coder} for a parameterized type, returns the + * list of {@code Coder}s being used for each of the parameters, or + * returns {@code null} if this cannot be done or this is not a + * parameterized type. + */ + public List> getCoderArguments(); + + /** + * Returns the {@link CloudObject} that represents this {@code Coder}. + */ + public CloudObject asCloudObject(); + + /** + * Returns true if the coding is deterministic. + * + *

+   * <p> In order for a {@code Coder} to be considered deterministic,
+   * the following must be true:
+   * <ul>
+   * <li> two values which compare as equal (via {@code Object.equals()}
+   *      or {@code Comparable.compareTo()}, if supported), have the same
+   *      encoding.
+   * <li> the {@code Coder} always produces a canonical encoding, which is the
+   *      same for an instance of an object even if produced on different
+   *      computers at different times.
+   * </ul>
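+   *
+   * <p> As an illustration (not a requirement imposed by this interface):
+   * {@code BigEndianIntegerCoder} is deterministic, while a coder that wrote
+   * a {@code HashSet}'s elements in iteration order would not be, because two
+   * equal sets may iterate in different orders and so produce different
+   * encodings.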
+   */
+  public boolean isDeterministic();
+
+  /**
+   * Returns whether {@link #registerByteSizeObserver} is cheap enough to
+   * call for every element, that is, if this {@code Coder} can
+   * calculate the byte size of the element to be coded in roughly
+   * constant time (or lazily).
+   *
+   * <p> Not intended to be called by user code, but instead by
+   * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
+   * implementations.
+   */
+  public boolean isRegisterByteSizeObserverCheap(T value, Context context);
+
+  /**
+   * Notifies the {@code ElementByteSizeObserver} about the byte size
+   * of the encoded value using this {@code Coder}.
+   *
+   * <p>
Not intended to be called by user code, but instead by + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} + * implementations. + */ + public void registerByteSizeObserver( + T value, ElementByteSizeObserver observer, Context context) + throws Exception; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java new file mode 100644 index 0000000000000..1bbc3fa176b7e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import java.io.IOException; + +/** + * A CoderException is thrown if there is a problem encoding or + * decoding a value. + */ +public class CoderException extends IOException { + public CoderException(String message) { + super(message); + } + + public CoderException(String message, Throwable cause) { + super(message, cause); + } + + public CoderException(Throwable cause) { + super(cause); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java new file mode 100644 index 0000000000000..670b4e3e320af --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -0,0 +1,701 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; +import com.google.common.reflect.TypeToken; + +import org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.lang.reflect.TypeVariable; +import java.lang.reflect.WildcardType; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A CoderRegistry allows registering the default Coder to use for a Java class, + * and looking up and instantiating the default Coder for a Java type. + * + *

+ * <p> {@code CoderRegistry} uses the following mechanisms to determine a
+ * default {@link Coder} for a Java class, in order of precedence:
+ * <ul>
+ * <li> Registration: coders can be registered explicitly via
+ *      {@link #registerCoder}.  Built-in types are registered via
+ *      {@link #registerStandardCoders()}.
+ * <li> Annotations: {@link DefaultCoder} can be used to annotate a type with
+ *      the default {@code Coder} type.
+ * <li> Inheritance: {@link Serializable} objects are given a default
+ *      {@code Coder} of {@link SerializableCoder}.
+ * </ul>
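+ *
+ * <p> A non-normative usage sketch; {@code MyType} and {@code MyTypeCoder}
+ * are hypothetical names used only for illustration:
+ *
+ * <pre> {@code
+ * CoderRegistry registry = new CoderRegistry();
+ * registry.registerStandardCoders();
+ * registry.registerCoder(MyType.class, MyTypeCoder.class);
+ * Coder<String> stringCoder = registry.getDefaultCoder(TypeToken.of(String.class));
+ * } </pre>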
+ */ +public class CoderRegistry { + private static final Logger LOG = LoggerFactory.getLogger(CoderRegistry.class); + + /** A factory for default Coders for values of a particular class. */ + public abstract static class CoderFactory { + /** + * Returns the default Coder to use for values of a particular type, + * given the Coders for each of the type's generic parameter types. + * May return null if no default Coder can be created. + */ + public abstract Coder create( + List> typeArgumentCoders); + + /** + * Returns a list of objects contained in {@code value}, one per + * type argument, or {@code null} if none can be determined. + */ + public abstract List getInstanceComponents(Object value); + } + + /** A factory that always returns the coder with which it is instantiated. */ + public class ConstantCoderFactory extends CoderFactory { + private Coder coder; + + public ConstantCoderFactory(Coder coder) { + this.coder = coder; + } + + @Override + public Coder create(List> typeArgumentCoders) { + return this.coder; + } + + @Override + public List getInstanceComponents(Object value) { + return Collections.emptyList(); + } + } + + public CoderRegistry() {} + + /** + * Registers standard Coders with this CoderRegistry. + */ + public void registerStandardCoders() { + registerCoder(Double.class, DoubleCoder.class); + registerCoder(Instant.class, InstantCoder.class); + registerCoder(Integer.class, VarIntCoder.class); + registerCoder(Iterable.class, IterableCoder.class); + registerCoder(KV.class, KvCoder.class); + registerCoder(List.class, ListCoder.class); + registerCoder(Long.class, VarLongCoder.class); + registerCoder(String.class, StringUtf8Coder.class); + registerCoder(TableRow.class, TableRowJsonCoder.class); + registerCoder(Void.class, VoidCoder.class); + registerCoder(byte[].class, ByteArrayCoder.class); + registerCoder(URI.class, URICoder.class); + registerCoder(TimestampedValue.class, TimestampedValue.TimestampedValueCoder.class); + } + + /** + * Registers {@code coderClazz} as the default {@code Coder} + * class to handle encoding and decoding instances of {@code clazz} + * of type {@code T}. + * + *

+   * <p> {@code coderClazz} should have a static factory method with the
+   * following signature:
+   *
+   * <pre> {@code
+   * public static Coder of(Coder argCoder1, Coder argCoder2, ...)
+   * } </pre>
+   *
+   * <p> This method will be called to create instances of {@code Coder}
+   * for values of type {@code T}, passing Coders for each of the generic type
+   * parameters of {@code T}.  If {@code T} takes no generic type parameters,
+   * then the {@code of()} factory method should have no arguments.
+   *
+   * <p> If {@code T} is a parameterized type, then it should additionally
+   * have a method with the following signature:
+   *
+   * <pre> {@code
+   * public static List getInstanceComponents(T exampleValue);
+   * } </pre>
+   *

This method will be called to decompose a value during the coder + * inference process, to automatically choose coders for the components + */ + public void registerCoder(Class clazz, + Class coderClazz) { + int numTypeParameters = clazz.getTypeParameters().length; + + // Find the static factory method of coderClazz named 'of' with + // the appropriate number of type parameters. + + Class[] factoryMethodArgTypes = new Class[numTypeParameters]; + Arrays.fill(factoryMethodArgTypes, Coder.class); + + Method factoryMethod; + try { + factoryMethod = + coderClazz.getDeclaredMethod("of", factoryMethodArgTypes); + } catch (NoSuchMethodException | SecurityException exn) { + throw new IllegalArgumentException( + "cannot register Coder " + coderClazz + ": " + + "does not have an accessible method named 'of' with " + + numTypeParameters + " arguments of Coder type", + exn); + } + if (!Modifier.isStatic(factoryMethod.getModifiers())) { + throw new IllegalArgumentException( + "cannot register Coder " + coderClazz + ": " + + "method named 'of' with " + numTypeParameters + + " arguments of Coder type is not static"); + } + if (!coderClazz.isAssignableFrom(factoryMethod.getReturnType())) { + throw new IllegalArgumentException( + "cannot register Coder " + coderClazz + ": " + + "method named 'of' with " + numTypeParameters + + " arguments of Coder type does not return a " + coderClazz); + } + try { + if (!factoryMethod.isAccessible()) { + factoryMethod.setAccessible(true); + } + } catch (SecurityException exn) { + throw new IllegalArgumentException( + "cannot register Coder " + coderClazz + ": " + + "method named 'of' with " + numTypeParameters + + " arguments of Coder type is not accessible", + exn); + } + + // Find the static method to decompose values when inferring a coder, + // if there are type parameters for which we also need an example + // value + Method getComponentsMethod = null; + if (clazz.getTypeParameters().length > 0) { + try { + getComponentsMethod = coderClazz.getDeclaredMethod( + "getInstanceComponents", + clazz); + } catch (NoSuchMethodException | SecurityException exn) { + LOG.warn("cannot find getInstanceComponents for class {}. This may limit the ability to" + + " infer a Coder for values of this type.", coderClazz, exn); + } + } + + registerCoder(clazz, defaultCoderFactory(coderClazz, factoryMethod, getComponentsMethod)); + } + + public void registerCoder(Class rawClazz, + CoderFactory coderFactory) { + if (coderFactoryMap.put(rawClazz, coderFactory) != null) { + throw new IllegalArgumentException( + "cannot register multiple default Coder factories for " + rawClazz); + } + } + + public void registerCoder(Class rawClazz, Coder coder) { + CoderFactory factory = new ConstantCoderFactory(coder); + registerCoder(rawClazz, factory); + } + + /** + * Returns the Coder to use by default for values of the given type, + * or null if there is no default Coder. + */ + public Coder getDefaultCoder(TypeToken typeToken) { + return getDefaultCoder(typeToken, Collections.>emptyMap()); + } + + /** + * Returns the Coder to use by default for values of the given type, + * where the given context type uses the given context coder, + * or null if there is no default Coder. + */ + public Coder getDefaultCoder(TypeToken typeToken, + TypeToken contextTypeToken, + Coder contextCoder) { + return getDefaultCoder(typeToken, + createTypeBindings(contextTypeToken, contextCoder)); + } + + /** + * Returns the Coder to use on elements produced by this function, given + * the coder used for its input elements. 
+ */ + public Coder getDefaultOutputCoder( + SerializableFunction fn, Coder inputCoder) { + return getDefaultCoder( + fn.getClass(), SerializableFunction.class, inputCoder); + } + + /** + * Returns the Coder to use for the last type parameter specialization + * of the subclass given Coders to use for all other type parameters + * specializations (if any). + */ + public Coder getDefaultCoder( + Class subClass, + Class baseClass, + Coder... knownCoders) { + Coder[] allCoders = new Coder[knownCoders.length + 1]; + // Last entry intentionally left null. + System.arraycopy(knownCoders, 0, allCoders, 0, knownCoders.length); + allCoders = getDefaultCoders(subClass, baseClass, allCoders); + @SuppressWarnings("unchecked") // trusted + Coder coder = (Coder) allCoders[knownCoders.length]; + return coder; + } + + /** + * Returns the Coder to use for the specified type parameter specialization + * of the subclass, given Coders to use for all other type parameters + * (if any). + */ + @SuppressWarnings("unchecked") + public Coder getDefaultCoder( + Class subClass, + Class baseClass, + Map> knownCoders, + String paramName) { + // TODO: Don't infer unneeded params. + return (Coder) getDefaultCoders(subClass, baseClass, knownCoders) + .get(paramName); + } + + /** + * Returns the Coder to use for the provided example value, if it can + * be determined, otherwise returns {@code null}. If more than one + * default coder matches, this will raise an exception. + */ + public Coder getDefaultCoder(T exampleValue) { + Class clazz = exampleValue.getClass(); + + if (clazz.getTypeParameters().length == 0) { + // Trust that getDefaultCoder returns a valid + // Coder for non-generic clazz. + @SuppressWarnings("unchecked") + Coder coder = (Coder) getDefaultCoder(clazz); + return coder; + } else { + CoderFactory factory = getDefaultCoderFactory(clazz); + if (factory == null) { + return null; + } + + List components = factory.getInstanceComponents(exampleValue); + if (components == null) { + return null; + } + + // componentcoders = components.map(this.getDefaultCoder) + List> componentCoders = new ArrayList<>(); + for (Object component : components) { + Coder componentCoder = getDefaultCoder(component); + if (componentCoder == null) { + return null; + } else { + componentCoders.add(componentCoder); + } + } + + // Trust that factory.create maps from valid component coders + // to a valid Coder. + @SuppressWarnings("unchecked") + Coder coder = (Coder) factory.create(componentCoders); + return coder; + } + } + + + /** + * Returns a Map from each of baseClass's type parameters to the Coder to + * use by default for it, in the context of subClass's specialization of + * baseClass. + * + *

For example, if baseClass is Map.class and subClass extends + * {@code Map} then this will return the registered Coders + * to use for String and Integer as a {"K": stringCoder, "V": intCoder} Map. + * The knownCoders parameter can be used to provide known coders for any of + * the parameters which will be used to infer the others. + * + * @param subClass the concrete type whose specializations are being inferred + * @param baseClass the base type, a parameterized class + * @param knownCoders a map corresponding to the set of known coders indexed + * by parameter name + */ + public Map> getDefaultCoders( + Class subClass, + Class baseClass, + Map> knownCoders) { + TypeVariable>[] typeParams = baseClass.getTypeParameters(); + Coder[] knownCodersArray = new Coder[typeParams.length]; + for (int i = 0; i < typeParams.length; i++) { + knownCodersArray[i] = knownCoders.get(typeParams[i].getName()); + } + Coder[] resultArray = getDefaultCoders( + subClass, baseClass, knownCodersArray); + Map> result = new HashMap<>(); + for (int i = 0; i < typeParams.length; i++) { + result.put(typeParams[i].getName(), resultArray[i]); + } + return result; + } + + /** + * Returns an array listing, for each of baseClass's type parameters, the + * Coder to use by default for it, in the context of subClass's specialization + * of baseClass. + * + *

For example, if baseClass is Map.class and subClass extends + * {@code Map} then this will return the registered Coders + * to use for String and Integer in that order. The knownCoders parameter + * can be used to provide known coders for any of the parameters which will + * be used to infer the others. + * + *

If a type cannot be inferred, null is returned. + * + * @param subClass the concrete type whose specializations are being inferred + * @param baseClass the base type, a parameterized class + * @param knownCoders an array corresponding to the set of base class + * type parameters. Each entry is can be either a Coder (in which + * case it will be used for inference) or null (in which case it + * will be inferred). May be null to indicate the entire set of + * parameters should be inferred. + * @throws IllegalArgumentException if baseClass doesn't have type parameters + * or if the length of knownCoders is not equal to the number of type + * parameters + */ + public Coder[] getDefaultCoders( + Class subClass, + Class baseClass, + Coder[] knownCoders) { + Type type = TypeToken.of(subClass).getSupertype(baseClass).getType(); + if (!(type instanceof ParameterizedType)) { + throw new IllegalArgumentException(type + " is not a ParameterizedType"); + } + ParameterizedType parameterizedType = (ParameterizedType) type; + Type[] typeArgs = parameterizedType.getActualTypeArguments(); + if (knownCoders == null) { + knownCoders = new Coder[typeArgs.length]; + } else if (typeArgs.length != knownCoders.length) { + throw new IllegalArgumentException( + "Class " + baseClass + " has " + typeArgs.length + " parameters, " + + "but " + knownCoders.length + " coders are requested."); + } + Map> context = new HashMap<>(); + for (int i = 0; i < knownCoders.length; i++) { + if (knownCoders[i] != null) { + if (!isCompatible(knownCoders[i], typeArgs[i])) { + throw new IllegalArgumentException( + "Cannot encode elements of type " + typeArgs[i] + + " with " + knownCoders[i]); + } + context.put(typeArgs[i], knownCoders[i]); + } + } + Coder[] result = new Coder[typeArgs.length]; + for (int i = 0; i < knownCoders.length; i++) { + if (knownCoders[i] != null) { + result[i] = knownCoders[i]; + } else { + result[i] = getDefaultCoder(typeArgs[i], context); + } + } + return result; + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Returns whether the given coder can possibly encode elements + * of the given type. + */ + static boolean isCompatible(Coder coder, Type type) { + Type coderType = + ((ParameterizedType) + TypeToken.of(coder.getClass()).getSupertype(Coder.class).getType()) + .getActualTypeArguments()[0]; + if (type instanceof TypeVariable) { + return true; // Can't rule it out. + } + Class coderClass = TypeToken.of(coderType).getRawType(); + if (!coderClass.isAssignableFrom(TypeToken.of(type).getRawType())) { + return false; + } + if (coderType instanceof ParameterizedType + && !isNullOrEmpty(coder.getCoderArguments())) { + @SuppressWarnings("unchecked") + Type[] typeArguments = + ((ParameterizedType) + TypeToken.of(type).getSupertype((Class) coderClass).getType()) + .getActualTypeArguments(); + List> typeArgumentCoders = coder.getCoderArguments(); + assert typeArguments.length == typeArgumentCoders.size(); + for (int i = 0; i < typeArguments.length; i++) { + if (!isCompatible( + typeArgumentCoders.get(i), + TypeToken.of(type).resolveType(typeArguments[i]).getType())) { + return false; + } + } + } + return true; // For all we can tell. + } + + private static boolean isNullOrEmpty(Collection c) { + return c == null || c.size() == 0; + } + + /** + * The map of classes to the CoderFactories to use to create their + * default Coders. 
+ */ + Map, CoderFactory> coderFactoryMap = new HashMap<>(); + + /** + * Returns a CoderFactory that invokes the given static factory method + * to create the Coder. + */ + static CoderFactory defaultCoderFactory( + final Class coderClazz, + final Method coderFactoryMethod, + final Method getComponentsMethod) { + + return new CoderFactory() { + @Override + public Coder create(List> typeArgumentCoders) { + try { + return (Coder) coderFactoryMethod.invoke( + null /* static */, typeArgumentCoders.toArray()); + } catch (IllegalAccessException | + IllegalArgumentException | + InvocationTargetException | + NullPointerException | + ExceptionInInitializerError exn) { + throw new IllegalStateException( + "error when invoking Coder factory method " + coderFactoryMethod, + exn); + } + } + + @Override + public List getInstanceComponents(Object value) { + if (getComponentsMethod == null) { + throw new IllegalStateException( + "no suitable static getInstanceComponents method available for " + + "Coder " + coderClazz); + } + + try { + @SuppressWarnings("unchecked") + List result = (List) (getComponentsMethod.invoke( + null /* static */, value)); + return result; + } catch (IllegalAccessException + | IllegalArgumentException + | InvocationTargetException + | NullPointerException + | ExceptionInInitializerError exn) { + throw new IllegalStateException( + "error when invoking Coder getComponents method " + getComponentsMethod, + exn); + } + } + }; + } + + static CoderFactory defaultCoderFactory(Class coderClazz, final Method coderFactoryMethod) { + return defaultCoderFactory(coderClazz, coderFactoryMethod, null); + } + + /** + * Returns the CoderFactory to use to create default Coders for + * instances of the given class, or null if there is no default + * CoderFactory registered. + */ + CoderFactory getDefaultCoderFactory(Class clazz) { + CoderFactory coderFactory = coderFactoryMap.get(clazz); + if (coderFactory == null) { + LOG.debug("No Coder registered for {}", clazz); + } + return coderFactory; + } + + /** + * Returns the Coder to use by default for values of the given type, + * in a context where the given types use the given coders, + * or null if there is no default Coder. + */ + Coder getDefaultCoder(TypeToken typeToken, + Map> typeCoderBindings) { + Coder defaultCoder = getDefaultCoder(typeToken.getType(), + typeCoderBindings); + LOG.debug("Default Coder for {}: {}", typeToken, defaultCoder); + @SuppressWarnings("unchecked") + Coder result = (Coder) defaultCoder; + return result; + } + + /** + * Returns the Coder to use by default for values of the given type, + * in a context where the given types use the given coders, + * or null if there is no default Coder. + */ + Coder getDefaultCoder(Type type, Map> typeCoderBindings) { + Coder coder = typeCoderBindings.get(type); + if (coder != null) { + return coder; + } + if (type instanceof Class) { + return getDefaultCoder((Class) type); + } else if (type instanceof ParameterizedType) { + return this.getDefaultCoder((ParameterizedType) type, + typeCoderBindings); + } else if (type instanceof TypeVariable + || type instanceof WildcardType) { + // No default coder for an unknown generic type. + LOG.debug("No Coder for unknown generic type {}", type); + return null; + } else { + throw new RuntimeException( + "internal error: unexpected kind of Type: " + type); + } + } + + /** + * Returns the Coder to use by default for values of the given + * class, or null if there is no default Coder. 
+ */ + Coder getDefaultCoder(Class clazz) { + CoderFactory coderFactory = getDefaultCoderFactory(clazz); + if (coderFactory != null) { + LOG.debug("Default Coder for {} found by factory", clazz); + return coderFactory.create(Collections.>emptyList()); + } + + DefaultCoder defaultAnnotation = clazz.getAnnotation( + DefaultCoder.class); + if (defaultAnnotation != null) { + LOG.debug("Default Coder for {} found by DefaultCoder annotation", clazz); + return InstanceBuilder.ofType(Coder.class) + .fromClass(defaultAnnotation.value()) + .fromFactoryMethod("of") + .withArg(Class.class, clazz) + .build(); + } + + // Interface-based defaults. + if (Serializable.class.isAssignableFrom(clazz)) { + @SuppressWarnings("unchecked") + Class serializableClazz = + (Class) clazz; + LOG.debug("Default Coder for {}: SerializableCoder", serializableClazz); + return SerializableCoder.of(serializableClazz); + } + + LOG.debug("No default Coder for {}", clazz); + return null; + } + + /** + * Returns the Coder to use by default for values of the given + * parameterized type, in a context where the given types use the + * given coders, or null if there is no default Coder. + */ + Coder getDefaultCoder( + ParameterizedType type, + Map> typeCoderBindings) { + Class rawClazz = (Class) type.getRawType(); + CoderFactory coderFactory = getDefaultCoderFactory(rawClazz); + if (coderFactory == null) { + return null; + } + List> typeArgumentCoders = new ArrayList<>(); + for (Type typeArgument : type.getActualTypeArguments()) { + Coder typeArgumentCoder = getDefaultCoder(typeArgument, + typeCoderBindings); + if (typeArgumentCoder == null) { + return null; + } + typeArgumentCoders.add(typeArgumentCoder); + } + return coderFactory.create(typeArgumentCoders); + } + + /** + * Returns a Map where each of the type variables embedded in the + * given type are mapped to the corresponding Coders in the given + * coder. + */ + Map> createTypeBindings(TypeToken typeToken, + Coder coder) { + Map> typeCoderBindings = new HashMap<>(); + fillTypeBindings(typeToken.getType(), coder, typeCoderBindings); + return typeCoderBindings; + } + + /** + * Adds to the given map bindings from each of the type variables + * embedded in the given type to the corresponding Coders in the + * given coder. + */ + void fillTypeBindings(Type type, + Coder coder, + Map> typeCoderBindings) { + if (type instanceof TypeVariable) { + LOG.debug("Binding type {} to Coder {}", type, coder); + typeCoderBindings.put(type, coder); + } else if (type instanceof ParameterizedType) { + fillTypeBindings((ParameterizedType) type, + coder, + typeCoderBindings); + } + } + + /** + * Adds to the given map bindings from each of the type variables + * embedded in the given parameterized type to the corresponding + * Coders in the given coder. 
+ */ + void fillTypeBindings(ParameterizedType type, + Coder coder, + Map> typeCoderBindings) { + Type[] typeArguments = type.getActualTypeArguments(); + List> coderArguments = coder.getCoderArguments(); + if (coderArguments == null + || typeArguments.length != coderArguments.size()) { + return; + } + for (int i = 0; i < typeArguments.length; i++) { + fillTypeBindings(typeArguments[i], + coderArguments.get(i), + typeCoderBindings); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java new file mode 100644 index 0000000000000..546695dfefe80 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Collection; +import java.util.List; + +/** + * A CollectionCoder encodes Collections. + * + * @param the type of the elements of the Collections being transcoded + */ +public class CollectionCoder extends IterableLikeCoder> { + + public static CollectionCoder of(Coder elemCoder) { + return new CollectionCoder<>(elemCoder); + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal operations below here. + + @JsonCreator + public static CollectionCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List components) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of((Coder) components.get(0)); + } + + /** + * Returns the first element in this collection if it is non-empty, + * otherwise returns {@code null}. + */ + public static List getInstanceComponents( + Collection exampleValue) { + return getInstanceComponentsHelper(exampleValue); + } + + CollectionCoder(Coder elemCoder) { + super(elemCoder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java new file mode 100644 index 0000000000000..6b31297a1071c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.StringUtils; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.Serializable; + +/** + * An abstract base class for writing Coders that encodes itself via java + * serialization. Subclasses only need to implement the {@link Coder#encode} + * and {@link Coder#decode} methods. + * + *

+ * Not to be confused with {@link SerializableCoder} that encodes serializables. + * + * @param the type of elements handled by this coder + */ +public abstract class CustomCoder extends AtomicCoder + implements Serializable { + + @JsonCreator + public static CustomCoder of( + // N.B. typeId is a required parameter here, since a field named "@type" + // is presented to the deserializer as an input. + // + // If this method did not consume the field, Jackson2 would observe an + // unconsumed field and a returned value of a derived type. So Jackson2 + // would attempt to update the returned value with the unconsumed field + // data, The standard JsonDeserializer does not implement a mechanism for + // updating constructed values, so it would throw an exception, causing + // deserialization to fail. + @JsonProperty(value = "@type", required = false) String typeId, + @JsonProperty("type") String type, + @JsonProperty("serialized_coder") String serializedCoder) { + return (CustomCoder) SerializableUtils.deserializeFromByteArray( + StringUtils.jsonStringToByteArray(serializedCoder), + type); + } + + @Override + public CloudObject asCloudObject() { + // N.B. We use the CustomCoder class, not the derived class, since during + // deserialization we will be using the CustomCoder's static factory method + // to construct an instance of the derived class. + CloudObject result = CloudObject.forClass(CustomCoder.class); + addString(result, "type", getClass().getName()); + addString(result, "serialized_coder", + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(this))); + return result; + } + + @Override + public boolean isDeterministic() { + return false; + } + + // This coder inherits isRegisterByteSizeObserverCheap, + // getEncodedElementByteSize and registerByteSizeObserver + // from StandardCoder. Override if we can do better. +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java new file mode 100644 index 0000000000000..6c6f4197c5a86 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Specifies a default {@link Coder} class to handle encoding and decoding + * instances of the annotated class. + * + *

+ * <p> The specified {@code Coder} must implement a function with the following
+ * signature:
+ * <pre> {@code
+ * public static Coder of(Class clazz) {...}
+ * } </pre>
+ *
+ * <p> For example, to configure the use of Java serialization as the default
+ * for a class, annotate the class to use
+ * {@link com.google.cloud.dataflow.sdk.coders.SerializableCoder} as follows:
+ *
+ * <pre><code>
+ * {@literal @}DefaultCoder(SerializableCoder.class)
+ * public class MyCustomDataType {
+ *   // ...
+ * }
+ * </code></pre>
+ *
+ * <p> Similarly, to configure the use of
+ * {@link com.google.cloud.dataflow.sdk.coders.AvroCoder} as the default:
+ *
+ * <pre><code>
+ * {@literal @}DefaultCoder(AvroCoder.class)
+ * public class MyCustomDataType {
+ *   public MyCustomDataType() {}   // Avro requires an empty constructor.
+ *   // ...
+ * }
+ * </code></pre>
+ *

Coders specified explicitly via + * {@link com.google.cloud.dataflow.sdk.values.PCollection#setCoder(Coder) + * PCollection.setCoder} + * take precedence, followed by Coders registered at runtime via + * {@link CoderRegistry#registerCoder}. + */ +@Documented +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +@SuppressWarnings("rawtypes") +public @interface DefaultCoder { + Class value(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java new file mode 100644 index 0000000000000..6b531ad0dc454 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A DoubleCoder encodes Doubles in 8 bytes. + */ +public class DoubleCoder extends AtomicCoder { + @JsonCreator + public static DoubleCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final DoubleCoder INSTANCE = new DoubleCoder(); + + private DoubleCoder() {} + + @Override + public void encode(Double value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Double"); + } + new DataOutputStream(outStream).writeDouble(value); + } + + @Override + public Double decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + return new DataInputStream(inStream).readDouble(); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + /** + * Floating-point operations are not guaranteed to be deterministic, even + * if the storage format might be, so floating point representations are not + * recommended for use in operations which require deterministic inputs. + */ + @Override + public boolean isDeterministic() { + return false; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. 
+ */ + @Override + public boolean isRegisterByteSizeObserverCheap(Double value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Double value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Double"); + } + return 8; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java new file mode 100644 index 0000000000000..988a04c03160c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.services.datastore.DatastoreV1.Entity; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * An EntityCoder encodes/decodes Datastore Entity objects. + */ +public class EntityCoder extends AtomicCoder { + + @JsonCreator + public static EntityCoder of() { + return INSTANCE; + } + + /***************************/ + + private static final EntityCoder INSTANCE = new EntityCoder(); + + private EntityCoder() {} + + @Override + public void encode(Entity value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Entity"); + } + + // Since Entity implements com.google.protobuf.MessageLite, + // we could directly use writeTo to write to a OutputStream object + outStream.write(java.nio.ByteBuffer.allocate(4).putInt(value.getSerializedSize()).array()); + value.writeTo(outStream); + outStream.flush(); + } + + @Override + public Entity decode(InputStream inStream, Context context) + throws IOException { + byte[] entitySize = new byte[4]; + inStream.read(entitySize, 0, 4); + int size = java.nio.ByteBuffer.wrap(entitySize).getInt(); + byte[] data = new byte[size]; + inStream.read(data, 0, size); + return Entity.parseFrom(data); + } + + @Override + protected long getEncodedElementByteSize(Entity value, Context context) + throws Exception { + return value.getSerializedSize(); + } + + /** + * A datastore kind can hold arbitrary Object instances, + * which makes the encoding non-deterministic. + */ + @Override + public boolean isDeterministic() { + return false; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java new file mode 100644 index 0000000000000..3190124391703 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import org.joda.time.Instant; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A InstantCoder encodes joda Instant. + */ +public class InstantCoder extends AtomicCoder { + @JsonCreator + public static InstantCoder of() { + return INSTANCE; + } + + ///////////////////////////////////////////////////////////////////////////// + + private static final InstantCoder INSTANCE = new InstantCoder(); + + private InstantCoder() {} + + @Override + public void encode(Instant value, OutputStream outStream, Context context) + throws CoderException, IOException { + // Shift the millis by Long.MIN_VALUE so that negative values sort before positive + // values when encoded. The overflow is well-defined: + // http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.18.2 + BigEndianLongCoder.of().encode(value.getMillis() - Long.MIN_VALUE, outStream, context); + } + + @Override + public Instant decode(InputStream inStream, Context context) + throws CoderException, IOException { + return new Instant(BigEndianLongCoder.of().decode(inStream, context) + Long.MIN_VALUE); + } + + @Override + public boolean isDeterministic() { + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java new file mode 100644 index 0000000000000..801dd2042cfdd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * An IterableCoder encodes Iterables. + * + * @param the type of the elements of the Iterables being transcoded + */ +public class IterableCoder extends IterableLikeCoder> { + + public static IterableCoder of(Coder elemCoder) { + return new IterableCoder<>(elemCoder); + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal operations below here. 
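+
+  // A non-normative usage sketch: IterableCoder.of(BigEndianIntegerCoder.of())
+  // yields a coder for Iterable<Integer>.  The framing (a length prefix for
+  // Collections, per-element "hasNext" sentinels otherwise) is inherited from
+  // IterableLikeCoder, added later in this patch.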
+ + @JsonCreator + public static IterableCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of(components.get(0)); + } + + /** + * Returns the first element in this iterable if it is non-empty, + * otherwise returns {@code null}. + */ + public static List getInstanceComponents( + Iterable exampleValue) { + return getInstanceComponentsHelper(exampleValue); + } + + IterableCoder(Coder elemCoder) { + super(elemCoder); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addBoolean(result, PropertyNames.IS_STREAM_LIKE, true); + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java new file mode 100644 index 0000000000000..e6ecdbe26bb97 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterable; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Observable; +import java.util.Observer; + +/** + * The base class of Coders for Iterable subclasses. + * + * @param the type of the elements of the Iterables being transcoded + * @param the type of the Iterables being transcoded + */ +public abstract class IterableLikeCoder> + extends StandardCoder { + + public Coder getElemCoder() { return elemCoder; } + + ///////////////////////////////////////////////////////////////////////////// + // Internal operations below here. + + final Coder elemCoder; + + /** + * Returns the first element in this iterable-like if it is non-empty, + * otherwise returns {@code null}. + */ + protected static > + List getInstanceComponentsHelper( + IT exampleValue) { + for (T value : exampleValue) { + return Arrays.asList(value); + } + return null; + } + + protected IterableLikeCoder(Coder elemCoder) { + this.elemCoder = elemCoder; + } + + @Override + public void encode(IT iterable, OutputStream outStream, Context context) + throws IOException, CoderException { + if (iterable == null) { + throw new CoderException("cannot encode a null Iterable"); + } + Context nestedContext = context.nested(); + DataOutputStream dataOutStream = new DataOutputStream(outStream); + if (iterable instanceof Collection) { + // We can know the size of the Iterable. 
Use an encoding with a + // leading size field, followed by that many elements. + Collection collection = (Collection) iterable; + dataOutStream.writeInt(collection.size()); + for (T elem : collection) { + elemCoder.encode(elem, dataOutStream, nestedContext); + } + } else { + // We don't know the size without traversing it. So use a + // "hasNext" sentinel before each element. + // TODO: Don't use the sentinel if context.isWholeStream. + dataOutStream.writeInt(-1); + for (T elem : iterable) { + dataOutStream.writeBoolean(true); + elemCoder.encode(elem, dataOutStream, nestedContext); + } + dataOutStream.writeBoolean(false); + } + // Make sure all our output gets pushed to the underlying outStream. + dataOutStream.flush(); + } + + @Override + public IT decode(InputStream inStream, Context context) + throws IOException, CoderException { + Context nestedContext = context.nested(); + DataInputStream dataInStream = new DataInputStream(inStream); + int size = dataInStream.readInt(); + if (size >= 0) { + List elements = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + elements.add(elemCoder.decode(dataInStream, nestedContext)); + } + return (IT) elements; + } else { + // We don't know the size a priori. Check if we're done with + // each element. + List elements = new ArrayList<>(); + while (dataInStream.readBoolean()) { + elements.add(elemCoder.decode(dataInStream, nestedContext)); + } + return (IT) elements; + } + } + + @Override + public List> getCoderArguments() { + return Arrays.asList(elemCoder); + } + + /** + * Encoding is not deterministic for the general Iterable case, as it depends + * upon the type of iterable. This may allow two objects to compare as equal + * while the encoding differs. + */ + @Override + public boolean isDeterministic() { + return false; + } + + /** + * Returns whether iterable can use lazy counting, since that + * requires minimal extra computation. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(IT iterable, Context context) { + return iterable instanceof ElementByteSizeObservableIterable; + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the + * encoded value using this coder. + */ + @Override + public void registerByteSizeObserver( + IT iterable, ElementByteSizeObserver observer, Context context) + throws Exception { + if (iterable == null) { + throw new CoderException("cannot encode a null Iterable"); + } + Context nestedContext = context.nested(); + + if (iterable instanceof ElementByteSizeObservableIterable) { + observer.setLazy(); + ElementByteSizeObservableIterable observableIT = + (ElementByteSizeObservableIterable) iterable; + observableIT.addObserver( + new IteratorObserver(observer, iterable instanceof Collection)); + } else { + if (iterable instanceof Collection) { + // We can know the size of the Iterable. Use an encoding with a + // leading size field, followed by that many elements. + Collection collection = (Collection) iterable; + observer.update(4L); + for (T elem : collection) { + elemCoder.registerByteSizeObserver(elem, observer, nestedContext); + } + } else { + // We don't know the size without traversing it. So use a + // "hasNext" sentinel before each element. + // TODO: Don't use the sentinel if context.isWholeStream. + observer.update(4L); + for (T elem : iterable) { + observer.update(1L); + elemCoder.registerByteSizeObserver(elem, observer, nestedContext); + } + observer.update(1L); + } + } + } + + /** + * An observer that gets notified when an observable iterator + * returns a new value. 
This observer just notifies an outerObserver + * about this event. Additionally, the outerObserver is notified + * about additional separators that are transparently added by this + * coder. + */ + private class IteratorObserver implements Observer { + private final ElementByteSizeObserver outerObserver; + private final boolean countable; + + public IteratorObserver(ElementByteSizeObserver outerObserver, + boolean countable) { + this.outerObserver = outerObserver; + this.countable = countable; + + if (countable) { + // Additional 4 bytes are due to size. + outerObserver.update(4L); + } else { + // Additional 5 bytes are due to size = -1 (4 bytes) and + // hasNext = false (1 byte). + outerObserver.update(5L); + } + } + + @Override + public void update(Observable obs, Object obj) { + if (!(obj instanceof Long)) { + throw new AssertionError("unexpected parameter object"); + } + + if (countable) { + outerObserver.update(obs, obj); + } else { + // Additional 1 byte is due to hasNext = true flag. + outerObserver.update(obs, 1 + (long) obj); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java new file mode 100644 index 0000000000000..000d6ca75807a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.KV; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.List; + +/** + * A KvCoder encodes KVs. 
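+ *
+ * <p>For example (an illustrative sketch, using other coders from this
+ * package; the variable name is arbitrary):
+ * <pre> {@code
+ * KvCoder<String, Integer> coder =
+ *     KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
+ * } </pre>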
+ * + * @param the type of the keys of the KVs being transcoded + * @param the type of the values of the KVs being transcoded + */ +public class KvCoder extends KvCoderBase> { + + public static KvCoder of(Coder keyCoder, + Coder valueCoder) { + return new KvCoder<>(keyCoder, valueCoder); + } + + @JsonCreator + public static KvCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + Preconditions.checkArgument(components.size() == 2, + "Expecting 2 components, got " + components.size()); + return of(components.get(0), components.get(1)); + } + + public static List getInstanceComponents( + KV exampleValue) { + return Arrays.asList( + exampleValue.getKey(), + exampleValue.getValue()); + } + + public Coder getKeyCoder() { return keyCoder; } + public Coder getValueCoder() { return valueCoder; } + + ///////////////////////////////////////////////////////////////////////////// + + Coder keyCoder; + Coder valueCoder; + + KvCoder(Coder keyCoder, Coder valueCoder) { + this.keyCoder = keyCoder; + this.valueCoder = valueCoder; + } + + @Override + public void encode(KV kv, OutputStream outStream, Context context) + throws IOException, CoderException { + if (kv == null) { + throw new CoderException("cannot encode a null KV"); + } + Context nestedContext = context.nested(); + keyCoder.encode(kv.getKey(), outStream, nestedContext); + valueCoder.encode(kv.getValue(), outStream, nestedContext); + } + + @Override + public KV decode(InputStream inStream, Context context) + throws IOException, CoderException { + Context nestedContext = context.nested(); + K key = keyCoder.decode(inStream, nestedContext); + V value = valueCoder.decode(inStream, nestedContext); + return KV.of(key, value); + } + + @Override + public List> getCoderArguments() { + return Arrays.asList(keyCoder, valueCoder); + } + + @Override + public boolean isDeterministic() { + return getKeyCoder().isDeterministic() && getValueCoder().isDeterministic(); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addBoolean(result, PropertyNames.IS_PAIR_LIKE, true); + return result; + } + + /** + * Returns whether both keyCoder and valueCoder are considered not expensive. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(KV kv, Context context) { + return keyCoder.isRegisterByteSizeObserverCheap(kv.getKey(), + context.nested()) + && valueCoder.isRegisterByteSizeObserverCheap(kv.getValue(), + context.nested()); + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the + * encoded value using this coder. + */ + @Override + public void registerByteSizeObserver( + KV kv, ElementByteSizeObserver observer, Context context) + throws Exception { + if (kv == null) { + throw new CoderException("cannot encode a null KV"); + } + keyCoder.registerByteSizeObserver( + kv.getKey(), observer, context.nested()); + valueCoder.registerByteSizeObserver( + kv.getValue(), observer, context.nested()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java new file mode 100644 index 0000000000000..b959e1c3c576b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * A abstract base class for KvCoder. Works around a Jackson2 bug tickled when building + * {@link KvCoder} directly (as of this writing, Jackson2 walks off the end of + * an array when it tries to deserialize a class with multiple generic type + * parameters). This class should be removed when possible. + * @param the type of values being transcoded + */ +public abstract class KvCoderBase extends StandardCoder { + @JsonCreator + public static KvCoderBase of( + // N.B. typeId is a required parameter here, since a field named "@type" + // is presented to the deserializer as an input. + // + // If this method did not consume the field, Jackson2 would observe an + // unconsumed field and a returned value of a derived type. So Jackson2 + // would attempt to update the returned value with the unconsumed field + // data. The standard JsonDeserializer does not implement a mechanism for + // updating constructed values, so it would throw an exception, causing + // deserialization to fail. + @JsonProperty(value = "@type", required = false) String typeId, + @JsonProperty(value = PropertyNames.IS_PAIR_LIKE, required = false) boolean isPairLike, + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + return KvCoder.of(components); + } + + protected KvCoderBase() {} +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java new file mode 100644 index 0000000000000..ab9d8147aa1f1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * A ListCoder encodes Lists. 
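+ *
+ * <p>For example (an illustrative sketch; the variable name is arbitrary):
+ * <pre> {@code
+ * Coder<List<String>> coder = ListCoder.of(StringUtf8Coder.of());
+ * } </pre>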
+ * + * @param the type of the elements of the Lists being transcoded + */ +public class ListCoder extends IterableLikeCoder> { + + public static ListCoder of(Coder elemCoder) { + return new ListCoder<>(elemCoder); + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal operations below here. + + @JsonCreator + public static ListCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of((Coder) components.get(0)); + } + + /** + * Returns the first element in this list if it is non-empty, + * otherwise returns {@code null}. + */ + public static List getInstanceComponents(List exampleValue) { + return getInstanceComponentsHelper(exampleValue); + } + + ListCoder(Coder elemCoder) { + super(elemCoder); + } + + /** + * List sizes are always known, so ListIterable may be deterministic while + * the general IterableLikeCoder is not. + */ + @Override + public boolean isDeterministic() { + return getElemCoder().isDeterministic(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java new file mode 100644 index 0000000000000..fa3fc58950150 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +/** + * A MapCoder encodes Maps. + * + * @param the type of the keys of the KVs being transcoded + * @param the type of the values of the KVs being transcoded + */ +public class MapCoder extends MapCoderBase> { + + /** + * Produces a MapCoder with the given keyCoder and valueCoder. + */ + public static MapCoder of( + Coder keyCoder, + Coder valueCoder) { + return new MapCoder<>(keyCoder, valueCoder); + } + + @JsonCreator + public static MapCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + Preconditions.checkArgument(components.size() == 2, + "Expecting 2 components, got " + components.size()); + return of((Coder) components.get(0), (Coder) components.get(1)); + } + + /** + * Returns the key and value for an arbitrary element of this map, + * if it is non-empty, otherwise returns {@code null}. 
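+ *
+ * <p>For example (an illustrative sketch; the single-entry map is
+ * hypothetical):
+ * <pre> {@code
+ * List<Object> parts =
+ *     MapCoder.getInstanceComponents(Collections.singletonMap("a", 1));
+ * // parts is ["a", 1]
+ * } </pre>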
+ */ + public static List getInstanceComponents( + Map exampleValue) { + for (Map.Entry entry : exampleValue.entrySet()) { + return Arrays.asList(entry.getKey(), entry.getValue()); + } + return null; + } + + public Coder getKeyCoder() { return keyCoder; } + public Coder getValueCoder() { return valueCoder; } + + ///////////////////////////////////////////////////////////////////////////// + + Coder keyCoder; + Coder valueCoder; + + MapCoder(Coder keyCoder, Coder valueCoder) { + this.keyCoder = keyCoder; + this.valueCoder = valueCoder; + } + + @Override + public void encode( + Map map, + OutputStream outStream, + Context context) + throws IOException, CoderException { + DataOutputStream dataOutStream = new DataOutputStream(outStream); + dataOutStream.writeInt(map.size()); + for (Entry entry : map.entrySet()) { + keyCoder.encode(entry.getKey(), outStream, context.nested()); + valueCoder.encode(entry.getValue(), outStream, context.nested()); + } + dataOutStream.flush(); + } + + @Override + public Map decode(InputStream inStream, Context context) + throws IOException, CoderException { + DataInputStream dataInStream = new DataInputStream(inStream); + int size = dataInStream.readInt(); + Map retval = new HashMap<>(); + for (int i = 0; i < size; ++i) { + K key = keyCoder.decode(inStream, context.nested()); + V value = valueCoder.decode(inStream, context.nested()); + retval.put(key, value); + } + return retval; + } + + @Override + public List> getCoderArguments() { + return Arrays.asList(keyCoder, valueCoder); + } + + /** + * Not all maps have a deterministic encoding. + * + *

For example, HashMap comparison does not depend on element order, so + * two HashMap instances may be equal but produce different encodings. + */ + @Override + public boolean isDeterministic() { + return false; + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the + * encoded value using this coder. + */ + @Override + public void registerByteSizeObserver( + Map map, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(4L); + for (Entry entry : map.entrySet()) { + keyCoder.registerByteSizeObserver( + entry.getKey(), observer, context.nested()); + valueCoder.registerByteSizeObserver( + entry.getValue(), observer, context.nested()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java new file mode 100644 index 0000000000000..e896e0d36dc14 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * A abstract base class for MapCoder. Works around a Jackson2 bug tickled when building + * {@link MapCoder} directly (as of this writing, Jackson2 walks off the end of + * an array when it tries to deserialize a class with multiple generic type + * parameters). This should be removed in favor of a better workaround. + * @param the type of values being transcoded + */ +public abstract class MapCoderBase extends StandardCoder { + @JsonCreator + public static MapCoderBase of( + // N.B. typeId is a required parameter here, since a field named "@type" + // is presented to the deserializer as an input. + // + // If this method did not consume the field, Jackson2 would observe an + // unconsumed field and a returned value of a derived type. So Jackson2 + // would attempt to update the returned value with the unconsumed field + // data, The standard JsonDeserializer does not implement a mechanism for + // updating constructed values, so it would throw an exception, causing + // deserialization to fail. + @JsonProperty(value = "@type", required = false) String typeId, + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + return MapCoder.of(components); + } + + protected MapCoderBase() {} +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java new file mode 100644 index 0000000000000..c078e6629a2b7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.CloudObject; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + * An encoder of {@link java.io.Serializable} objects. + * + * To use, specify the coder type on a PCollection. + *

+ * {@code
+ *   PCollection<MyRecord> records =
+ *       foo.apply(...).setCoder(SerializableCoder.of(MyRecord.class));
+ * }
+ * 
+ * + *

SerializableCoder does not guarantee a deterministic encoding, as Java + * Serialization may produce different binary encodings for two equivalent + * objects. + * + * @param the type of elements handled by this coder + */ +public class SerializableCoder + extends AtomicCoder { + /** + * Returns a {@code SerializableCoder} instance for the provided element type. + * @param the element type + */ + public static SerializableCoder of(Class type) { + return new SerializableCoder<>(type); + } + + @JsonCreator + public static SerializableCoder of(@JsonProperty("type") String classType) + throws ClassNotFoundException { + Class clazz = Class.forName(classType); + if (!Serializable.class.isAssignableFrom(clazz)) { + throw new ClassNotFoundException( + "Class " + classType + " does not implement Serializable"); + } + return of((Class) clazz); + } + + private final Class type; + + protected SerializableCoder(Class type) { + this.type = type; + } + + public Class getRecordType() { + return type; + } + + @Override + public void encode(T value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null record"); + } + try (ObjectOutputStream oos = new ObjectOutputStream(outStream)) { + oos.writeObject(value); + } catch (IOException exn) { + throw new CoderException("unable to serialize record " + value, exn); + } + } + + @Override + public T decode(InputStream inStream, Context context) + throws IOException, CoderException { + try (ObjectInputStream ois = new ObjectInputStream(inStream)) { + return type.cast(ois.readObject()); + } catch (ClassNotFoundException e) { + throw new CoderException("unable to deserialize record", e); + } + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + result.put("type", type.getName()); + return result; + } + + @Override + public boolean isDeterministic() { + return false; + } + + @Override + public boolean equals(Object other) { + if (getClass() != other.getClass()) { + return false; + } + return type == ((SerializableCoder) other).type; + } + + // This coder inherits isRegisterByteSizeObserverCheap, + // getEncodedElementByteSize and registerByteSizeObserver + // from StandardCoder. Looks like we cannot do much better + // in this case. +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java new file mode 100644 index 0000000000000..1a234c7b40ed8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * A SetCoder encodes Sets. + * + * @param the type of the elements of the set + */ +public class SetCoder extends StandardCoder> { + + /** + * Produces a SetCoder with the given elementCoder. + */ + public static SetCoder of(Coder elementCoder) { + return new SetCoder<>(elementCoder); + } + + @JsonCreator + public static SetCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List components) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of((Coder) components.get(0)); + } + + public Coder getElementCoder() { return elementCoder; } + + ///////////////////////////////////////////////////////////////////////////// + + Coder elementCoder; + + SetCoder(Coder elementCoder) { + this.elementCoder = elementCoder; + } + + @Override + public void encode( + Set set, + OutputStream outStream, + Context context) + throws IOException, CoderException { + DataOutputStream dataOutStream = new DataOutputStream(outStream); + dataOutStream.writeInt(set.size()); + for (T element : set) { + elementCoder.encode(element, outStream, context.nested()); + } + dataOutStream.flush(); + } + + @Override + public Set decode(InputStream inStream, Context context) + throws IOException, CoderException { + DataInputStream dataInStream = new DataInputStream(inStream); + int size = dataInStream.readInt(); + Set retval = new HashSet(); + for (int i = 0; i < size; ++i) { + T element = elementCoder.decode(inStream, context.nested()); + retval.add(element); + } + return retval; + } + + @Override + public List> getCoderArguments() { + return Arrays.>asList(elementCoder); + } + + /** + * Not all sets have a deterministic encoding. + * + *

For example, HashSet comparison does not depend on element order, so + * two HashSet instances may be equal but produce different encodings. + */ + @Override + public boolean isDeterministic() { + return false; + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder. + */ + @Override + public void registerByteSizeObserver( + Set set, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(4L); + for (T element : set) { + elementCoder.registerByteSizeObserver(element, observer, context.nested()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java new file mode 100644 index 0000000000000..7a35fdcafbf27 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.cloud.dataflow.sdk.util.Structs.addList; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * A StandardCoder is one that defines equality, hashing, and printing + * via the class name and recursively using {@link #getComponents}. + * + * @param the type of the values being transcoded + */ +public abstract class StandardCoder implements Coder { + + protected StandardCoder() {} + + /** + * Returns the list of {@code Coder}s that are components of this + * {@code Coder}. Returns an empty list if this is an {@link AtomicCoder} (or + * other {@code Coder} with no components). 
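+ *
+ * <p>For example (an illustrative sketch), the components of a
+ * {@code KvCoder} are its key and value coders:
+ * <pre> {@code
+ * KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()).getComponents();
+ * // returns the key coder and the value coder, in that order
+ * } </pre>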
+ */ + public List> getComponents() { + List> coderArguments = getCoderArguments(); + if (coderArguments == null) { + return Collections.emptyList(); + } else { + return coderArguments; + } + } + + @Override + public boolean equals(Object o) { + if (this.getClass() != o.getClass()) { + return false; + } + StandardCoder that = (StandardCoder) o; + return this.getComponents().equals(that.getComponents()); + } + + @Override + public int hashCode() { + return getClass().hashCode() * 31 + getComponents().hashCode(); + } + + @Override + public String toString() { + String s = getClass().getName(); + s = s.substring(s.lastIndexOf('.') + 1); + List> componentCoders = getComponents(); + if (!componentCoders.isEmpty()) { + s += "("; + boolean first = true; + for (Coder componentCoder : componentCoders) { + if (first) { + first = false; + } else { + s += ", "; + } + s += componentCoder.toString(); + } + s += ")"; + } + return s; + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = CloudObject.forClass(getClass()); + + List> components = getComponents(); + if (!components.isEmpty()) { + List cloudComponents = new ArrayList<>(components.size()); + for (Coder coder : components) { + cloudComponents.add(coder.asCloudObject()); + } + addList(result, PropertyNames.COMPONENT_ENCODINGS, cloudComponents); + } + + return result; + } + + /** + * StandardCoder requires elements to be fully encoded and copied + * into a byte stream to determine the byte size of the element, + * which is considered expensive. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(T value, Context context) { + return false; + } + + /** + * Returns the size in bytes of the encoded value using this + * coder. Derived classes override this method if byte size can be + * computed with less computation or copying. + */ + protected long getEncodedElementByteSize(T value, Context context) + throws Exception { + try { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + encode(value, os, context); + return os.size(); + } catch (Exception exn) { + throw new IllegalArgumentException( + "Unable to encode element " + value + " with coder " + this, exn); + } + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the + * encoded value using this coder. Calls + * getEncodedElementByteSize() and notifies ElementByteSizeObserver. + */ + @Override + public void registerByteSizeObserver( + T value, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(getEncodedElementByteSize(value, context)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java new file mode 100644 index 0000000000000..17995c31b65be --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.VarInt; +import com.google.common.io.ByteStreams; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; +import java.nio.charset.Charset; + +/** + * A StringUtf8Coder encodes Java Strings in UTF-8 encoding. + * If in a nested context, prefixes the string with a VarInt length field. + */ +public class StringUtf8Coder extends AtomicCoder { + @JsonCreator + public static StringUtf8Coder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final StringUtf8Coder INSTANCE = new StringUtf8Coder(); + + private static class Singletons { + private static final Charset UTF8 = Charset.forName("UTF-8"); + } + + // Writes a string with VarInt size prefix, supporting large strings. + private static void writeString(String value, DataOutputStream dos) + throws IOException { + byte[] bytes = value.getBytes(Singletons.UTF8); + VarInt.encode(bytes.length, dos); + dos.write(bytes); + } + + // Reads a string with VarInt size prefix, supporting large strings. + private static String readString(DataInputStream dis) throws IOException { + int len = VarInt.decodeInt(dis); + if (len < 0) { + throw new CoderException("Invalid encoded string length: " + len); + } + byte[] bytes = new byte[len]; + dis.readFully(bytes); + return new String(bytes, Singletons.UTF8); + } + + private StringUtf8Coder() {} + + @Override + public void encode(String value, OutputStream outStream, Context context) + throws IOException { + if (value == null) { + throw new CoderException("cannot encode a null String"); + } + if (context.isWholeStream) { + outStream.write(value.getBytes(Singletons.UTF8)); + } else { + writeString(value, new DataOutputStream(outStream)); + } + } + + @Override + public String decode(InputStream inStream, Context context) + throws IOException { + if (context.isWholeStream) { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + ByteStreams.copy(inStream, outStream); + // ByteArrayOutputStream.toString provides no Charset overloads. + return outStream.toString("UTF-8"); + } else { + try { + return readString(new DataInputStream(inStream)); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. 
+ throw new CoderException(exn); + } + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + protected long getEncodedElementByteSize(String value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null String"); + } + if (context.isWholeStream) { + return value.getBytes(Singletons.UTF8).length; + } else { + DataOutputStream stream = new DataOutputStream(new ByteArrayOutputStream()); + writeString(value, stream); + return stream.size(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java new file mode 100644 index 0000000000000..e49dfbb9c01c8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.services.bigquery.model.TableRow; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A TableRowJsonCoder encodes BigQuery TableRow objects. + */ +public class TableRowJsonCoder extends AtomicCoder { + + @JsonCreator + public static TableRowJsonCoder of() { + return INSTANCE; + } + + @Override + public void encode(TableRow value, OutputStream outStream, Context context) + throws IOException { + String strValue = MAPPER.writeValueAsString(value); + StringUtf8Coder.of().encode(strValue, outStream, context); + } + + @Override + public TableRow decode(InputStream inStream, Context context) + throws IOException { + String strValue = StringUtf8Coder.of().decode(inStream, context); + return MAPPER.readValue(strValue, TableRow.class); + } + + @Override + protected long getEncodedElementByteSize(TableRow value, Context context) + throws Exception { + String strValue = MAPPER.writeValueAsString(value); + return StringUtf8Coder.of().getEncodedElementByteSize(strValue, context); + } + + ///////////////////////////////////////////////////////////////////////////// + + // FAIL_ON_EMPTY_BEANS is disabled in order to handle null values in + // TableRow. + private static final ObjectMapper MAPPER = + new ObjectMapper().disable(SerializationFeature.FAIL_ON_EMPTY_BEANS); + + private static final TableRowJsonCoder INSTANCE = new TableRowJsonCoder(); + + private TableRowJsonCoder() { + } + + /** + * TableCell can hold arbitrary Object instances, which makes the encoding + * non-deterministic. 
+ */ + @Override + public boolean isDeterministic() { + return false; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java new file mode 100644 index 0000000000000..93d080b7f01cd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A TextualIntegerCoder encodes Integers as text. + */ +public class TextualIntegerCoder extends AtomicCoder { + @JsonCreator + public static TextualIntegerCoder of() { + return new TextualIntegerCoder(); + } + + + ///////////////////////////////////////////////////////////////////////////// + + private TextualIntegerCoder() {} + + @Override + public void encode(Integer value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + String textualValue = value.toString(); + StringUtf8Coder.of().encode(textualValue, outStream, context); + } + + @Override + public Integer decode(InputStream inStream, Context context) + throws IOException, CoderException { + String textualValue = StringUtf8Coder.of().decode(inStream, context); + try { + return Integer.valueOf(textualValue); + } catch (NumberFormatException exn) { + throw new CoderException("error when decoding a textual integer", exn); + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + protected long getEncodedElementByteSize(Integer value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + String textualValue = value.toString(); + return StringUtf8Coder.of().getEncodedElementByteSize(textualValue, context); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java new file mode 100644 index 0000000000000..ed5ae45c53e77 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * A {@code URICoder} encodes/decodes {@link URI}s by conversion to/from {@link String}, delegating + * encoding/decoding of the string to {@link StringUtf8Coder}. + */ +public class URICoder extends AtomicCoder { + + @JsonCreator + public static URICoder of() { + return INSTANCE; + } + + private static final URICoder INSTANCE = new URICoder(); + private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); + + private URICoder() {} + + ///////////////////////////////////////////////////////////////////////////// + + @Override + public void encode(URI value, OutputStream outStream, Context context) + throws IOException { + if (value == null) { + throw new CoderException("cannot encode a null URI"); + } + STRING_CODER.encode(value.toString(), outStream, context); + } + + @Override + public URI decode(InputStream inStream, Context context) + throws IOException { + try { + return new URI(STRING_CODER.decode(inStream, context)); + } catch (URISyntaxException exn) { + throw new CoderException(exn); + } + } + + @Override + public boolean isDeterministic() { + return STRING_CODER.isDeterministic(); + } + + @Override + protected long getEncodedElementByteSize(URI value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null URI"); + } + return STRING_CODER.getEncodedElementByteSize(value.toString(), context); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java new file mode 100644 index 0000000000000..eff03fb737324 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.VarInt; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A VarIntCoder encodes Integers using between 1 and 5 bytes. Negative + * numbers always take 5 bytes, so BigEndianIntegerCoder may be preferable for + * ints that are known to often be large or negative. 
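+ *
+ * <p>An illustrative sketch (the PCollection here is hypothetical):
+ * <pre> {@code
+ * PCollection<Integer> wordLengths = ...;
+ * wordLengths.setCoder(VarIntCoder.of());
+ * } </pre>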
+ */ +public class VarIntCoder extends AtomicCoder { + @JsonCreator + public static VarIntCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final VarIntCoder INSTANCE = + new VarIntCoder(); + + private VarIntCoder() {} + + @Override + public void encode(Integer value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + VarInt.encode(value.intValue(), outStream); + } + + @Override + public Integer decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + return VarInt.decodeInt(inStream); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(Integer value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Integer value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Integer"); + } + return VarInt.getLength(value.longValue()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java new file mode 100644 index 0000000000000..74f9b6092288f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.VarInt; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A VarLongCoder encodes longs using between 1 and 10 bytes. Negative + * numbers always take 10 bytes, so BigEndianLongCoder may be preferable for + * longs that are known to often be large or negative. 
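+ *
+ * <p>An illustrative sketch (the PCollection here is hypothetical):
+ * <pre> {@code
+ * PCollection<Long> elementCounts = ...;
+ * elementCounts.setCoder(VarLongCoder.of());
+ * } </pre>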
+ */ +public class VarLongCoder extends AtomicCoder { + @JsonCreator + public static VarLongCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final VarLongCoder INSTANCE = + new VarLongCoder(); + + private VarLongCoder() {} + + @Override + public void encode(Long value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Long"); + } + VarInt.encode(value.longValue(), outStream); + } + + @Override + public Long decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + return VarInt.decodeLong(inStream); + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(Long value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Long value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot encode a null Long"); + } + return VarInt.getLength(value.longValue()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java new file mode 100644 index 0000000000000..fc9a1e0958b24 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A VoidCoder encodes Voids. Uses zero bytes per Void. + */ +public class VoidCoder extends AtomicCoder { + @JsonCreator + public static VoidCoder of() { + return INSTANCE; + } + + + ///////////////////////////////////////////////////////////////////////////// + + private static final VoidCoder INSTANCE = new VoidCoder(); + + private VoidCoder() {} + + @Override + public void encode(Void value, OutputStream outStream, Context context) { + // Nothing to write! + } + + @Override + public Void decode(InputStream inStream, Context context) { + // Nothing to read! + return null; + } + + @Override + public boolean isDeterministic() { + return true; + } + + /** + * Returns true since registerByteSizeObserver() runs in constant time. 
+ */ + @Override + public boolean isRegisterByteSizeObserverCheap(Void value, Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(Void value, Context context) + throws Exception { + return 0; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java new file mode 100644 index 0000000000000..ea305e776bc9a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines {@link com.google.cloud.dataflow.sdk.coders.Coder}s + * to specify how data is encoded to and decoded from byte strings. + * + *

During execution of a Pipeline, elements in a + * {@link com.google.cloud.dataflow.sdk.values.PCollection} + * may need to be encoded into byte strings. + * This happens both at the beginning and end of a pipeline when data is read from and written to + * persistent storage and also during execution of a pipeline when elements are communicated between + * machines. + * + *

Exactly when PCollection elements are encoded during execution depends on which + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} is being used and how that runner + * chooses to execute the pipeline. As such, Dataflow requires that all PCollections have an + * appropriate Coder in case it becomes necessary. In many cases, the Coder can be inferred from + * the available Java type + * information and the Pipeline's {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry}. It + * can be specified per PCollection via + * {@link com.google.cloud.dataflow.sdk.values.PCollection#setCoder(Coder)} or per type using the + * {@link com.google.cloud.dataflow.sdk.coders.DefaultCoder} annotation. + * + *
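+ * <p>A minimal sketch of the explicit, per-PCollection form (the
+ * PCollection itself is hypothetical):
+ * <pre> {@code
+ * PCollection<String> lines = ...;
+ * lines.setCoder(StringUtf8Coder.of());
+ * } </pre>
+ *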

This package provides a number of coders for common types like {@code Integer}, + * {@code String}, and {@code List}, as well as coders like + * {@link com.google.cloud.dataflow.sdk.coders.AvroCoder} that can be used to encode many custom + * types. + * + */ +package com.google.cloud.dataflow.sdk.coders; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java new file mode 100644 index 0000000000000..7a9e6ea3d394c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java @@ -0,0 +1,678 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; +import static com.google.cloud.dataflow.sdk.util.CloudSourceUtils.readElemsFromSource; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.AvroSink; +import com.google.cloud.dataflow.sdk.runners.worker.AvroSource; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PInput; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.reflect.ReflectData; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +/** + * Transforms for reading and writing Avro files. + * + *

To read a {@link PCollection} from one or more Avro files, use + * {@link AvroIO.Read}, specifying {@link AvroIO.Read#from} to specify + * the path of the file(s) to read from (e.g., a local filename or + * filename pattern if running locally, or a Google Cloud Storage + * filename or filename pattern of the form + * {@code "gs:///"}), and optionally + * {@link AvroIO.Read#named} to specify the name of the pipeline step. + * + *

It is required to specify {@link AvroIO.Read#withSchema}. To + * read specific records, such as Avro-generated classes, provide an + * Avro-generated class type. To read GenericRecords, provide either + * an org.apache.avro.Schema or a schema in a JSON-encoded string form. + * An exception will be thrown if a record doesn't match the specified + * schema. + * + *

For example: + *

 {@code
+ * Pipeline p = ...;
+ *
+ * // A simple Read of a local file (only runs locally):
+ * PCollection<AvroAutoGenClass> records =
+ *     p.apply(AvroIO.Read.from("/path/to/file.avro")
+ *                        .withSchema(AvroAutoGenClass.class));
+ *
+ * // A Read from a GCS file (runs locally and via the Google Cloud
+ * // Dataflow service):
+ * Schema schema = new Schema.Parser().parse(new File(
+ *     "gs://my_bucket/path/to/schema.avsc"));
+ * PCollection<GenericRecord> records =
+ *     p.apply(AvroIO.Read.named("ReadFromAvro")
+ *                        .from("gs://my_bucket/path/to/records-*.avro")
+ *                        .withSchema(schema));
+ * } 
+ * + *

To write a {@link PCollection} to one or more Avro files, use + * {@link AvroIO.Write}, specifying {@link AvroIO.Write#to} to specify + * the path of the file to write to (e.g., a local filename or sharded + * filename pattern if running locally, or a Google Cloud Storage + * filename or sharded filename pattern of the form + * {@code "gs:///"}), and optionally + * {@link AvroIO.Write#named} to specify the name of the pipeline step. + * + *

It is required to specify {@link AvroIO.Write#withSchema}. To + * write specific records, such as Avro-generated classes, provide an + * Avro-generated class type. To write GenericRecords, provide either + * an org.apache.avro.Schema or a schema in a JSON-encoded string form. + * An exception will be thrown if a record doesn't match the specified + * schema. + * + *

For example: + *

 {@code
+ * // A simple Write to a local file (only runs locally):
+ * PCollection<AvroAutoGenClass> records = ...;
+ * records.apply(AvroIO.Write.to("/path/to/file.avro")
+ *                           .withSchema(AvroAutoGenClass.class));
+ *
+ * // A Write to a sharded GCS file (runs locally and via the Google Cloud
+ * // Dataflow service):
+ * Schema schema = new Schema.Parser().parse(new File(
+ *     "gs://my_bucket/path/to/schema.avsc"));
+ * PCollection<GenericRecord> records = ...;
+ * records.apply(AvroIO.Write.named("WriteToAvro")
+ *                           .to("gs://my_bucket/path/to/numbers")
+ *                           .withSchema(schema)
+ *                           .withSuffix(".avro"));
+ * } 
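The sharding controls defined further down in this class can be combined with the Write example above. A minimal sketch, reusing records and schema from that example; the shard count, paths, and resulting filenames are illustrative:

    // Request exactly three output shards; with the default shard name
    // template this would typically produce files such as
    // numbers-00000-of-00003.avro.
    records.apply(AvroIO.Write.named("WriteShardedAvro")
                              .to("gs://my_bucket/path/to/numbers")
                              .withSchema(schema)
                              .withSuffix(".avro")
                              .withNumShards(3));

    // Or force a single output file; this constrains parallelism and is
    // only advisable for small outputs.
    records.apply(AvroIO.Write.named("WriteSingleAvroFile")
                              .to("gs://my_bucket/path/to/all-numbers")
                              .withSchema(schema)
                              .withSuffix(".avro")
                              .withoutSharding());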
+ */ +public class AvroIO { + + /** + * A root PTransform that reads from an Avro file (or multiple Avro + * files matching a pattern) and returns a PCollection containing + * the decoding of each record. + */ + public static class Read { + + /** + * Returns an AvroIO.Read PTransform with the given step name. + */ + public static Bound named(String name) { + return new Bound<>(GenericRecord.class).named(name); + } + + /** + * Returns an AvroIO.Read PTransform that reads from the file(s) + * with the given name or pattern. This can be a local filename + * or filename pattern (if running locally), or a Google Cloud + * Storage filename or filename pattern of the form + * {@code "gs:///"}) (if running locally or via + * the Google Cloud Dataflow service). Standard + * Java Filesystem glob patterns ("*", "?", "[..]") are supported. + */ + public static Bound from(String filepattern) { + return new Bound<>(GenericRecord.class).from(filepattern); + } + + /** + * Returns an AvroIO.Read PTransform that reads Avro file(s) + * containing records whose type is the specified Avro-generated class. + * + * @param the type of the decoded elements, and the elements + * of the resulting PCollection + */ + public static Bound withSchema(Class type) { + return new Bound<>(type).withSchema(type); + } + + /** + * Returns an AvroIO.Read PTransform that reads Avro file(s) + * containing records of the specified schema. + */ + public static Bound withSchema(Schema schema) { + return new Bound<>(GenericRecord.class).withSchema(schema); + } + + /** + * Returns an AvroIO.Read PTransform that reads Avro file(s) + * containing records of the specified schema in a JSON-encoded + * string form. + */ + public static Bound withSchema(String schema) { + return withSchema((new Schema.Parser()).parse(schema)); + } + + /** + * A PTransform that reads from an Avro file (or multiple Avro + * files matching a pattern) and returns a bounded PCollection containing + * the decoding of each record. + * + * @param the type of each of the elements of the resulting + * PCollection + */ + public static class Bound + extends PTransform> { + private static final long serialVersionUID = 0; + + /** The filepattern to read from. */ + @Nullable final String filepattern; + /** The class type of the records. */ + final Class type; + /** The schema of the input file. */ + @Nullable final Schema schema; + + Bound(Class type) { + this(null, null, type, null); + } + + Bound(String name, String filepattern, Class type, Schema schema) { + super(name); + this.filepattern = filepattern; + this.type = type; + this.schema = schema; + } + + /** + * Returns a new AvroIO.Read PTransform that's like this one but + * with the given step name. Does not modify this object. + */ + public Bound named(String name) { + return new Bound<>(name, filepattern, type, schema); + } + + /** + * Returns a new AvroIO.Read PTransform that's like this one but + * that reads from the file(s) with the given name or pattern. + * (See {@link AvroIO.Read#from} for a description of + * filepatterns.) Does not modify this object. + */ + public Bound from(String filepattern) { + return new Bound<>(name, filepattern, type, schema); + } + + /** + * Returns a new AvroIO.Read PTransform that's like this one but + * that reads Avro file(s) containing records whose type is the + * specified Avro-generated class. Does not modify this object. 
+ * + * @param the type of the decoded elements, and the elements of + * the resulting PCollection + */ + public Bound withSchema(Class type) { + return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type)); + } + + /** + * Returns a new AvroIO.Read PTransform that's like this one but + * that reads Avro file(s) containing records of the specified schema. + * Does not modify this object. + */ + public Bound withSchema(Schema schema) { + return new Bound<>(name, filepattern, GenericRecord.class, schema); + } + + /** + * Returns a new AvroIO.Read PTransform that's like this one but + * that reads Avro file(s) containing records of the specified schema + * in a JSON-encoded string form. Does not modify this object. + */ + public Bound withSchema(String schema) { + return withSchema((new Schema.Parser()).parse(schema)); + } + + @Override + public PCollection apply(PInput input) { + if (filepattern == null) { + throw new IllegalStateException( + "need to set the filepattern of an AvroIO.Read transform"); + } + if (schema == null) { + throw new IllegalStateException( + "need to set the schema of an AvroIO.Read transform"); + } + + // Force the output's Coder to be what the read is using, and + // unchangeable later, to ensure that we read the input in the + // format specified by the Read transform. + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + .setCoder(getDefaultOutputCoder()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return AvroCoder.of(type, schema); + } + + @Override + protected String getKindString() { return "AvroIO.Read"; } + + public String getFilepattern() { + return filepattern; + } + + public Schema getSchema() { + return schema; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateReadHelper(transform, context); + } + }); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A root PTransform that writes a PCollection to an Avro file (or + * multiple Avro files matching a sharding pattern). + */ + public static class Write { + + /** + * Returns an AvroIO.Write PTransform with the given step name. + */ + public static Bound named(String name) { + return new Bound<>(GenericRecord.class).named(name); + } + + /** + * Returns an AvroIO.Write PTransform that writes to the file(s) + * with the given prefix. This can be a local filename + * (if running locally), or a Google Cloud Storage filename of + * the form {@code "gs:///"}) + * (if running locally or via the Google Cloud Dataflow service). + * + *

The files written will begin with this prefix, followed by + * a shard identifier (see {@link Bound#withNumShards}, and end + * in a common extension, if given by {@link Bound#withSuffix}. + */ + public static Bound to(String prefix) { + return new Bound<>(GenericRecord.class).to(prefix); + } + + /** + * Returns an AvroIO.Write PTransform that writes to the file(s) with the + * given filename suffix. + */ + public static Bound withSuffix(String filenameSuffix) { + return new Bound<>(GenericRecord.class).withSuffix(filenameSuffix); + } + + /** + * Returns an AvroIO.Write PTransform that uses the provided shard count. + * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Setting this value is not recommended + * unless you require a specific number of output files. + * + * @param numShards the number of shards to use, or 0 to let the system + * decide. + */ + public static Bound withNumShards(int numShards) { + return new Bound<>(GenericRecord.class).withNumShards(numShards); + } + + /** + * Returns an AvroIO.Write PTransform that uses the given shard name + * template. + * + * See {@link ShardNameTemplate} for a description of shard templates. + */ + public static Bound withShardNameTemplate(String shardTemplate) { + return new Bound<>(GenericRecord.class).withShardNameTemplate(shardTemplate); + } + + /** + * Returns an AvroIO.Write PTransform that forces a single file as + * output. + * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Setting this value is not recommended + * unless you require a specific number of output files. + */ + public static Bound withoutSharding() { + return new Bound<>(GenericRecord.class).withoutSharding(); + } + + /** + * Returns an AvroIO.Write PTransform that writes Avro file(s) + * containing records whose type is the specified Avro-generated class. + * + * @param the type of the elements of the input PCollection + */ + public static Bound withSchema(Class type) { + return new Bound<>(type).withSchema(type); + } + + /** + * Returns an AvroIO.Write PTransform that writes Avro file(s) + * containing records of the specified schema. + */ + public static Bound withSchema(Schema schema) { + return new Bound<>(GenericRecord.class).withSchema(schema); + } + + /** + * Returns an AvroIO.Write PTransform that writes Avro file(s) + * containing records of the specified schema in a JSON-encoded + * string form. + */ + public static Bound withSchema(String schema) { + return withSchema((new Schema.Parser()).parse(schema)); + } + + /** + * A PTransform that writes a bounded PCollection to an Avro file (or + * multiple Avro files matching a sharding pattern). + * + * @param the type of each of the elements of the input PCollection + */ + public static class Bound + extends PTransform, PDone> { + private static final long serialVersionUID = 0; + + /** The filename to write to. */ + @Nullable final String filenamePrefix; + /** Suffix to use for each filename. */ + final String filenameSuffix; + /** Requested number of shards. 0 for automatic. */ + final int numShards; + /** Shard template string. */ + final String shardTemplate; + /** The class type of the records. */ + final Class type; + /** The schema of the output file. */ + @Nullable final Schema schema; + + Bound(Class type) { + this(null, null, "", 0, ShardNameTemplate.INDEX_OF_MAX, type, null); + } + + Bound(String name, String filenamePrefix, String filenameSuffix, + int numShards, String shardTemplate, + Class type, Schema schema) { + super(name); + this.filenamePrefix = filenamePrefix; + this.filenameSuffix = filenameSuffix; + this.numShards = numShards; + this.shardTemplate = shardTemplate; + this.type = type; + this.schema = schema; + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * with the given step name. Does not modify this object. + */ + public Bound named(String name) { + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that writes to the file(s) with the given filename prefix. + * + *

See {@link Write#to(String) Write.to(String)} for more information. + * + *

Does not modify this object. + */ + public Bound to(String filenamePrefix) { + validateOutputComponent(filenamePrefix); + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that writes to the file(s) with the given filename suffix. + * + *

Does not modify this object. + * + * @see ShardNameTemplate + */ + public Bound withSuffix(String filenameSuffix) { + validateOutputComponent(filenameSuffix); + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that uses the provided shard count. + * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Setting this value is not recommended + * unless you require a specific number of output files. + * + *

Does not modify this object. + * + * @param numShards the number of shards to use, or 0 to let the system + * decide. + * @see ShardNameTemplate + */ + public Bound withNumShards(int numShards) { + Preconditions.checkArgument(numShards >= 0); + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that uses the given shard name template. + * + *

Does not modify this object. + * + * @see ShardNameTemplate + */ + public Bound withShardNameTemplate(String shardTemplate) { + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that forces a single file as output. + * + *

This is a shortcut for + * {@code .withNumShards(1).withShardNameTemplate("")} + * + *

Does not modify this object. + */ + public Bound withoutSharding() { + return new Bound<>(name, filenamePrefix, filenameSuffix, 1, "", type, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that writes to Avro file(s) containing records whose type is the + * specified Avro-generated class. Does not modify this object. + * + * @param the type of the elements of the input PCollection + */ + public Bound withSchema(Class type) { + return new Bound<>(name, filenamePrefix, filenameSuffix, + numShards, shardTemplate, + type, ReflectData.get().getSchema(type)); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that writes to Avro file(s) containing records of the specified + * schema. Does not modify this object. + */ + public Bound withSchema(Schema schema) { + return new Bound<>(name, filenamePrefix, filenameSuffix, + numShards, shardTemplate, + GenericRecord.class, schema); + } + + /** + * Returns a new AvroIO.Write PTransform that's like this one but + * that writes to Avro file(s) containing records of the specified + * schema in a JSON-encoded string form. Does not modify this object. + */ + public Bound withSchema(String schema) { + return withSchema((new Schema.Parser()).parse(schema)); + } + + @Override + public PDone apply(PCollection input) { + if (filenamePrefix == null) { + throw new IllegalStateException( + "need to set the filename prefix of an AvroIO.Write transform"); + } + if (schema == null) { + throw new IllegalStateException( + "need to set the schema of an AvroIO.Write transform"); + } + + return new PDone(); + } + + /** + * Returns the current shard name template string. + */ + public String getShardNameTemplate() { + return shardTemplate; + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + + @Override + protected String getKindString() { return "AvroIO.Write"; } + + public String getFilenamePrefix() { + return filenamePrefix; + } + + public String getShardTemplate() { + return shardTemplate; + } + + public int getNumShards() { + return numShards; + } + + public String getFilenameSuffix() { + return filenameSuffix; + } + + public Class getType() { + return type; + } + + public Schema getSchema() { + return schema; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateWriteHelper(transform, context); + } + }); + } + } + } + + // Pattern which matches old-style shard output patterns, which are now + // disallowed. 
+ private static final Pattern SHARD_OUTPUT_PATTERN = + Pattern.compile("@([0-9]+|\\*)"); + + private static void validateOutputComponent(String partialFilePattern) { + Preconditions.checkArgument( + !SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find(), + "Output name components are not allowed to contain @* or @N patterns: " + + partialFilePattern); + } + + ///////////////////////////////////////////////////////////////////////////// + + private static void evaluateReadHelper( + Read.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + AvroSource source = new AvroSource<>( + transform.filepattern, null, null, WindowedValue.getValueOnlyCoder( + transform.getDefaultOutputCoder())); + List> elems = readElemsFromSource(source); + List> output = new ArrayList<>(); + for (WindowedValue elem : elems) { + output.add(ValueWithMetadata.of(elem)); + } + context.setPCollectionValuesWithMetadata(transform.getOutput(), output); + } + + private static void evaluateWriteHelper( + Write.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + List> elems = context.getPCollectionWindowedValues(transform.getInput()); + int numShards = transform.numShards; + if (numShards < 1) { + // System gets to choose. For direct mode, choose 1. + numShards = 1; + } + AvroSink writer = new AvroSink<>(transform.filenamePrefix, transform.shardTemplate, + transform.filenameSuffix, numShards, + WindowedValue.getValueOnlyCoder( + AvroCoder.of(transform.type, transform.schema))); + try (Sink.SinkWriter> sink = writer.writer()) { + for (WindowedValue elem : elems) { + sink.add(elem); + } + } catch (IOException exn) { + throw new RuntimeException( + "unable to write to output file \"" + transform.filenamePrefix + "\"", + exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java new file mode 100644 index 0000000000000..2fffe4de2c45b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -0,0 +1,937 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.api.client.json.JsonFactory; +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.options.GcpOptions; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.BigQuerySource; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter; +import com.google.cloud.dataflow.sdk.util.CloudSourceUtils; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PInput; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Transformations for reading and writing + * BigQuery tables. + *

Table References

+ * A fully-qualified BigQuery table name consists of three components: + *
    + *
  • {@code projectId}: the Cloud project id (defaults to + * {@link GcpOptions#getProject()}). + *
  • {@code datasetId}: the BigQuery dataset id, unique within a project. + *
  • {@code tableId}: a table id, unique within a dataset. + *
+ *

+ * BigQuery table references are stored as a {@link TableReference}, which comes + * from the BigQuery Java Client API. + * Tables can be referred to as Strings, with or without the {@code projectId}. + * A helper function is provided ({@link BigQueryIO#parseTableSpec(String)}), + * which parses the following string forms into a {@link TableReference}: + *

    + *
  • [{@code project_id}]:[{@code dataset_id}].[{@code table_id}] + *
  • [{@code dataset_id}].[{@code table_id}] + *
+ *
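A short sketch of how these two string forms map onto a TableReference via the helpers defined in this class; the project, dataset, and table names are illustrative:

    // Fully-qualified form: project, dataset, and table are all parsed out.
    TableReference ref = BigQueryIO.parseTableSpec("my-project:my_dataset.my_table");
    // ref.getProjectId() -> "my-project"
    // ref.getDatasetId() -> "my_dataset"
    // ref.getTableId()   -> "my_table"

    // Short form: the project id is left null and is filled in from the
    // pipeline's default project when the transform runs.
    TableReference sameDataset = BigQueryIO.parseTableSpec("my_dataset.my_table");

    // Back to the canonical string form:
    String spec = BigQueryIO.toTableSpec(ref);  // "my-project:my_dataset.my_table"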

Reading

+ * To read from a BigQuery table, apply a {@link BigQueryIO.Read} transformation. + * This produces a {@code PCollection} as output: + *
{@code
+ * PCollection<TableRow> shakespeare = pipeline.apply(
+ *     BigQueryIO.Read
+ *         .named("Read")
+ *         .from("clouddataflow-readonly:samples.weather_stations");
+ * }
+ *
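A variant sketch of the same read, using a TableReference directly and disabling table validation; the pipeline variable mirrors the example above:

    TableReference table = new TableReference()
        .setProjectId("clouddataflow-readonly")
        .setDatasetId("samples")
        .setTableId("weather_stations");
    PCollection<TableRow> weather = pipeline.apply(
        BigQueryIO.Read
            .named("ReadWithoutValidation")
            .from(table)
            .withoutValidation());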

Writing

+ * To write to a BigQuery table, apply a {@link BigQueryIO.Write} transformation. + * This consumes a {@code PCollection} as input. + *

+ *

{@code
+ * PCollection<TableRow> quotes = ...
+ *
+ * List<TableFieldSchema> fields = new ArrayList<>();
+ * fields.add(new TableFieldSchema().setName("source").setType("STRING"));
+ * fields.add(new TableFieldSchema().setName("quote").setType("STRING"));
+ * TableSchema schema = new TableSchema().setFields(fields);
+ *
+ * quotes.apply(BigQueryIO.Write
+ *     .named("Write")
+ *     .to("my-project:output.output_table")
+ *     .withSchema(schema)
+ *     .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+ * }
+ *
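A sketch of the create and write dispositions described below, appending to a table that is assumed to already exist (so no schema is needed and CREATE_NEVER is appropriate); the table name and the quotes collection reuse the example above:

    quotes.apply(BigQueryIO.Write
        .named("AppendQuotes")
        .to("my-project:output.output_table")
        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_NEVER)
        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));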

+ * See {@link BigQueryIO.Write} for details on how to specify if a write should + * append to an existing table, replace the table, or verify that the table is + * empty. + * + * @see TableRow + */ +public class BigQueryIO { + private static final Logger LOG = LoggerFactory.getLogger(BigQueryIO.class); + + /** + * Singleton instance of the JSON factory used to read and write JSON + * formatted rows. + */ + private static final JsonFactory JSON_FACTORY = Transport.getJsonFactory(); + + /** + * Project IDs must contain 6-63 lowercase letters, digits, or dashes. + * IDs must start with a letter and may not end with a dash. + * This regex isn't exact - this allows for patterns that would be rejected by + * the service, but this is sufficient for basic parsing of table references. + */ + private static final String PROJECT_ID_REGEXP = + "[a-z][-a-z0-9:.]{4,61}[a-z0-9]"; + + /** + * Regular expression which matches Dataset IDs. + */ + private static final String DATASET_REGEXP = "[-\\w.]{1,1024}"; + + /** + * Regular expression which matches Table IDs. + */ + private static final String TABLE_REGEXP = "[-\\w$@]{1,1024}"; + + /** + * Matches table specifications in the form + * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]". + */ + private static final String DATASET_TABLE_REGEXP = String.format( + "((?%s):)?(?%s)\\.(?%s)", + PROJECT_ID_REGEXP, DATASET_REGEXP, TABLE_REGEXP); + + private static final Pattern TABLE_SPEC = + Pattern.compile(DATASET_TABLE_REGEXP); + + /** + * Parse a table specification in the form + * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]". + *

+ * If the project id is omitted, the default project id is used. + */ + public static TableReference parseTableSpec(String tableSpec) { + Matcher match = TABLE_SPEC.matcher(tableSpec); + if (!match.matches()) { + throw new IllegalArgumentException( + "Table reference is not in [project_id]:[dataset_id].[table_id] " + + "format: " + tableSpec); + } + + TableReference ref = new TableReference(); + ref.setProjectId(match.group("PROJECT")); + + return ref + .setDatasetId(match.group("DATASET")) + .setTableId(match.group("TABLE")); + } + + /** + * Returns a canonical string representation of the TableReference. + */ + public static String toTableSpec(TableReference ref) { + StringBuilder sb = new StringBuilder(); + if (ref.getProjectId() != null) { + sb.append(ref.getProjectId()); + sb.append(":"); + } + + sb.append(ref.getDatasetId()) + .append('.') + .append(ref.getTableId()); + return sb.toString(); + } + + /** + * A PTransform that reads from a BigQuery table and returns a + * {@code PCollection} containing each of the rows of the table. + *

+ * Each TableRow record contains values indexed by column name. Here is a + * sample processing function which processes a "line" column from rows: + *


+   * static class ExtractWordsFn extends DoFn{@literal <TableRow, String>} {
+   *   {@literal @}Override
+   *   public void processElement(ProcessContext c) {
+   *     // Get the "line" field of the TableRow object, split it into words, and emit them.
+   *     TableRow row = c.element();
+   *     String[] words = row.get("line").toString().split("[^a-zA-Z']+");
+   *     for (String word : words) {
+   *       if (!word.isEmpty()) {
+   *         c.output(word);
+   *       }
+   *     }
+   *   }
+   * }
+   * 
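A usage sketch for the sample function above, assuming a hypothetical source table with a "line" column:

    PCollection<TableRow> rows = pipeline.apply(
        BigQueryIO.Read
            .named("ReadLines")
            .from("my-project:samples.lines"));  // illustrative table
    PCollection<String> words = rows.apply(ParDo.of(new ExtractWordsFn()));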
+ */ + public static class Read { + public static Bound named(String name) { + return new Bound().named(name); + } + + /** + * Reads a BigQuery table specified as + * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]" for + * tables within the current project. + */ + public static Bound from(String tableSpec) { + return new Bound().from(tableSpec); + } + + /** + * Reads a BigQuery table specified as a TableReference object. + */ + public static Bound from(TableReference table) { + return new Bound().from(table); + } + + /** + * Disables BigQuery table validation which is enabled by default. + */ + public static Bound withoutValidation() { + return new Bound().withoutValidation(); + } + + /** + * A PTransform that reads from a BigQuery table and returns a bounded + * {@code PCollection}. + */ + public static class Bound + extends PTransform> { + TableReference table; + final boolean validate; + + Bound() { + this.validate = true; + } + + Bound(String name, TableReference reference, boolean validate) { + super(name); + this.table = reference; + this.validate = validate; + } + + /** + * Sets the name associated with this transformation. + */ + public Bound named(String name) { + return new Bound(name, table, validate); + } + + /** + * Sets the table specification. + *

+ * Refer to {@link #parseTableSpec(String)} for the specification format. + */ + public Bound from(String tableSpec) { + return from(parseTableSpec(tableSpec)); + } + + /** + * Sets the table specification. + */ + public Bound from(TableReference table) { + return new Bound(name, table, validate); + } + + /** + * Disable table validation. + */ + public Bound withoutValidation() { + return new Bound(name, table, false); + } + + @Override + public PCollection apply(PInput input) { + if (table == null) { + throw new IllegalStateException( + "must set the table reference of a BigQueryIO.Read transform"); + } + return PCollection.createPrimitiveOutputInternal( + new GlobalWindow()) + // Force the output's Coder to be what the read is using, and + // unchangeable later, to ensure that we read the input in the + // format specified by the Read transform. + .setCoder(TableRowJsonCoder.of()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return TableRowJsonCoder.of(); + } + + @Override + protected String getKindString() { return "BigQueryIO.Read"; } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateReadHelper(transform, context); + } + }); + } + + /** + * Returns the table to write. + */ + public TableReference getTable() { + return table; + } + + /** + * Returns true if table validation is enabled. + */ + public boolean getValidate() { + return validate; + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A PTransform that writes a {@code PCollection} containing rows + * to a BigQuery table. + *

+ * By default, tables will be created if they do not exist, which + * corresponds to a {@code CreateDisposition.CREATE_IF_NEEDED} disposition + * which matches the default of BigQuery's Jobs API. A schema must be + * provided (via {@link Write#withSchema}), or else the transform may fail + * at runtime with an {@link java.lang.IllegalArgumentException}. + *

+ * By default, writes require an empty table, which corresponds to + * a {@code WriteDisposition.WRITE_EMPTY} disposition which matches the + * default of BigQuery's Jobs API. + *

+ * Here is a sample transform which produces TableRow values containing + * "word" and "count" columns: + *


+   * static class FormatCountsFn extends DoFn{@literal <KV<String, Long>, TableRow>} {
+   *   {@literal @}Override
+   *   public void processElement(ProcessContext c) {
+   *     TableRow row = new TableRow()
+   *         .set("word", c.element().getKey())
+   *         .set("count", c.element().getValue().intValue());
+   *     c.output(row);
+   *   }
+   * }
+   * 
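A usage sketch for the sample function above, assuming a hypothetical PCollection<KV<String, Long>> of word counts and a TableSchema built as in the earlier Write example:

    PCollection<TableRow> formatted =
        wordCounts.apply(ParDo.of(new FormatCountsFn()));
    formatted.apply(BigQueryIO.Write
        .named("WriteWordCounts")
        .to("my-project:output.word_counts")  // illustrative table
        .withSchema(schema));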
+ */ + public static class Write { + + /** + * An enumeration type for the BigQuery create disposition strings publicly + * documented as {@code CREATE_NEVER}, and {@code CREATE_IF_NEEDED}. + */ + public enum CreateDisposition { + /** + * Specifics that tables should not be created. + *

+ * If the output table does not exist, the write fails. + */ + CREATE_NEVER, + + /** + * Specifies that tables should be created if needed. This is the default + * behavior. + *

+ * Requires that a table schema is provided via {@link Write#withSchema}. + * This precondition is checked before starting a job. The schema is + * not required to match an existing table's schema. + *

+ * When this transformation is executed, if the output table does not + * exist, the table is created from the provided schema. Note that even if + * the table exists, it may be recreated if necessary when paired with a + * {@link WriteDisposition#WRITE_TRUNCATE}. + */ + CREATE_IF_NEEDED + } + + /** + * An enumeration type for the BigQuery write disposition strings publicly + * documented as {@code WRITE_TRUNCATE}, {@code WRITE_APPEND}, and + * {@code WRITE_EMPTY}. + */ + public enum WriteDisposition { + /** + * Specifies that write should replace a table. + *

+ * The replacement may occur in multiple steps - for instance by first + * removing the existing table, then creating a replacement, then filling + * it in. This is not an atomic operation, and external programs may + * see the table in any of these intermediate steps. + */ + WRITE_TRUNCATE, + + /** + * Specifies that rows may be appended to an existing table. + */ + WRITE_APPEND, + + /** + * Specifies that the output table must be empty. This is the default + * behavior. + *

+ * If the output table is not empty, the write fails at runtime. + *

+ * This check may occur long before data is written, and does not + * guarantee exclusive access to the table. If two programs are run + * concurrently, each specifying the same output table and + * a {@link WriteDisposition} of {@code WRITE_EMPTY}, it is possible + * for both to succeed. + */ + WRITE_EMPTY + } + + /** + * Sets the name associated with this transformation. + */ + public static Bound named(String name) { + return new Bound().named(name); + } + + /** + * Creates a write transformation for the given table specification. + *

+ * Refer to {@link #parseTableSpec(String)} for the specification format. + */ + public static Bound to(String tableSpec) { + return new Bound().to(tableSpec); + } + + /** Creates a write transformation for the given table. */ + public static Bound to(TableReference table) { + return new Bound().to(table); + } + + /** + * Specifies a table schema to use in table creation. + *

+ * The schema is required only if writing to a table which does not already + * exist, and {@link BigQueryIO.Write.CreateDisposition} is set to + * {@code CREATE_IF_NEEDED}. + */ + public static Bound withSchema(TableSchema schema) { + return new Bound().withSchema(schema); + } + + /** Specifies options for creating the table. */ + public static Bound withCreateDisposition(CreateDisposition disposition) { + return new Bound().withCreateDisposition(disposition); + } + + /** Specifies options for writing to the table. */ + public static Bound withWriteDisposition(WriteDisposition disposition) { + return new Bound().withWriteDisposition(disposition); + } + + /** + * Disables BigQuery table validation which is enabled by default. + */ + public static Bound withoutValidation() { + return new Bound().withoutValidation(); + } + + /** + * A PTransform that can write either a bounded or unbounded + * {@code PCollection}s to a BigQuery table. + */ + public static class Bound + extends PTransform, PDone> { + final TableReference table; + + // Table schema. The schema is required only if the table does not exist. + final TableSchema schema; + + // Options for creating the table. Valid values are CREATE_IF_NEEDED and + // CREATE_NEVER. + final CreateDisposition createDisposition; + + // Options for writing to the table. Valid values are WRITE_TRUNCATE, + // WRITE_APPEND and WRITE_EMPTY. + final WriteDisposition writeDisposition; + + // An option to indicate if table validation is desired. Default is true. + final boolean validate; + + public Bound() { + this.table = null; + this.schema = null; + this.createDisposition = CreateDisposition.CREATE_IF_NEEDED; + this.writeDisposition = WriteDisposition.WRITE_EMPTY; + this.validate = true; + } + + Bound(String name, TableReference ref, TableSchema schema, + CreateDisposition createDisposition, + WriteDisposition writeDisposition, + boolean validate) { + super(name); + this.table = ref; + this.schema = schema; + this.createDisposition = createDisposition; + this.writeDisposition = writeDisposition; + this.validate = validate; + } + + /** + * Sets the name associated with this transformation. + */ + public Bound named(String name) { + return new Bound(name, table, schema, createDisposition, + writeDisposition, validate); + } + + /** + * Specifies the table specification. + *

+ * Refer to {@link #parseTableSpec(String)} for the specification format. + */ + public Bound to(String tableSpec) { + return to(parseTableSpec(tableSpec)); + } + + /** + * Specifies the table to be written to. + */ + public Bound to(TableReference table) { + return new Bound(name, table, schema, createDisposition, + writeDisposition, validate); + } + + /** + * Specifies the table schema, used if the table is created. + */ + public Bound withSchema(TableSchema schema) { + return new Bound(name, table, schema, createDisposition, + writeDisposition, validate); + } + + /** Specifies options for creating the table. */ + public Bound withCreateDisposition(CreateDisposition createDisposition) { + return new Bound(name, table, schema, createDisposition, + writeDisposition, validate); + } + + /** Specifies options for writing the table. */ + public Bound withWriteDisposition(WriteDisposition writeDisposition) { + return new Bound(name, table, schema, createDisposition, + writeDisposition, validate); + } + + /** + * Disable table validation. + */ + public Bound withoutValidation() { + return new Bound(name, table, schema, createDisposition, writeDisposition, false); + } + + @Override + public PDone apply(PCollection input) { + if (table == null) { + throw new IllegalStateException( + "must set the table reference of a BigQueryIO.Write transform"); + } + + if (createDisposition == CreateDisposition.CREATE_IF_NEEDED && + schema == null) { + throw new IllegalArgumentException( + "CreateDisposition is CREATE_IF_NEEDED, " + + "however no schema was provided."); + } + + // In streaming, BigQuery write is taken care of by StreamWithDeDup transform. + BigQueryOptions options = getPipeline().getOptions().as(BigQueryOptions.class); + if (options.isStreaming()) { + return input.apply(new StreamWithDeDup(table, schema)); + } + + return new PDone(); + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + + @Override + protected String getKindString() { return "BigQueryIO.Write"; } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateWriteHelper(transform, context); + } + }); + } + + /** Returns the create disposition. */ + public CreateDisposition getCreateDisposition() { + return createDisposition; + } + + /** Returns the write disposition. */ + public WriteDisposition getWriteDisposition() { + return writeDisposition; + } + + /** Returns the table schema. */ + public TableSchema getSchema() { + return schema; + } + + /** Returns the table reference. */ + public TableReference getTable() { + return table; + } + + /** Returns true if table validation is enabled. */ + public boolean getValidate() { + return validate; + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Implementation of DoFn to perform streaming BigQuery write. + */ + private static class StreamingWriteFn extends DoFn>, Void> + implements DoFn.RequiresKeyedState { + + /** + * Class to accumulate BigQuery row data as a list of String. + * DoFn implementation must be Serializable, but BigQuery classes, + * such as TableRow are not. Therefore, convert into JSON String + * for accumulation. + */ + private static class JsonTableRows implements Iterable, Serializable { + + /** The list where BigQuery row data is accumulated. 
*/ + private final List jsonRows = new ArrayList<>(); + + /** Iterator of JsonTableRows converts the row in String to TableRow. */ + static class JsonTableRowIterator implements Iterator { + + private final Iterator iteratorInternal; + + /** Constructor. */ + JsonTableRowIterator(List jsonRowList) { + iteratorInternal = jsonRowList.iterator(); + } + + @Override + public boolean hasNext() { + return iteratorInternal.hasNext(); + } + + @Override + public TableRow next() { + try { + // Converts the String back into TableRow. + return JSON_FACTORY.fromString(iteratorInternal.next(), TableRow.class); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + iteratorInternal.remove(); + } + } + + /** Returns the iterator. */ + @Override + public Iterator iterator() { + return new JsonTableRowIterator(jsonRows); + } + + /** Adds a BigQuery TableRow. */ + void add(TableRow row) { + try { + // Converts into JSON format. + jsonRows.add(JSON_FACTORY.toString(row)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** TableReference in JSON. Use String to make the class Serializable. */ + private final String jsonTableReference; + + /** TableSchema in JSON. Use String to make the class Serializable. */ + private final String jsonTableSchema; + + /** JsonTableRows to accumulate BigQuery rows. */ + private JsonTableRows jsonTableRows; + + /** The list of unique ids for each BigQuery table row. */ + private List uniqueIdsForTableRows; + + /** The list of tables created so far, so we don't try the creation + each time. */ + private static ThreadLocal> createdTables = + new ThreadLocal>() { + @Override protected HashSet initialValue() { + return new HashSet<>(); + } + }; + + /** Constructor. */ + StreamingWriteFn(TableReference table, TableSchema schema) { + try { + jsonTableReference = JSON_FACTORY.toString(table); + jsonTableSchema = JSON_FACTORY.toString(schema); + } catch (IOException e) { + throw new RuntimeException("Cannot initialize BigQuery streaming writer.", e); + } + } + + /** Prepares a target BigQuery table. */ + @Override + public void startBundle(Context context) { + jsonTableRows = new JsonTableRows(); + uniqueIdsForTableRows = new ArrayList<>(); + BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class); + Bigquery client = Transport.newBigQueryClient(options).build(); + + // TODO: Support table sharding and the better place to initialize + // BigQuery table. + HashSet tables = createdTables.get(); + if (!tables.contains(jsonTableSchema)) { + try { + TableSchema tableSchema = JSON_FACTORY.fromString( + jsonTableSchema, TableSchema.class); + TableReference tableReference = JSON_FACTORY.fromString( + jsonTableReference, TableReference.class); + + + BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); + inserter.tryCreateTable(tableSchema); + tables.add(jsonTableSchema); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** Accumulates the input into JsonTableRows and uniqueIdsForTableRows. */ + @Override + public void processElement(ProcessContext context) { + KV> kv = context.element(); + TableRow tableRow = kv.getValue().getValue(); + uniqueIdsForTableRows.add(kv.getValue().getKey()); + jsonTableRows.add(tableRow); + } + + /** Writes the accumulated rows into BigQuery with streaming API. 
*/ + @Override + public void finishBundle(Context context) { + BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class); + Bigquery client = Transport.newBigQueryClient(options).build(); + + try { + TableReference tableReference = JSON_FACTORY.fromString( + jsonTableReference, TableReference.class); + + BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); + inserter.insertAll(jsonTableRows.iterator(), uniqueIdsForTableRows.iterator()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Fn that tags each table row with a unique id. + * To avoid calling UUID.randomUUID() for each element, which can be costly, + * a randomUUID is generated only once per bucket of data. The actual unique + * id is created by concatenating this randomUUID with a sequential number. + */ + private static class TagWithUniqueIds extends DoFn>> { + private transient String randomUUID; + private transient AtomicLong sequenceNo; + + @Override + public void startBundle(Context context) { + randomUUID = UUID.randomUUID().toString(); + sequenceNo = new AtomicLong(); + } + + /** Tag the input with a unique id. */ + @Override + public void processElement(ProcessContext context) { + String uniqueId = randomUUID + Long.toString(sequenceNo.getAndIncrement()); + ThreadLocalRandom randomGenerator = ThreadLocalRandom.current(); + // We output on keys 0-50 to ensure that there's enough batching for + // BigQuery. + context.output(KV.of(randomGenerator.nextInt(0, 50), + KV.of(uniqueId, context.element()))); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * PTransform that performs streaming BigQuery write. To increase consistency, + * it leverages BigQuery best effort de-dup mechanism. + */ + private static class StreamWithDeDup + extends PTransform, PDone> { + + private final TableReference tableReference; + private final TableSchema tableSchema; + + /** Constructor. */ + StreamWithDeDup(TableReference tableReference, TableSchema tableSchema) { + this.tableReference = tableReference; + this.tableSchema = tableSchema; + } + + @Override protected Coder getDefaultOutputCoder() { return VoidCoder.of(); } + + @Override + public PDone apply(PCollection in) { + // A naive implementation would be to simply stream data directly to BigQuery. + // However, this could occassionally lead to duplicated data, e.g., when + // a VM that runs this code is restarted and the code is re-run. + + // The above risk is mitigated in this implementation by relying on + // BigQuery built-in best effort de-dup mechanism. + + // To use this mechanism, each input TableRow is tagged with a generated + // unique id, which is then passed to BigQuery and used to ignore duplicates. + + PCollection>> tagged = + in.apply(ParDo.of(new TagWithUniqueIds())); + + // To prevent having the same TableRow processed more than once with regenerated + // different unique ids, this implementation relies on "checkpointing" which is + // achieved as a side effect of having StreamingWriteFn implement RequiresKeyedState. + tagged.apply(ParDo.of(new StreamingWriteFn(tableReference, tableSchema))); + + // Note that the implementation to return PDone here breaks the + // implicit assumption about the job execution order. 
If a user + // implements a PTransform that takes PDone returned here as its + // input, the transform may not necessarily be executed after + // the BigQueryIO.Write. + + return new PDone(); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Direct mode read evaluator. + *

+ * This loads the entire table into an in-memory PCollection. + */ + private static void evaluateReadHelper( + Read.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + BigQueryOptions options = context.getPipelineOptions(); + Bigquery client = Transport.newBigQueryClient(options).build(); + TableReference ref = transform.table; + if (ref.getProjectId() == null) { + ref.setProjectId(options.getProject()); + } + + LOG.info("Reading from BigQuery table {}", toTableSpec(ref)); + List elems = CloudSourceUtils.readElemsFromSource(new BigQuerySource(client, ref)); + LOG.info("Number of records read from BigQuery: {}", elems.size()); + context.setPCollection(transform.getOutput(), elems); + } + + /** + * Direct mode write evaluator. + *

+ * This writes the entire table in a single BigQuery request. + * The table will be created if necessary. + */ + private static void evaluateWriteHelper( + Write.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + BigQueryOptions options = context.getPipelineOptions(); + Bigquery client = Transport.newBigQueryClient(options).build(); + TableReference ref = transform.table; + if (ref.getProjectId() == null) { + ref.setProjectId(options.getProject()); + } + + LOG.info("Writing to BigQuery table {}", toTableSpec(ref)); + + try { + BigQueryTableInserter inserter = new BigQueryTableInserter(client, ref); + + inserter.getOrCreateTable(transform.writeDisposition, + transform.createDisposition, transform.schema); + + List tableRows = context.getPCollection(transform.getInput()); + inserter.insertAll(tableRows.iterator()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java new file mode 100644 index 0000000000000..9c7fc0a1c5b33 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -0,0 +1,603 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.services.datastore.DatastoreV1.BeginTransactionRequest; +import com.google.api.services.datastore.DatastoreV1.BeginTransactionResponse; +import com.google.api.services.datastore.DatastoreV1.CommitRequest; +import com.google.api.services.datastore.DatastoreV1.Entity; +import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.api.services.datastore.client.Datastore; +import com.google.api.services.datastore.client.DatastoreException; +import com.google.api.services.datastore.client.DatastoreFactory; +import com.google.api.services.datastore.client.DatastoreHelper; +import com.google.api.services.datastore.client.DatastoreOptions; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.coders.EntityCoder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.GcpOptions; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.util.Credentials; +import com.google.cloud.dataflow.sdk.util.RetryHttpRequestInitializer; +import com.google.cloud.dataflow.sdk.values.PBegin; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PDone; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; + +/** + * Transforms for reading and writing + * Google Cloud Datastore + * entities. + * + *

The DatastoreIO class provides an experimental API to Read and Write a + * {@link PCollection} of Datastore {@code Entity} objects. Currently the class supports + * read operations on both the DirectPipelineRunner and DataflowPipelineRunner, + * and write operations on the DirectPipelineRunner. This API is subject to + * change, and currently requires an authentication workaround described below. + * + *

Datastore is a fully managed NoSQL data storage service. + * An Entity is an object in Datastore, analogous to a row in a traditional + * database table. DatastoreIO supports reading from and writing to Datastore + * from within the Dataflow SDK. + * + *

To use DatastoreIO, users must set up their environment and use gcloud + * to obtain credentials for Datastore: + *

+ * $ export CLOUDSDK_EXTRA_SCOPES=https://www.googleapis.com/auth/datastore
+ * $ gcloud auth login
+ * 
+ * + *

Note that the environment variable CLOUDSDK_EXTRA_SCOPES must be set + * to the same value when executing a Datastore pipeline, as the local auth + * cache is keyed by the requested scopes. + * + *

To read a {@link PCollection} from a query to Datastore, use + * {@link DatastoreIO.Read}, specifying {@link DatastoreIO.Read#from} to set + * the dataset to read and the query to run, and optionally + * {@link DatastoreIO.Read#named} and {@link DatastoreIO.Read#withHost} to specify + * the name of the pipeline step and the Datastore host, respectively. + * For example: + *

 {@code
+ * // Read a query from Datastore
+ * PipelineOptions options =
+ *     CliPipelineOptionsFactory.create(PipelineOptions.class, args);
+ * Pipeline p = Pipeline.create(options);
+ * PCollection<Entity> entities =
+ *     p.apply(DatastoreIO.Read
+ *             .named("Read Datastore")
+ *             .from(datasetId, query)
+ *             .withHost(host));
+ * p.run();
+ * } 
+ * + *
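The query handed to from(...) is a DatastoreV1 protobuf message. A minimal sketch of building one that selects a single kind; the kind name is illustrative, and the builder calls assume the standard DatastoreV1 protobuf API:

    Query.Builder q = Query.newBuilder();
    q.addKindBuilder().setName("MyKind");  // assumed kind name
    Query query = q.build();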

To write a {@link PCollection} to a Datastore, use + * {@link DatastoreIO.Write}, specifying {@link DatastoreIO.Write#to} to set + * the datastore to write to, and optionally {@link DatastoreIO.Write#named} to specify + * the name of the pipeline step. For example: + *

 {@code
+ * // A simple Write to Datastore with DirectPipelineRunner (writing is not
+ * // yet implemented for other runners):
+ * PCollection<Entity> entities = ...;
+ * entities.apply(DatastoreIO.Write.named("Write entities").to(datasetId));
+ * p.run();
+ *
+ * } 
+ */ + +public class DatastoreIO { + + private static final Logger LOG = LoggerFactory.getLogger(DatastoreIO.class); + private static final String DEFAULT_HOST = "https://www.googleapis.com"; + + /** + * A PTransform that reads from a Datastore query and returns a + * {@code PCollection} containing each of the rows of the table. + */ + public static class Read { + + /** + * Returns a DatastoreIO.Read PTransform with the given step name. + */ + public static Bound named(String name) { + return new Bound(DEFAULT_HOST).named(name); + } + + /** + * Reads entities retrieved from the dataset and a given query. + */ + public static Bound from(String datasetId, Query query) { + return new Bound(DEFAULT_HOST).from(datasetId, query); + } + + /** + * Returns a DatastoreIO.Read PTransform with specified host. + */ + public static Bound withHost(String host) { + return new Bound(host); + } + + /** + * A PTransform that reads from a Datastore query and returns a bounded + * {@code PCollection}. + */ + public static class Bound extends PTransform> { + String host; + String datasetId; + Query query; + + /** + * Returns a DatastoreIO.Bound object with given query. + * Sets the name, Datastore host, datasetId, query associated + * with this PTransform, and options for this Pipeline. + */ + Bound(String name, String host, String datasetId, Query query) { + super(name); + this.host = host; + this.datasetId = datasetId; + this.query = query; + } + + /** + * Returns a DatastoreIO.Read PTransform with host set up. + */ + Bound(String host) { + this.host = host; + } + + /** + * Returns a new DatastoreIO.Read PTransform with the name + * associated with this transformation. + */ + public Bound named(String name) { + return new Bound(name, host, datasetId, query); + } + + /** + * Returns a new DatastoreIO.Read PTransform with datasetId, + * and query associated with this transformation, and options + * associated with this Pipleine. + */ + public Bound from(String datasetId, Query query) { + return new Bound(name, host, datasetId, query); + } + + /** + * Returns a new DatastoreIO.Read PTransform with the host + * specified. + */ + public Bound withHost(String host) { + return new Bound(name, host, datasetId, query); + } + + @Override + public PCollection apply(PBegin input) { + if (datasetId == null || query == null) { + throw new IllegalStateException( + "need to set datasetId, and query " + + "of a DatastoreIO.Read transform"); + } + + QueryOptions queryOptions = QueryOptions.create(host, datasetId, query); + PCollection output; + try { + DataflowPipelineOptions options = + getPipeline().getOptions().as(DataflowPipelineOptions.class); + PCollection queries = splitQueryOptions(queryOptions, options, input); + + output = queries.apply(ParDo.of(new ReadEntitiesFn())); + getCoderRegistry().registerCoder(Entity.class, EntityCoder.class); + } catch (DatastoreException e) { + LOG.warn("DatastoreException: error while doing Datastore query splitting.", e); + throw new RuntimeException("Error while splitting Datastore query."); + } + + return output; + } + } + } + + ///////////////////// Write Class ///////////////////////////////// + /** + * A PTransform that writes a {@code PCollection} containing + * entities to a Datastore kind. + * + * Current version only supports Write operation running on + * DirectPipelineRunner. If Write is used on DataflowPipelineRunner, + * it throws UnsupportedOperationException and won't continue on the + * operation. 
+ * + */ + public static class Write { + /** + * Returns a DatastoreIO.Write PTransform with the name + * associated with this PTransform. + */ + public static Bound named(String name) { + return new Bound(DEFAULT_HOST).named(name); + } + + /** + * Returns a DatastoreIO.Write PTransform with given datasetId. + */ + public static Bound to(String datasetId) { + return new Bound(DEFAULT_HOST).to(datasetId); + } + + /** + * Returns a DatastoreIO.Write PTransform with specified host. + */ + public static Bound withHost(String host) { + return new Bound(host); + } + + /** + * A PTransform that writes a bounded {@code PCollection} + * to a Datastore. + */ + public static class Bound extends PTransform, PDone> { + String host; + String datasetId; + + /** + * Returns a DatastoreIO.Write PTransform with given host. + */ + Bound(String host) { + this.host = host; + } + + /** + * Returns a DatastoreIO.Write.Bound object. + * Sets the name, datastore agent, and kind associated + * with this transformation. + */ + Bound(String name, String host, String datasetId) { + super(name); + this.host = host; + this.datasetId = datasetId; + } + + /** + * Returns a DatastoreIO.Write PTransform with the name + * associated with this PTransform. + */ + public Bound named(String name) { + return new Bound(name, host, datasetId); + } + + /** + * Returns a DatastoreIO.Write PTransform with given datasetId. + */ + public Bound to(String datasetId) { + return new Bound(name, host, datasetId); + } + + /** + * Returns a new DatastoreIO.Write PTransform with specified host. + */ + public Bound withHost(String host) { + return new Bound(name, host, datasetId); + } + + @Override + public PDone apply(PCollection input) { + if (this.host == null || this.datasetId == null) { + throw new IllegalStateException( + "need to set Datastore host and dataasetId" + + "of a DatastoreIO.Write transform"); + } + + return new PDone(); + } + + @Override + protected String getKindString() { return "DatastoreIO.Write"; } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateWriteHelper(transform, context); + } + }); + } + } + } + + /////////////////////////////////////////////////////////////////// + + /** + * A DoFn that performs query request to Datastore and converts + * each QueryOptions into Entities. + */ + private static class ReadEntitiesFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + Query query = c.element().getQuery(); + Datastore datastore = c.element().getWorkerDatastore( + c.getPipelineOptions().as(GcpOptions.class)); + DatastoreIterator entityIterator = new DatastoreIterator(query, datastore); + + while (entityIterator.hasNext()) { + c.output(entityIterator.next().getEntity()); + } + } + } + + /** + * A class that stores query and datastore setup environments + * (host and datasetId). + */ + @DefaultCoder(AvroCoder.class) + private static class QueryOptions { + // Query to read in byte array. + public byte[] byteQuery; + + // Datastore host to read from. + public String host; + + // Datastore dataset ID to read from. + public String datasetId; + + @SuppressWarnings("unused") + QueryOptions() {} + + /** + * Returns a QueryOption object without account and private key file + * (for supporting query on local Datastore). 
+ * + * @param host the host of Datastore to connect + * @param datasetId the dataset ID of Datastore to query + * @param query the query to perform + */ + QueryOptions(String host, String datasetId, Query query) { + this.host = host; + this.datasetId = datasetId; + this.setQuery(query); + } + + /** + * Creates and returns a QueryOption object for query on local Datastore. + * + * @param host the host of Datastore to connect + * @param datasetId the dataset ID of Datastore to query + * @param query the query to perform + */ + public static QueryOptions create(String host, String datasetId, Query query) { + return new QueryOptions(host, datasetId, query); + } + + /** + * Sets up a query. + * Stores query in a byte array so that we can use AvroCoder to encode/decode + * QueryOptions. + * + * @param q the query to be addressed + */ + public void setQuery(Query q) { + this.byteQuery = q.toByteArray(); + } + + /** + * Returns query. + * + * @return query in this option. + */ + public Query getQuery() { + try { + return Query.parseFrom(this.byteQuery); + } catch (IOException e) { + LOG.warn("IOException: parsing query failed.", e); + throw new RuntimeException("Cannot parse query from byte array."); + } + } + + /** + * Returns the dataset ID. + * + * @return a dataset ID string for Datastore. + */ + public String getDatasetId() { + return this.datasetId; + } + + /** + * Returns a copy of QueryOptions from current options with given query. + * + * @param query a new query to be set + * @return A QueryOptions object for query + */ + public QueryOptions newQuery(Query query) { + return create(host, datasetId, query); + } + + /** + * Returns a Datastore object for connecting to Datastore on workers. + * This method will try to get worker credential from Credentials + * library and constructs a Datastore object which is set up and + * ready to communicate with Datastore. + * + * @return a Datastore object setup with host and dataset. + */ + public Datastore getWorkerDatastore(GcpOptions options) { + DatastoreOptions.Builder builder = new DatastoreOptions.Builder() + .host(this.host) + .dataset(this.datasetId) + .initializer(new RetryHttpRequestInitializer(null)); + + try { + Credential credential = Credentials.getWorkerCredential(options); + builder.credential(credential); + } catch (IOException e) { + LOG.warn("IOException: can't get credential for worker.", e); + throw new RuntimeException("Failed on getting credential for worker."); + } + return DatastoreFactory.get().create(builder.build()); + } + + /** + * Returns a Datastore object for connecting to Datastore for users. + * This method will use the passed in credentials and construct a Datastore + * object which is set up and ready to communicate with Datastore. + * + * @return a Datastore object setup with host and dataset. + */ + public Datastore getUserDatastore(GcpOptions options) { + DatastoreOptions.Builder builder = new DatastoreOptions.Builder() + .host(this.host) + .dataset(this.datasetId) + .initializer(new RetryHttpRequestInitializer(null)); + + Credential credential = options.getGcpCredential(); + if (credential != null) { + builder.credential(credential); + } + return DatastoreFactory.get().create(builder.build()); + } + } + + /** + * Returns a list of QueryOptions by splitting a QueryOptions into sub-queries. + * This method leverages the QuerySplitter in Datastore to split the + * query into sub-queries for further parallel query in Dataflow service. 
+ * + * @return a PCollection of QueryOptions for split queries + */ + private static PCollection splitQueryOptions( + QueryOptions queryOptions, DataflowPipelineOptions options, + PBegin input) + throws DatastoreException { + Query query = queryOptions.getQuery(); + Datastore datastore = queryOptions.getUserDatastore(options); + + // Get splits from the QuerySplit interface. + List splitQueries = DatastoreHelper.getQuerySplitter() + .getSplits(query, options.getNumWorkers(), datastore); + + List> queryList = new LinkedList<>(); + for (Query q : splitQueries) { + PCollection newQuery = input + .apply(Create.of(queryOptions.newQuery(q))); + queryList.add(newQuery); + } + + // This is a workaround to allow for parallelism of a small collection. + return PCollectionList.of(queryList) + .apply(Flatten.create()); + } + + ///////////////////////////////////////////////////////////////////// + + /** + * Direct mode write evaluator. + * This writes the result to Datastore. + */ + private static void evaluateWriteHelper( + Write.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + LOG.info("Writing to Datastore"); + GcpOptions options = context.getPipelineOptions(); + Credential credential = options.getGcpCredential(); + Datastore datastore = DatastoreFactory.get().create( + new DatastoreOptions.Builder() + .host(transform.host) + .dataset(transform.datasetId) + .credential(credential) + .initializer(new RetryHttpRequestInitializer(null)) + .build()); + + List entityList = context.getPCollection(transform.getInput()); + + // Create a map to put entities with same ancestor for writing in a batch. + HashMap> map = new HashMap<>(); + for (Entity e : entityList) { + String keyOfAncestor = e.getKey().getPathElement(0).getKind() + + e.getKey().getPathElement(0).getName(); + List value = map.get(keyOfAncestor); + if (value == null) { + value = new ArrayList<>(); + } + value.add(e); + map.put(keyOfAncestor, value); + } + + // Walk over the map, and write entities bucket by bucket. + int count = 0; + for (String k : map.keySet()) { + List entitiesWithSameAncestor = map.get(k); + List toInsert = new ArrayList<>(); + for (Entity e : entitiesWithSameAncestor) { + toInsert.add(e); + // Note that Datastore has limit as 500 for a batch operation, + // so just flush to Datastore with every 500 entties. + if (toInsert.size() >= 500) { + writeBatch(toInsert, datastore); + toInsert.clear(); + } + } + writeBatch(toInsert, datastore); + count += entitiesWithSameAncestor.size(); + } + + LOG.info("Total number of entities written: {}", count); + } + + /** + * A function for batch writing to Datastore. 
+ */ + private static void writeBatch(List listOfEntities, Datastore datastore) { + try { + BeginTransactionRequest.Builder treq = BeginTransactionRequest.newBuilder(); + BeginTransactionResponse tres = datastore.beginTransaction(treq.build()); + CommitRequest.Builder creq = CommitRequest.newBuilder(); + creq.setTransaction(tres.getTransaction()); + creq.getMutationBuilder().addAllInsertAutoId(listOfEntities); + datastore.commit(creq.build()); + } catch (DatastoreException e) { + LOG.warn("Error while doing datastore operation: {}", e); + throw new RuntimeException("Datastore exception", e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java new file mode 100644 index 0000000000000..1b6d92e73c76a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.api.services.datastore.DatastoreV1.EntityResult; +import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.api.services.datastore.DatastoreV1.QueryResultBatch; +import com.google.api.services.datastore.DatastoreV1.RunQueryRequest; +import com.google.api.services.datastore.DatastoreV1.RunQueryResponse; +import com.google.api.services.datastore.client.Datastore; +import com.google.api.services.datastore.client.DatastoreException; +import com.google.common.collect.AbstractIterator; + +import java.util.Iterator; + +/** + * An iterator over the records from a query of the datastore. + * + *

+ * <p> Usage:
+ * <pre> {@code
+ *   // Need to pass query and datastore object.
+ *   DatastoreIterator iterator = new DatastoreIterator(query, datastore);
+ *   while (iterator.hasNext()) {
+ *     Entity e = iterator.next().getEntity();
+ *     ...
+ *   }
+ * } </pre>
+ */ +class DatastoreIterator extends AbstractIterator { + /** + * Query to select records. + */ + private Query.Builder query; + + /** + * Datastore to read from. + */ + private Datastore datastore; + + /** + * True if more results may be available. + */ + private boolean moreResults; + + /** + * Iterator over records. + */ + private Iterator entities; + + /** + * Current batch of query results. + */ + private QueryResultBatch currentBatch; + + /** + * Maximum number of results to request per query. + * + *

Must be set, or it may result in an I/O error when querying + * Cloud Datastore. + */ + private static final int QUERY_LIMIT = 5000; + + /** + * Returns a DatastoreIterator with query and Datastore object set. + * + * @param query the query to select records. + * @param datastore a datastore connection to use. + */ + public DatastoreIterator(Query query, Datastore datastore) { + this.query = query.toBuilder().clone(); + this.datastore = datastore; + this.query.setLimit(QUERY_LIMIT); + } + + /** + * Returns an iterator over the next batch of records for the query + * and updates the cursor to get the next batch as needed. + * Query has specified limit and offset from InputSplit. + */ + private Iterator getIteratorAndMoveCursor() + throws DatastoreException{ + if (this.currentBatch != null && this.currentBatch.hasEndCursor()) { + this.query.setStartCursor(this.currentBatch.getEndCursor()); + } + + RunQueryRequest request = RunQueryRequest.newBuilder() + .setQuery(this.query) + .build(); + RunQueryResponse response = this.datastore.runQuery(request); + + this.currentBatch = response.getBatch(); + + // MORE_RESULTS_AFTER_LIMIT is not implemented yet: + // https://groups.google.com/forum/#!topic/gcd-discuss/iNs6M1jA2Vw, so + // use result count to determine if more results might exist. + int numFetch = this.currentBatch.getEntityResultCount(); + moreResults = numFetch == QUERY_LIMIT; + + // May receive a batch of 0 results if the number of records is a multiple + // of the request limit. + if (numFetch == 0) { + return null; + } + + return this.currentBatch.getEntityResultList().iterator(); + } + + @Override + public EntityResult computeNext() { + try { + if (entities == null || (!entities.hasNext() && this.moreResults)) { + entities = getIteratorAndMoveCursor(); + } + + if (entities == null || !entities.hasNext()) { + return endOfData(); + } + + return entities.next(); + + } catch (DatastoreException e) { + throw new RuntimeException( + "Datastore error while iterating over entities", e); + } + } +} + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java new file mode 100644 index 0000000000000..b9f0514841590 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -0,0 +1,331 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PInput; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * [Whitelisting Required] Read and Write transforms for Pub/Sub streams. These transforms create + * and consume unbounded {@link com.google.cloud.dataflow.sdk.values.PCollection}s. + * + *

+ * <p> Important: PubsubIO is experimental. It is not supported by the
+ * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and is only supported in the
+ * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a
+ * streaming early access program and who enable
+ * {@link com.google.cloud.dataflow.sdk.options.StreamingOptions#setStreaming(boolean)}.
+ *
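+ * <p> Streaming execution would typically be requested up front; a minimal sketch
+ * (assuming the pipeline's options object is in scope as {@code options}):
+ * {@code options.as(StreamingOptions.class).setStreaming(true);}
+ *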

You should expect this class to change significantly in future versions of the SDK + * or be removed entirely. + */ +public class PubsubIO { + + /** + * Project IDs must contain 6-63 lowercase letters, digits, or dashes. + * IDs must start with a letter and may not end with a dash. + * This regex isn't exact - this allows for patterns that would be rejected by + * the service, but this is sufficient for basic parsing of table references. + */ + private static final Pattern PROJECT_ID_REGEXP = + Pattern.compile("[a-z][-a-z0-9:.]{4,61}[a-z0-9]"); + + private static final Pattern SUBSCRIPTION_REGEXP = + Pattern.compile("/subscriptions/([^/]+)/(.+)"); + + private static final Pattern TOPIC_REGEXP = + Pattern.compile("/topics/([^/]+)/(.+)"); + + private static final Pattern PUBSUB_NAME_REGEXP = + Pattern.compile("[a-z][-._a-z0-9]+[a-z0-9]"); + + private static final int PUBSUB_NAME_MAX_LENGTH = 255; + + private static final String SUBSCRIPTION_RANDOM_TEST_PREFIX = "_random/"; + private static final String TOPIC_DEV_NULL_TEST_NAME = "/topics/dev/null"; + + /** + * Utility class to validate topic and subscription names. + */ + public static class Validator { + public static void validateTopicName(String topic) { + if (topic.equals(TOPIC_DEV_NULL_TEST_NAME)) { + return; + } + Matcher match = TOPIC_REGEXP.matcher(topic); + if (!match.matches()) { + throw new IllegalArgumentException( + "Pubsub topic is not in /topics/project_id/topic_name format: " + + topic); + } + validateProjectName(match.group(1)); + validatePubsubName(match.group(2)); + } + + public static void validateSubscriptionName(String subscription) { + if (subscription.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX)) { + return; + } + Matcher match = SUBSCRIPTION_REGEXP.matcher(subscription); + if (!match.matches()) { + throw new IllegalArgumentException( + "Pubsub subscription is not in /subscriptions/project_id/subscription_name format: " + + subscription); + } + validateProjectName(match.group(1)); + validatePubsubName(match.group(2)); + } + + private static void validateProjectName(String project) { + Matcher match = PROJECT_ID_REGEXP.matcher(project); + if (!match.matches()) { + throw new IllegalArgumentException( + "Illegal project name specified in Pubsub subscription: " + project); + } + } + + private static void validatePubsubName(String name) { + if (name.length() > PUBSUB_NAME_MAX_LENGTH) { + throw new IllegalArgumentException( + "Pubsub object name is longer than 255 characters: " + name); + } + + if (name.startsWith("goog")) { + throw new IllegalArgumentException( + "Pubsub object name cannot start with goog: " + name); + } + + Matcher match = PUBSUB_NAME_REGEXP.matcher(name); + if (!match.matches()) { + throw new IllegalArgumentException( + "Illegal Pubsub object name specified: " + name + + " Please see Javadoc for naming rules."); + } + } + } + + /** + * A PTransform that continuously reads from a Pubsub stream and + * returns a {@code PCollection} containing the items from + * the stream. + */ + // TODO: Support non-String encodings. + public static class Read { + public static Bound named(String name) { + return new Bound().named(name); + } + + /** + * Creates and returns a PubsubIO.Read PTransform for reading from + * a Pubsub topic with the specified publisher topic. Format for + * Cloud Pubsub topic names should be of the form /topics//, + * where is the name of the publishing project. + * The component must comply with the below requirements. + *

+ * <ul>
+ * <li> Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods
+ * ('.').</li>
+ * <li> Must be between 3 and 255 characters.</li>
+ * <li> Must begin with a letter.</li>
+ * <li> Must end with a letter or a number.</li>
+ * <li> Cannot begin with 'goog' prefix.</li>
+ * </ul>
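+ *
+ * <p> A minimal read from a topic might look like the following sketch (assumes a
+ * streaming-enabled {@code Pipeline p}; the topic name is a placeholder):
+ * <pre> {@code
+ * PCollection<String> messages =
+ *     p.apply(PubsubIO.Read.named("ReadMyTopic")
+ *                          .topic("/topics/my-project/my-topic"));
+ * } </pre>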
+ */ + public static Bound topic(String topic) { + return new Bound().topic(topic); + } + + /** + * Creates and returns a PubsubIO.Read PTransform for reading from + * a specific Pubsub subscription. Mutually exclusive with + * PubsubIO.Read.topic(). + * Cloud Pubsub subscription names should be of the form + * /subscriptions//<, + * where is the name of the project the subscription belongs to. + * The component must comply with the below requirements. + *
+ * <ul>
+ * <li> Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods
+ * ('.').</li>
+ * <li> Must be between 3 and 255 characters.</li>
+ * <li> Must begin with a letter.</li>
+ * <li> Must end with a letter or a number.</li>
+ * <li> Cannot begin with 'goog' prefix.</li>
+ * </ul>
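+ *
+ * <p> Reading from an existing subscription follows the same shape (sketch; names
+ * are placeholders):
+ * <pre> {@code
+ * PCollection<String> messages =
+ *     p.apply(PubsubIO.Read.named("ReadMySubscription")
+ *                          .subscription("/subscriptions/my-project/my-subscription"));
+ * } </pre>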
+ */ + public static Bound subscription(String subscription) { + return new Bound().subscription(subscription); + } + + /** + * A PTransform that reads from a PubSub source and returns + * a unbounded PCollection containing the items from the stream. + */ + public static class Bound + extends PTransform> { + /** The Pubsub topic to read from. */ + String topic; + /** The Pubsub subscription to read from */ + String subscription; + + Bound() {} + + Bound(String name, String subscription, String topic) { + super(name); + if (subscription != null) { + Validator.validateSubscriptionName(subscription); + } + if (topic != null) { + Validator.validateTopicName(topic); + } + this.subscription = subscription; + this.topic = topic; + } + + public Bound named(String name) { + return new Bound(name, subscription, topic); + } + + public Bound subscription(String subscription) { + return new Bound(name, subscription, topic); + } + + public Bound topic(String topic) { + return new Bound(name, subscription, topic); + } + + @Override + public PCollection apply(PInput input) { + if (topic == null && subscription == null) { + throw new IllegalStateException( + "need to set either the topic or the subscription for " + + "a PubsubIO.Read transform"); + } + if (topic != null && subscription != null) { + throw new IllegalStateException( + "Can't set both the topic and the subscription for a " + + "PubsubIO.Read transform"); + } + return PCollection.createPrimitiveOutputInternal( + new GlobalWindow()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return StringUtf8Coder.of(); + } + + @Override + protected String getKindString() { return "PubsubIO.Read"; } + + public String getTopic() { + return topic; + } + + public String getSubscription() { + return subscription; + } + + static { + // TODO: Figure out how to make this work under + // DirectPipelineRunner. + } + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A PTransform that continuously writes a + * {@code PCollection} to a Pubsub stream. + */ + // TODO: Support non-String encodings. + public static class Write { + public static Bound named(String name) { + return new Bound().named(name); + } + + /** The topic to publish to. + * Cloud Pubsub topic names should be /topics//, + * where is the name of the publishing project. + */ + public static Bound topic(String topic) { + return new Bound().topic(topic); + } + + /** + * A PTransfrom that writes a unbounded {@code PCollection} + * to a PubSub stream. + */ + public static class Bound + extends PTransform, PDone> { + /** The Pubsub topic to publish to. */ + String topic; + + Bound() {} + + Bound(String name, String topic) { + super(name); + if (topic != null) { + Validator.validateTopicName(topic); + this.topic = topic; + } + } + + public Bound named(String name) { + return new Bound(name, topic); + } + + public Bound topic(String topic) { + return new Bound(name, topic); + } + + @Override + public PDone apply(PCollection input) { + if (topic == null) { + throw new IllegalStateException( + "need to set the topic of a PubsubIO.Write transform"); + } + return new PDone(); + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + + @Override + protected String getKindString() { return "PubsubIO.Write"; } + + public String getTopic() { + return topic; + } + + static { + // TODO: Figure out how to make this work under + // DirectPipelineRunner. 
+ } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java new file mode 100644 index 0000000000000..5ab0a99084b84 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +/** + * Standard shard naming templates. + * + *

+ * <p> Shard naming templates are strings which may contain placeholders for
+ * the shard number and shard count. When constructing a filename for a
+ * particular shard number, the upper-case letters 'S' and 'N' are replaced
+ * with the 0-padded shard number and shard count respectively.
+ *

+ * <p> Left-padding of the numbers enables lexicographical sorting of the
+ * resulting filenames. If the shard number or count is too large for the
+ * space provided in the template, then the result may no longer sort
+ * lexicographically. For example, a shard template of "S-of-N", for 200
+ * shards, will result in outputs named "0-of-200", ..., "10-of-200",
+ * "100-of-200", etc.
+ *

+ * <p> Shard numbers start with 0, so the last shard number is the shard count
+ * minus one. For example, the template "-SSSSS-of-NNNNN" will be
+ * instantiated as "-00000-of-01000" for the first shard (shard 0) of a
+ * 1000-way sharded output.
+ *

+ * <p> A shard name template is typically provided along with a name prefix
+ * and suffix, which allows constructing complex paths which have embedded
+ * shard information. For example, outputs in the form
+ * "gs://bucket/path-01-of-99.txt" could be constructed by providing the
+ * individual components:
+ *

+ * <pre> {@code
+ *   pipeline.apply(
+ *       TextIO.Write.to("gs://bucket/path")
+ *                   .withShardNameTemplate("-SS-of-NN")
+ *                   .withSuffix(".txt"))
+ * } </pre>
+ *

+ * <p> In the example above, parts of the output name can be made configurable
+ * by users without requiring them to specify all components of the output
+ * name.
+ *
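+ * <p> For instance (illustrative values), with the components above, shard 2 of a
+ * 10-shard output would be written to "gs://bucket/path-02-of-10.txt".
+ *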

If a shard name template does not contain any repeating 'S', then + * the output shard count must be 1, as otherwise the same filename would be + * generated for multiple shards. + */ +public class ShardNameTemplate { + /** + * Shard name containing the index and max. + * + *

Eg: [prefix]-00000-of-00100[suffix] and + * [prefix]-00001-of-00100[suffix] + */ + public static final String INDEX_OF_MAX = "-SSSSS-of-NNNNN"; + + /** + * Shard is a file within a directory. + * + *

Eg: [prefix]/part-00000[suffix] and [prefix]/part-00001[suffix] + */ + public static final String DIRECTORY_CONTAINER = "/part-SSSSS"; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java new file mode 100644 index 0000000000000..5d1cb205b4224 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -0,0 +1,567 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static com.google.cloud.dataflow.sdk.util.CloudSourceUtils.readElemsFromSource; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.TextSink; +import com.google.cloud.dataflow.sdk.runners.worker.TextSource; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PInput; + +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +/** + * Transforms for reading and writing text files. + * + *

+ * <p> To read a {@link PCollection} from one or more text files, use
+ * {@link TextIO.Read}, specifying {@link TextIO.Read#from} to specify
+ * the path of the file(s) to read from (e.g., a local filename or
+ * filename pattern if running locally, or a Google Cloud Storage
+ * filename or filename pattern of the form
+ * {@code "gs://<bucket>/<filepath>"}), and optionally
+ * {@link TextIO.Read#named} to specify the name of the pipeline step
+ * and/or {@link TextIO.Read#withCoder} to specify the Coder to use to
+ * decode the text lines into Java values. For example:
+ *

+ * <pre> {@code
+ * Pipeline p = ...;
+ *
+ * // A simple Read of a local file (only runs locally):
+ * PCollection<String> lines =
+ *     p.apply(TextIO.Read.from("/path/to/file.txt"));
+ *
+ * // A fully-specified Read from a GCS file (runs locally and via the
+ * // Google Cloud Dataflow service):
+ * PCollection<Integer> numbers =
+ *     p.apply(TextIO.Read.named("ReadNumbers")
+ *                        .from("gs://my_bucket/path/to/numbers-*.txt")
+ *                        .withCoder(TextualIntegerCoder.of()));
+ * } </pre>
+ * + *

+ * <p> To write a {@link PCollection} to one or more text files, use
+ * {@link TextIO.Write}, specifying {@link TextIO.Write#to} to specify
+ * the path of the file to write to (e.g., a local filename or sharded
+ * filename pattern if running locally, or a Google Cloud Storage
+ * filename or sharded filename pattern of the form
+ * {@code "gs://<bucket>/<filepath>"}), and optionally
+ * {@link TextIO.Write#named} to specify the name of the pipeline step
+ * and/or {@link TextIO.Write#withCoder} to specify the Coder to use
+ * to encode the Java values into text lines. For example:
+ *

+ * <pre> {@code
+ * // A simple Write to a local file (only runs locally):
+ * PCollection<String> lines = ...;
+ * lines.apply(TextIO.Write.to("/path/to/file.txt"));
+ *
+ * // A fully-specified Write to a sharded GCS file (runs locally and via the
+ * // Google Cloud Dataflow service):
+ * PCollection<Integer> numbers = ...;
+ * numbers.apply(TextIO.Write.named("WriteNumbers")
+ *                           .to("gs://my_bucket/path/to/numbers")
+ *                           .withSuffix(".txt")
+ *                           .withCoder(TextualIntegerCoder.of()));
+ * } </pre>
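+ *
+ * <p> With the default {@link ShardNameTemplate#INDEX_OF_MAX} template, the write above
+ * would produce files such as "gs://my_bucket/path/to/numbers-00000-of-00003.txt"
+ * (an illustrative name; the shard count is chosen by the service unless constrained).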
+ */ +public class TextIO { + public static final Coder DEFAULT_TEXT_CODER = StringUtf8Coder.of(); + + /** + * A root PTransform that reads from a text file (or multiple text + * files matching a pattern) and returns a PCollection containing + * the decoding of each of the lines of the text file(s). The + * default decoding just returns the lines. + */ + public static class Read { + /** + * Returns a TextIO.Read PTransform with the given step name. + */ + public static Bound named(String name) { + return new Bound<>(DEFAULT_TEXT_CODER).named(name); + } + + /** + * Returns a TextIO.Read PTransform that reads from the file(s) + * with the given name or pattern. This can be a local filename + * or filename pattern (if running locally), or a Google Cloud + * Storage filename or filename pattern of the form + * {@code "gs:///"}) (if running locally or via + * the Google Cloud Dataflow service). Standard + * Java Filesystem glob patterns ("*", "?", "[..]") are supported. + */ + public static Bound from(String filepattern) { + return new Bound<>(DEFAULT_TEXT_CODER).from(filepattern); + } + + /** + * Returns a TextIO.Read PTransform that uses the given + * {@code Coder} to decode each of the lines of the file into a + * value of type {@code T}. + * + *

By default, uses {@link StringUtf8Coder}, which just + * returns the text lines as Java strings. + * + * @param the type of the decoded elements, and the elements + * of the resulting PCollection + */ + public static Bound withCoder(Coder coder) { + return new Bound<>(coder); + } + + // TODO: strippingNewlines, gzipped, etc. + + /** + * A root PTransform that reads from a text file (or multiple text files + * matching a pattern) and returns a bounded PCollection containing the + * decoding of each of the lines of the text file(s). The default + * decoding just returns the lines. + * + * @param the type of each of the elements of the resulting + * PCollection, decoded from the lines of the text file + */ + public static class Bound + extends PTransform> { + /** The filepattern to read from. */ + @Nullable final String filepattern; + + /** The Coder to use to decode each line. */ + @Nullable final Coder coder; + + Bound(Coder coder) { + this(null, null, coder); + } + + Bound(String name, String filepattern, Coder coder) { + super(name); + this.coder = coder; + this.filepattern = filepattern; + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but + * with the given step name. Does not modify this object. + */ + public Bound named(String name) { + return new Bound<>(name, filepattern, coder); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but + * that reads from the file(s) with the given name or pattern. + * (See {@link TextIO.Read#from} for a description of + * filepatterns.) Does not modify this object. + */ + public Bound from(String filepattern) { + return new Bound<>(name, filepattern, coder); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but + * that uses the given {@code Coder} to decode each of the + * lines of the file into a value of type {@code T1}. Does not + * modify this object. + * + * @param the type of the decoded elements, and the + * elements of the resulting PCollection + */ + public Bound withCoder(Coder coder) { + return new Bound<>(name, filepattern, coder); + } + + @Override + public PCollection apply(PInput input) { + if (filepattern == null) { + throw new IllegalStateException( + "need to set the filepattern of a TextIO.Read transform"); + } + // Force the output's Coder to be what the read is using, and + // unchangeable later, to ensure that we read the input in the + // format specified by the Read transform. + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + .setCoder(coder); + } + + @Override + protected Coder getDefaultOutputCoder() { + return coder; + } + + @Override + protected String getKindString() { return "TextIO.Read"; } + + public String getFilepattern() { + return filepattern; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateReadHelper(transform, context); + } + }); + } + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A PTransform that writes a PCollection to a text file (or + * multiple text files matching a sharding pattern), with each + * PCollection element being encoded into its own line. + */ + public static class Write { + /** + * Returns a TextIO.Write PTransform with the given step name. 
+ */ + public static Bound named(String name) { + return new Bound<>(DEFAULT_TEXT_CODER).named(name); + } + + /** + * Returns a TextIO.Write PTransform that writes to the file(s) + * with the given prefix. This can be a local filename + * (if running locally), or a Google Cloud Storage filename of + * the form {@code "gs:///"}) + * (if running locally or via the Google Cloud Dataflow service). + * + *

The files written will begin with this prefix, followed by + * a shard identifier (see {@link Bound#withNumShards}, and end + * in a common extension, if given by {@link Bound#withSuffix}. + */ + public static Bound to(String prefix) { + return new Bound<>(DEFAULT_TEXT_CODER).to(prefix); + } + + /** + * Returns a TextIO.Write PTransform that writes to the file(s) with the + * given filename suffix. + */ + public static Bound withSuffix(String nameExtension) { + return new Bound<>(DEFAULT_TEXT_CODER).withSuffix(nameExtension); + } + + /** + * Returns a TextIO.Write PTransform that uses the provided shard count. + * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Setting this value is not recommended + * unless you require a specific number of output files. + * + * @param numShards the number of shards to use, or 0 to let the system + * decide. + */ + public static Bound withNumShards(int numShards) { + return new Bound<>(DEFAULT_TEXT_CODER).withNumShards(numShards); + } + + /** + * Returns a TextIO.Write PTransform that uses the given shard name + * template. + * + *

See {@link ShardNameTemplate} for a description of shard templates. + */ + public static Bound withShardNameTemplate(String shardTemplate) { + return new Bound<>(DEFAULT_TEXT_CODER) + .withShardNameTemplate(shardTemplate); + } + + /** + * Returns a TextIO.Write PTransform that forces a single file as + * output. + */ + public static Bound withoutSharding() { + return new Bound<>(DEFAULT_TEXT_CODER).withoutSharding(); + } + + /** + * Returns a TextIO.Write PTransform that uses the given + * {@code Coder} to encode each of the elements of the input + * {@code PCollection} into an output text line. + * + *

By default, uses {@link StringUtf8Coder}, which writes input + * Java strings directly as output lines. + * + * @param the type of the elements of the input PCollection + */ + public static Bound withCoder(Coder coder) { + return new Bound<>(coder); + } + + // TODO: appendingNewlines, gzipped, header, footer, etc. + + /** + * A PTransform that writes a bounded PCollection to a text file (or + * multiple text files matching a sharding pattern), with each + * PCollection element being encoded into its own line. + * + * @param the type of the elements of the input PCollection + */ + public static class Bound + extends PTransform, PDone> { + /** The filename to write to. */ + @Nullable final String filenamePrefix; + /** Suffix to use for each filename. */ + final String filenameSuffix; + + /** The Coder to use to decode each line. */ + final Coder coder; + + /** Requested number of shards. 0 for automatic. */ + final int numShards; + + /** Shard template string. */ + final String shardTemplate; + + Bound(Coder coder) { + this(null, null, "", coder, 0, ShardNameTemplate.INDEX_OF_MAX); + } + + Bound(String name, String filenamePrefix, String filenameSuffix, + Coder coder, int numShards, + String shardTemplate) { + super(name); + this.coder = coder; + this.filenamePrefix = filenamePrefix; + this.filenameSuffix = filenameSuffix; + this.numShards = numShards; + this.shardTemplate = shardTemplate; + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * with the given step name. Does not modify this object. + */ + public Bound named(String name) { + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, + shardTemplate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that writes to the file(s) with the given filename prefix. + * + *

See {@link Write#to(String) Write.to(String)} for more information. + * + *

Does not modify this object. + */ + public Bound to(String filenamePrefix) { + validateOutputComponent(filenamePrefix); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, + shardTemplate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that writes to the file(s) with the given filename suffix. + * + *

Does not modify this object. + * + * @see ShardNameTemplate + */ + public Bound withSuffix(String nameExtension) { + validateOutputComponent(nameExtension); + return new Bound<>(name, filenamePrefix, nameExtension, coder, numShards, + shardTemplate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that uses the provided shard count. + * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Setting this value is not recommended + * unless you require a specific number of output files. + * + *

Does not modify this object. + * + * @param numShards the number of shards to use, or 0 to let the system + * decide. + * @see ShardNameTemplate + */ + public Bound withNumShards(int numShards) { + Preconditions.checkArgument(numShards >= 0); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, + shardTemplate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that uses the given shard name template. + * + *

Does not modify this object. + * + * @see ShardNameTemplate + */ + public Bound withShardNameTemplate(String shardTemplate) { + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, + shardTemplate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that forces a single file as output. + * + *

This is a shortcut for + * {@code .withNumShards(1).withShardNameTemplate("")} + * + *

Does not modify this object. + */ + public Bound withoutSharding() { + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, 1, ""); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one + * but that uses the given {@code Coder} to encode each of + * the elements of the input {@code PCollection} into an + * output text line. Does not modify this object. + * + * @param the type of the elements of the input PCollection + */ + public Bound withCoder(Coder coder) { + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, + shardTemplate); + } + + @Override + public PDone apply(PCollection input) { + if (filenamePrefix == null) { + throw new IllegalStateException( + "need to set the filename prefix of a TextIO.Write transform"); + } + return new PDone(); + } + + /** + * Returns the current shard name template string. + */ + public String getShardNameTemplate() { + return shardTemplate; + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + + @Override + protected String getKindString() { return "TextIO.Write"; } + + public String getFilenamePrefix() { + return filenamePrefix; + } + + public String getShardTemplate() { + return shardTemplate; + } + + public int getNumShards() { + return numShards; + } + + public String getFilenameSuffix() { + return filenameSuffix; + } + + public Coder getCoder() { + return coder; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateWriteHelper(transform, context); + } + }); + } + } + } + + // Pattern which matches old-style shard output patterns, which are now + // disallowed. + private static final Pattern SHARD_OUTPUT_PATTERN = + Pattern.compile("@([0-9]+|\\*)"); + + private static void validateOutputComponent(String partialFilePattern) { + Preconditions.checkArgument( + !SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find(), + "Output name components are not allowed to contain @* or @N patterns: " + + partialFilePattern); + } + + ////////////////////////////////////////////////////////////////////////////// + + private static void evaluateReadHelper( + Read.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + TextSource source = new TextSource<>( + transform.filepattern, true, null, null, transform.coder); + List elems = readElemsFromSource(source); + context.setPCollection(transform.getOutput(), elems); + } + + private static void evaluateWriteHelper( + Write.Bound transform, + DirectPipelineRunner.EvaluationContext context) { + List elems = context.getPCollection(transform.getInput()); + int numShards = transform.numShards; + if (numShards < 1) { + // System gets to choose. For direct mode, choose 1. 
+ numShards = 1; + } + TextSink> writer = TextSink.createForDirectPipelineRunner( + transform.filenamePrefix, transform.getShardNameTemplate(), + transform.filenameSuffix, numShards, + true, null, null, transform.coder); + try (Sink.SinkWriter> sink = writer.writer()) { + for (T elem : elems) { + sink.add(WindowedValue.valueInGlobalWindow(elem)); + } + } catch (IOException exn) { + throw new RuntimeException( + "unable to write to output file \"" + transform.filenamePrefix + "\"", + exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java new file mode 100644 index 0000000000000..886255e271d23 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines transforms for reading and writing common storage formats, including + * {@link com.google.cloud.dataflow.sdk.io.AvroIO}, + * {@link com.google.cloud.dataflow.sdk.io.BigQueryIO}, and + * {@link com.google.cloud.dataflow.sdk.io.TextIO}. + * + *

+ * <p> The classes in this package provide {@code Read} transforms which create PCollections
+ * from existing storage:
+ *

+ * {@code
+ * PCollection<TableRow> inputData = pipeline.apply(
+ *     BigQueryIO.Read.named("Read")
+ *                    .from("clouddataflow-readonly:samples.weather_stations"));
+ * }
+ * and {@code Write} transforms which persist PCollections to external storage: + *
+ * {@code
+ * PCollection numbers = ...;
+ * numbers.apply(TextIO.Write.named("WriteNumbers")
+ *                           .to("gs://my_bucket/path/to/numbers"));
+ * } 
+ */ +package com.google.cloud.dataflow.sdk.io; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java new file mode 100644 index 0000000000000..327e5c08445cb --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * Options that allow setting the application name. + */ +public interface ApplicationNameOptions extends PipelineOptions { + /** + * Name of application, for display purposes. + *

+ * Defaults to the name of the class which constructs the + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}. + */ + @Description("Application name. Defaults to the name of the class which " + + "constructs the Pipeline.") + String getAppName(); + void setAppName(String value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java new file mode 100644 index 0000000000000..b764f20918b02 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * Properties needed when using BigQuery with the Dataflow SDK. + */ +public interface BigQueryOptions extends ApplicationNameOptions, GcpOptions, + PipelineOptions, StreamingOptions { + @Description("Temporary staging dataset ID for BigQuery " + + "table operations") + @Default.String("bigquery.googleapis.com/cloud_dataflow") + String getTempDatasetId(); + void setTempDatasetId(String value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java new file mode 100644 index 0000000000000..cdd5019b5df14 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import java.io.PrintStream; + +/** + * Options which are used to configure the {@link BlockingDataflowPipelineRunner}. + */ +public interface BlockingDataflowPipelineOptions extends DataflowPipelineOptions { + /** + * Output stream for job status messages. + */ + @JsonIgnore + @Default.InstanceFactory(StandardOutputFactory.class) + PrintStream getJobMessageOutput(); + void setJobMessageOutput(PrintStream value); + + /** + * Returns a default of {@link System#out}. 
+ */ + public static class StandardOutputFactory implements DefaultValueFactory { + @Override + public PrintStream create(PipelineOptions options) { + return System.out; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java new file mode 100644 index 0000000000000..76de6e6dd8bf6 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.util.List; + +/** + * Options used for testing and debugging the Dataflow SDK. + */ +public interface DataflowPipelineDebugOptions extends PipelineOptions { + /** + * Dataflow endpoint to use. + * + *

Defaults to the current version of the Google Cloud Dataflow + * API, at the time the current SDK version was released. + * + *

If the string contains "://", then this is treated as a url, + * otherwise {@link #getApiRootUrl()} is used as the root + * url. + */ + @Description("Cloud Dataflow Endpoint") + @Default.String("dataflow/v1b3/projects/") + String getDataflowEndpoint(); + void setDataflowEndpoint(String value); + + /** + * The list of backend experiments to enable. + * + *

Dataflow provides a number of experimental features that can be enabled + * with this flag. + * + *

Please sync with the Dataflow team when enabling any experiments. + */ + @Description("Backend experiments to enable.") + List getExperiments(); + void setExperiments(List value); + + /** + * The API endpoint to use when communicating with the Dataflow service. + */ + @Description("Google Cloud root API") + @Default.String("https://www.googleapis.com/") + String getApiRootUrl(); + void setApiRootUrl(String value); + + /** + * The path to write the translated Dataflow specification out to + * at job submission time. + */ + @Description("File for writing dataflow job descriptions") + String getDataflowJobFile(); + void setDataflowJobFile(String value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java new file mode 100644 index 0000000000000..7d05088732328 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.api.services.dataflow.Dataflow; +import com.google.cloud.dataflow.sdk.runners.DataflowPipeline; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.common.base.MoreObjects; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import org.joda.time.DateTimeUtils; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +/** + * Options which can be used to configure the {@link DataflowPipeline}. + */ +public interface DataflowPipelineOptions extends + PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions, + DataflowPipelineShuffleOptions, DataflowPipelineWorkerPoolOptions, BigQueryOptions, + GcsOptions, StreamingOptions { + + /** + * GCS path for temporary files. + *

+ * Must be a valid Cloud Storage url, beginning with the prefix "gs://" + *

+ * At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If + * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using + * {@link #getStagingLocation()}. + */ + @Description("GCS path for temporary files, eg \"gs://bucket/object\". " + + "Defaults to stagingLocation.") + String getTempLocation(); + void setTempLocation(String value); + + /** + * GCS path for staging local files. + *

+ * <p> If {@link #getStagingLocation()} is not set, then the Dataflow pipeline defaults to a staging
+ * directory within {@link #getTempLocation}.
+ *
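+ * <p> These locations are typically supplied as command-line flags, for example
+ * (bucket name is a placeholder):
+ * {@code --stagingLocation=gs://my-bucket/staging --tempLocation=gs://my-bucket/tmp}
+ *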

+ * <p> At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. + */ + @Description("GCS staging path. Defaults to a staging directory" + + " with the tempLocation") + String getStagingLocation(); + void setStagingLocation(String value); + + /** + * The job name is used as an idempotence key within the Dataflow service. If there + * is an existing job which is currently active, another job with the same name will + * not be able to be created. + */ + @Description("Dataflow job name, to uniquely identify active jobs. " + + "Defaults to using the ApplicationName-UserName-Date.") + @Default.InstanceFactory(JobNameFactory.class) + String getJobName(); + void setJobName(String value); + + /** + * Returns a normalized job name constructed from {@link ApplicationNameOptions#getAppName()}, the + * local system user name (if available), and the current time. The normalization makes sure that + * the job name matches the required pattern of [a-z]([-a-z0-9]*[a-z0-9])? and length limit of 40 + * characters. + *
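+ * <p> For instance (an illustrative value), an appName of "WordCount" for user "jane.doe",
+ * started on March 4 at 14:25:30 UTC, would yield a job name like
+ * "wordcount-jane0doe-0304142530".
+ *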

+ * This job name factory is only able to generate one unique name per second per application and + * user combination. + */ + public static class JobNameFactory implements DefaultValueFactory { + private static final DateTimeFormatter FORMATTER = + DateTimeFormat.forPattern("MMddHHmmss").withZone(DateTimeZone.UTC); + private static final int MAX_APP_NAME = 19; + private static final int MAX_USER_NAME = 9; + + @Override + public String create(PipelineOptions options) { + String appName = options.as(ApplicationNameOptions.class).getAppName(); + String normalizedAppName = appName == null || appName.length() == 0 ? "dataflow" + : appName.toLowerCase() + .replaceAll("[^a-z0-9]", "0") + .replaceAll("^[^a-z]", "a"); + String userName = MoreObjects.firstNonNull(System.getProperty("user.name"), ""); + String normalizedUserName = userName.toLowerCase() + .replaceAll("[^a-z0-9]", "0"); + String datePart = FORMATTER.print(DateTimeUtils.currentTimeMillis()); + + // Maximize the amount of the app name and user name we can use. + normalizedAppName = normalizedAppName.substring(0, + Math.min(normalizedAppName.length(), + MAX_APP_NAME + Math.max(0, MAX_USER_NAME - normalizedUserName.length()))); + normalizedUserName = normalizedUserName.substring(0, + Math.min(userName.length(), + MAX_USER_NAME + Math.max(0, MAX_APP_NAME - normalizedAppName.length()))); + return normalizedAppName + "-" + normalizedUserName + "-" + datePart; + } + } + + /** Alternative Dataflow client */ + @JsonIgnore + @Default.InstanceFactory(DataflowClientFactory.class) + Dataflow getDataflowClient(); + void setDataflowClient(Dataflow value); + + /** Returns the default Dataflow client built from the passed in PipelineOptions. */ + public static class DataflowClientFactory implements DefaultValueFactory { + @Override + public Dataflow create(PipelineOptions options) { + return Transport.newDataflowClient(options.as(DataflowPipelineOptions.class)).build(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java new file mode 100644 index 0000000000000..f59f5eb5d78cd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * Options for Shuffle workers. Most users should not need to adjust the settings in this section. + */ +public interface DataflowPipelineShuffleOptions { + /** + * Disk source image to use by shuffle VMs for jobs. + * @see Compute Engine Images + */ + @Description("Dataflow shuffle VM disk image.") + String getShuffleDiskSourceImage(); + void setShuffleDiskSourceImage(String value); + + /** + * Number of workers to use with the shuffle appliance, or 0 to use + * the default number of workers. 
+ */ + @Description("Number of shuffle workers, when using remote execution") + int getShuffleNumWorkers(); + void setShuffleNumWorkers(int value); + + /** + * Remote shuffle worker disk size, in gigabytes, or 0 to use the + * default size. + */ + @Description("Remote shuffle worker disk size, in gigabytes, or 0 to use the default size.") + int getShuffleDiskSizeGb(); + void setShuffleDiskSizeGb(int value); + + /** + * GCE availability zone for launching shuffle workers. + * + *

Default is up to the service. + */ + @Description("GCE availability zone for launching shuffle workers. " + + "Default is up to the service") + String getShuffleZone(); + void setShuffleZone(String value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java new file mode 100644 index 0000000000000..6cd9839318630 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.util.List; + +/** + * Options which are used to configure the Dataflow pipeline worker pool. + */ +public interface DataflowPipelineWorkerPoolOptions { + /** + * Disk source image to use by VMs for jobs. + * @see Compute Engine Images + */ + @Description("Dataflow VM disk image.") + String getDiskSourceImage(); + void setDiskSourceImage(String value); + + /** + * Number of workers to use in remote execution. + */ + @Description("Number of workers, when using remote execution") + @Default.Integer(3) + int getNumWorkers(); + void setNumWorkers(int value); + + /** + * Remote worker disk size, in gigabytes, or 0 to use the default size. + */ + @Description("Remote worker disk size, in gigabytes, or 0 to use the default size.") + int getDiskSizeGb(); + void setDiskSizeGb(int value); + + /** + * GCE availability zone for launching workers. + * + *

Default is up to the service. + */ + @Description("GCE availability zone for launching workers. " + + "Default is up to the service") + String getZone(); + void setZone(String value); + + /** + * Type of API for handling cluster management,i.e. resizing, healthchecking, etc. + */ + public enum ClusterManagerApiType { + COMPUTE_ENGINE("compute.googleapis.com"), + REPLICA_POOL("replicapool.googleapis.com"); + + private final String apiServiceName; + + private ClusterManagerApiType(String apiServiceName) { + this.apiServiceName = apiServiceName; + } + + public String getApiServiceName() { + return this.apiServiceName; + } + } + + @Description("Type of API for handling cluster management,i.e. resizing, healthchecking, etc.") + @Default.InstanceFactory(ClusterManagerApiTypeFactory.class) + ClusterManagerApiType getClusterManagerApi(); + void setClusterManagerApi(ClusterManagerApiType value); + + /** Returns the default COMPUTE_ENGINE ClusterManagerApiType. */ + public static class ClusterManagerApiTypeFactory implements + DefaultValueFactory { + @Override + public ClusterManagerApiType create(PipelineOptions options) { + return ClusterManagerApiType.COMPUTE_ENGINE; + } + } + + /** + * Machine type to create worker VMs as. + */ + @Description("Dataflow VM machine type for workers.") + String getWorkerMachineType(); + void setWorkerMachineType(String value); + + /** + * Machine type to create VMs as. + */ + @Description("Dataflow VM machine type.") + String getMachineType(); + void setMachineType(String value); + + /** + * List of local files to make available to workers. + *

+ * Jars are placed on the worker's classpath. + *

+ * The default value is the list of jars from the main program's classpath. + */ + @Description("Files to stage on GCS and make available to " + + "workers. The default value is all files from the classpath.") + List getFilesToStage(); + void setFilesToStage(List value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java new file mode 100644 index 0000000000000..0b8e1f809cc23 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * Options which are used exclusively within the Dataflow worker harness. + * These options have no effect at pipeline creation time. + */ +public interface DataflowWorkerHarnessOptions extends DataflowPipelineOptions { + /** + * ID of the worker running this pipeline. + */ + String getWorkerId(); + void setWorkerId(String value); + + /** + * ID of the job this pipeline represents. + */ + String getJobId(); + void setJobId(String value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java new file mode 100644 index 0000000000000..321fe744ca49b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * {@link Default} represents a set of annotations which can be used to annotate getter properties + * on {@link PipelineOptions} with information representing the default value to be returned + * if no value is specified. + */ +public @interface Default { + /** + * This represents that the default of the option is the specified {@link java.lang.Class} value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Class { + java.lang.Class value(); + } + + /** + * This represents that the default of the option is the specified {@link java.lang.String} + * value. 
+ */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface String { + java.lang.String value(); + } + + /** + * This represents that the default of the option is the specified boolean primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Boolean { + boolean value(); + } + + /** + * This represents that the default of the option is the specified char primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Character { + char value(); + } + + /** + * This represents that the default of the option is the specified byte primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Byte { + byte value(); + } + /** + * This represents that the default of the option is the specified short primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Short { + short value(); + } + /** + * This represents that the default of the option is the specified int primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Integer { + int value(); + } + + /** + * This represents that the default of the option is the specified long primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Long { + long value(); + } + + /** + * This represents that the default of the option is the specified float primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Float { + float value(); + } + + /** + * This represents that the default of the option is the specified double primitive value. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Double { + double value(); + } + + /** + * Value must be of type {@link DefaultValueFactory} and have a default constructor. + * Value is instantiated and then used as a type factory to generate the default. + *

+ * See {@link DefaultValueFactory} for more details. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface InstanceFactory { + java.lang.Class> value(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java new file mode 100644 index 0000000000000..18fd7827798c5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * An interface used with {@link Default.InstanceFactory} annotation to specify the class which will + * be an instance factory to produce default values for a given getter on {@link PipelineOptions}. + * When a property on a {@link PipelineOptions} is fetched, and is currently unset, the default + * value factory will be instantiated and invoked. + *
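As a rough usage sketch, a getter can delegate its default to a factory that derives the value from other options. The ReportOptions interface, ReportPathFactory class, and the path prefix below are hypothetical and shown only to illustrate the pattern; imports are omitted.

    // Hypothetical options interface using @Default.InstanceFactory; not part of this patch.
    public interface ReportOptions extends PipelineOptions {
      @Description("Directory where generated reports are written.")
      @Default.InstanceFactory(ReportPathFactory.class)
      String getReportPath();
      void setReportPath(String value);

      /** Derives the default from a different property, so no infinite loop is possible here. */
      public static class ReportPathFactory implements DefaultValueFactory<String> {
        @Override
        public String create(PipelineOptions options) {
          String appName = options.as(ApplicationNameOptions.class).getAppName();
          return "/tmp/reports/" + (appName == null ? "dataflow" : appName.toLowerCase());
        }
      }
    }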

+ * Care must be taken to not produce an infinite loop when accessing other fields on the + * {@link PipelineOptions} object. + * + * @param The type of object this factory produces. + */ +public interface DefaultValueFactory { + /** + * Creates a default value for a getter marked with {@link Default.InstanceFactory}. + * + * @param options The current pipeline options. + * @return The default value to be used for the given pipeline options. + */ + T create(PipelineOptions options); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java new file mode 100644 index 0000000000000..9de8b1cd25805 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Machine-readable description for options in {@link PipelineOptions}. + */ +@Target(value = ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +public @interface Description { + String value(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java new file mode 100644 index 0000000000000..85a280d991934 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; + +/** + * Options which can be used to configure the {@link DirectPipeline}. + */ +public interface DirectPipelineOptions extends + ApplicationNameOptions, BigQueryOptions, GcsOptions, GcpOptions, + PipelineOptions, StreamingOptions { + +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java new file mode 100644 index 0000000000000..7dbaa5fb32d9f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.cloud.dataflow.sdk.util.Credentials; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import java.io.File; +import java.io.IOException; +import java.security.GeneralSecurityException; + +/** + * Options used to configure Google Cloud Platform project and credentials. + *

+ * These options configure which of the following 4 different mechanisms for obtaining a credential + * are used: + *

+ * 1. It can fetch the application default credentials.
+ * 2. It can run the gcloud tool in a subprocess to obtain a credential.
+ *    This is the preferred mechanism. The property "GCloudPath" can be
+ *    used to specify where we search for gcloud data.
+ * 3. The user can specify a client secrets file and go through the OAuth2
+ *    webflow. The credential will then be cached in the user's home
+ *    directory for reuse.
+ * 4. The user can specify a file containing a service account private key along
+ *    with the service account name. (A sketch of this variant follows the list.)
+ *
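A minimal sketch of selecting the service-account mechanism (item 4) through flags; the project id, account name, and keyfile path are hypothetical placeholders, and imports are omitted.

    // Rough sketch; the flag values below are hypothetical placeholders.
    public class ServiceAccountSketch {
      public static void main(String[] args) {
        GcpOptions options = PipelineOptionsFactory.fromArgs(new String[] {
            "--project=my-project",
            "--serviceAccountName=robot@developer.gserviceaccount.com",
            "--serviceAccountKeyfile=/path/to/key.p12"
        }).as(GcpOptions.class);
        // getGcpCredential() then lazily builds a Credential from these settings.
        System.out.println(options.getServiceAccountName());
      }
    }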
+ * The default mechanism is to use the + * + * application default credentials falling back to gcloud. The other options can be + * used by setting the corresponding properties. + */ +public interface GcpOptions extends PipelineOptions { + /** + * Project id to use when launching jobs. + */ + @Description("Project id. Required when running a Dataflow in the cloud.") + String getProject(); + void setProject(String value); + + /** + * This option controls which file to use when attempting to create the credentials using the + * OAuth 2 webflow. + */ + @Description("Path to a file containing Google API secret") + String getSecretsFile(); + void setSecretsFile(String value); + + /** + * This option controls which file to use when attempting to create the credentials using the + * service account method. + *

+ * This option, if specified, must be combined with the + * {@link GcpOptions#getServiceAccountName() serviceAccountName}. + */ + @Description("Path to a file containing the P12 service credentials") + String getServiceAccountKeyfile(); + void setServiceAccountKeyfile(String value); + + /** + * This option controls which service account to use when attempting to create the credentials + * using the service account method. + *

+ * This option if specified, needs be combined with the + * {@link GcpOptions#getServiceAccountKeyfile() serviceAccountKeyfile}. + */ + @Description("Name of the service account for Google APIs") + String getServiceAccountName(); + void setServiceAccountName(String value); + + @Description("The path to the gcloud binary. " + + " Default is to search the system path.") + String getGCloudPath(); + void setGCloudPath(String value); + + /** + * Directory for storing dataflow credentials. + */ + @Description("Directory for storing dataflow credentials") + @Default.InstanceFactory(CredentialDirFactory.class) + String getCredentialDir(); + void setCredentialDir(String value); + + /** + * Returns the default credential directory of ${user.home}/.store/data-flow. + */ + public static class CredentialDirFactory implements DefaultValueFactory { + @Override + public String create(PipelineOptions options) { + File home = new File(System.getProperty("user.home")); + File store = new File(home, ".store"); + File dataflow = new File(store, "data-flow"); + return dataflow.getPath(); + } + } + + @Description("The credential identifier when using a persistent" + + " credential store") + @Default.String("cloud_dataflow") + String getCredentialId(); + void setCredentialId(String value); + + /** Alternative Google Cloud Platform Credential */ + @JsonIgnore + @Description("Google Cloud Platform user credentials.") + @Default.InstanceFactory(GcpUserCredentialsFactory.class) + Credential getGcpCredential(); + void setGcpCredential(Credential value); + + /** + * Attempts to load the user credentials. See + * {@link Credentials#getUserCredential(GcpOptions)} for more details. + */ + public static class GcpUserCredentialsFactory implements DefaultValueFactory { + @Override + public Credential create(PipelineOptions options) { + try { + return Credentials.getUserCredential(options.as(GcpOptions.class)); + } catch (IOException | GeneralSecurityException e) { + throw new RuntimeException("Unable to obtain credential", e); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java new file mode 100644 index 0000000000000..543c9cac6c406 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.util.AppEngineEnvironment; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.common.util.concurrent.MoreExecutors; +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +/** + * Options used to configure Google Cloud Storage. + */ +public interface GcsOptions extends + ApplicationNameOptions, GcpOptions, PipelineOptions { + /** Alternative GcsUtil instance */ + @JsonIgnore + @Default.InstanceFactory(GcsUtil.GcsUtilFactory.class) + GcsUtil getGcsUtil(); + void setGcsUtil(GcsUtil value); + + //////////////////////////////////////////////////////////////////////////// + // Allows the user to provide an alternative ExecutorService if their + // environment does not support the default implementation. + @JsonIgnore + @Default.InstanceFactory(ExecutorServiceFactory.class) + ExecutorService getExecutorService(); + void setExecutorService(ExecutorService value); + + /** + * Returns the default {@link ExecutorService} to use within the Dataflow SDK. The + * {@link ExecutorService} is compatible with AppEngine. + */ + public static class ExecutorServiceFactory implements DefaultValueFactory { + @Override + public ExecutorService create(PipelineOptions options) { + ThreadFactoryBuilder threadFactoryBuilder = new ThreadFactoryBuilder(); + threadFactoryBuilder.setThreadFactory(MoreExecutors.platformThreadFactory()); + if (!AppEngineEnvironment.IS_APP_ENGINE) { + // AppEngine doesn't allow modification of threads to be daemon threads. + threadFactoryBuilder.setDaemon(true); + } + /* The SDK requires an unbounded thread pool because a step may create X writers + * each requiring their own thread to perform the writes otherwise a writer may + * block causing deadlock for the step because the writers buffer is full. + * Also, the MapTaskExecutor launches the steps in reverse order and completes + * them in forward order thus requiring enough threads so that each step's writers + * can be active. + */ + return new ThreadPoolExecutor( + 0, Integer.MAX_VALUE, // Allow an unlimited number of re-usable threads. + Long.MAX_VALUE, TimeUnit.NANOSECONDS, // Keep non-core threads alive forever. + new SynchronousQueue(), + threadFactoryBuilder.build()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java new file mode 100644 index 0000000000000..d626b90d3c520 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Deserializer; +import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Serializer; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + +/** + * Dataflow SDK pipeline configuration options. + *

+ * Serialization + *

+ * For runners which execute their work remotely, every property available within PipelineOptions + * must either be serializable using Jackson's {@link ObjectMapper} or the getter method for the + * property annotated with {@link JsonIgnore @JsonIgnore}. + *
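As a small illustration of this rule, a non-serializable helper can be exposed through a getter marked with @JsonIgnore and given a default factory. The ScratchOptions interface is hypothetical; the factory it references appears later in this patch, and imports are omitted.

    // Hypothetical options interface; the getter is excluded from Jackson serialization.
    public interface ScratchOptions extends PipelineOptions {
      /** Helper object that Jackson cannot serialize, so the getter carries @JsonIgnore. */
      @JsonIgnore
      @Default.InstanceFactory(GcsOptions.ExecutorServiceFactory.class)
      ExecutorService getWorkerExecutor();
      void setWorkerExecutor(ExecutorService value);
    }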

+ * It is an error to have the same property available in multiple interfaces with only some + * of them being annotated with {@link JsonIgnore @JsonIgnore}. It is also an error to mark a + * setter for a property with {@link JsonIgnore @JsonIgnore}. + */ +@JsonSerialize(using = Serializer.class) +@JsonDeserialize(using = Deserializer.class) +public interface PipelineOptions { + /** + * Transforms this object into an object of type {@code T}. {@code T} must extend {@link PipelineOptions}. + *

+ * If is not registered with the {@link PipelineOptionsFactory}, then we attempt to + * verify that is composable with every interface that this instance of the PipelineOptions + * has seen. + * + * @param kls The class of the type to transform to. + * @return An object of type kls. + */ + T as(Class kls); + + @Validation.Required + @Description("The runner which will be used when executing the pipeline.") + @Default.Class(DirectPipelineRunner.class) + Class> getRunner(); + void setRunner(Class> kls); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java new file mode 100644 index 0000000000000..89a31b07e888a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -0,0 +1,862 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.DataflowWorkerHarness; +import com.google.cloud.dataflow.sdk.testing.TestDataflowPipelineOptions; +import com.google.common.base.Equivalence; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Throwables; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.ListMultimap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Queues; +import com.google.common.collect.SetMultimap; +import com.google.common.collect.Sets; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.JavaType; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.beans.BeanInfo; +import java.beans.IntrospectionException; +import java.beans.Introspector; +import java.beans.PropertyDescriptor; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.lang.reflect.Proxy; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.SortedMap; +import 
java.util.SortedSet; + +/** + * Constructs a {@link PipelineOptions} or any derived interface which is composable to any other + * derived interface of {@link PipelineOptions} via the {@link PipelineOptions#as} method. Being + * able to compose one derived interface of {@link PipelineOptions} to another has the following + * restrictions: + *

    + *
  • Any property with the same name must have the same return type for all derived interfaces + * of {@link PipelineOptions}. + *
  • Every bean property of any interface derived from {@link PipelineOptions} must have a + * getter and setter method. + *
  • Every method must conform to being a getter or setter for a JavaBean. + *
  • The derived interface of {@link PipelineOptions} must be composable with every interface + * registered with this factory. + *
+ *
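Under these restrictions, two option interfaces can be viewed over the same underlying object. A rough sketch follows, using interfaces that appear in this patch and hypothetical values; imports are omitted.

    // Rough sketch of composing option views; the job name and project id are hypothetical.
    public class CompositionSketch {
      public static void main(String[] args) {
        DataflowPipelineOptions dataflowOptions =
            PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
        dataflowOptions.setJobName("example-job");
        // The same backing object can be viewed through any composable interface:
        GcpOptions gcpView = dataflowOptions.as(GcpOptions.class);
        gcpView.setProject("my-project");
        System.out.println(dataflowOptions.getJobName() + " / " + gcpView.getProject());
      }
    }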

+ * See the JavaBeans + * specification for more details as to what constitutes a property. + */ +public class PipelineOptionsFactory { + + /** + * Creates and returns an object which implements {@link PipelineOptions}. + * This sets the {@link ApplicationNameOptions#getAppName() "appName"} to the calling + * {@link Class#getSimpleName() class's simple name}. + * + * @return An object which implements {@link PipelineOptions}. + */ + public static PipelineOptions create() { + return new Builder(getAppName(3)).as(PipelineOptions.class); + } + + /** + * Creates and returns an object which implements {@code T}. + * This sets the {@link ApplicationNameOptions#getAppName() "appName"} to the calling + * {@link Class#getSimpleName() class's simple name}. + *

+ * Note that {@code T} must be composable with every interface registered with this factory. + * See {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for more details. + * + * @return An object which implements {@code T}. + */ + public static <T extends PipelineOptions> T as(Class<T> klass) { + return new Builder(getAppName(3)).as(klass); + } + + /** + * Sets the command line arguments to parse when constructing the {@link PipelineOptions}. + *

+ * Example GNU style command line arguments: + *

+   *   --project=MyProject (simple property, will set the "project" property to "MyProject")
+   *   --readOnly=true (for boolean properties, will set the "readOnly" property to "true")
+   *   --readOnly (shorthand for boolean properties, will set the "readOnly" property to "true")
+   *   --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3])
+   *   --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3])
+   * 
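A rough end-to-end sketch of parsing such flags; the MyOptions interface and the flag values are hypothetical, and imports are omitted.

    // Hypothetical options interface and flags, shown only to illustrate the parsing above.
    public class ParseSketch {
      public interface MyOptions extends PipelineOptions {
        @Description("Input file to read from.")
        @Default.String("/tmp/input.txt")
        String getInputFile();
        void setInputFile(String value);
      }

      public static void main(String[] args) {
        // e.g. args = {"--inputFile=/tmp/other.txt", "--runner=DirectPipelineRunner"}
        MyOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);
        System.out.println(options.getInputFile());
      }
    }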
+ * Properties are able to be bound to {@link String} and Java primitives {@code boolean}, + * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, {@code double} and + * their primitive wrapper classes. + *

+ * List style properties are able to be bound to @{code boolean[]}, @{code char[]}, + * @{code short[]}, @{code int[]}, @{code long[]}, @{code float[]}, @{code double[]}, + * @{code String[]} and @{code List}. + */ + public static Builder fromArgs(String[] args) { + return new Builder(getAppName(3)).fromArgs(args); + } + + /** + * After creation we will validate that {@link PipelineOptions} conforms to all the + * validation criteria from {@code }. See + * {@link PipelineOptionsValidator#validate(Class, PipelineOptions)} for more details about + * validation. + */ + public Builder withValidation() { + return new Builder(getAppName(3)).withValidation(); + } + + /** A fluent PipelineOptions builder. */ + public static class Builder { + private final String defaultAppName; + private final String[] args; + private final boolean validation; + + // Do not allow direct instantiation + private Builder(String defaultAppName) { + this(defaultAppName, null, false); + } + + private Builder(String defaultAppName, String[] args, boolean validation) { + this.defaultAppName = defaultAppName; + this.args = args; + this.validation = validation; + } + + /** + * Sets the command line arguments to parse when constructing the {@link PipelineOptions}. + *

+ * Example GNU style command line arguments: + *

+     *   --project=MyProject (simple property, will set the "project" property to "MyProject")
+     *   --readOnly=true (for boolean properties, will set the "readOnly" property to "true")
+     *   --readOnly (shorthand for boolean properties, will set the "readOnly" property to "true")
+     *   --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3])
+     *   --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3])
+     * 
+ * Properties are able to be bound to {@link String} and Java primitives {@code boolean}, + * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, {@code double} and + * their primitive wrapper classes. + *

+ * List style properties are able to be bound to @{code boolean[]}, @{code char[]}, + * @{code short[]}, @{code int[]}, @{code long[]}, @{code float[]}, @{code double[]}, + * @{code String[]} and @{code List}. + */ + public Builder fromArgs(String[] args) { + Preconditions.checkNotNull(args, "Arguments should not be null."); + return new Builder(defaultAppName, args, validation); + } + + /** + * After creation we will validate that {@link PipelineOptions} conforms to all the + * validation criteria from {@code }. See + * {@link PipelineOptionsValidator#validate(Class, PipelineOptions)} for more details about + * validation. + */ + public Builder withValidation() { + return new Builder(defaultAppName, args, true); + } + + /** + * Creates and returns an object which implements {@link PipelineOptions} using the values + * configured on this builder during construction. + * + * @return An object which implements {@link PipelineOptions}. + */ + public PipelineOptions create() { + return as(PipelineOptions.class); + } + + /** + * Creates and returns an object which implements @{code } using the values configured on + * this builder during construction. + *

+ * Note that {@code } must be composable with every registered interface with this factory. + * See {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for more details. + * + * @return An object which implements @{code }. + */ + public T as(Class klass) { + Map initialOptions = Maps.newHashMap(); + + // Attempt to parse the arguments into the set of initial options to use + if (args != null) { + ListMultimap options = parseCommandLine(args); + LOG.debug("Provided Arguments: {}", options); + initialOptions = parseObjects(klass, options); + } + + // Create our proxy + ProxyInvocationHandler handler = new ProxyInvocationHandler(initialOptions); + T t = handler.as(klass); + + // Set the application name to the default if none was set. + ApplicationNameOptions appNameOptions = t.as(ApplicationNameOptions.class); + if (appNameOptions.getAppName() == null) { + appNameOptions.setAppName(defaultAppName); + } + + if (validation) { + PipelineOptionsValidator.validate(klass, t); + } + return t; + } + } + + /** + * Returns the simple name of calling class at the stack trace {@code level}. + */ + private static String getAppName(int level) { + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + try { + return Class.forName(stackTrace[level].getClassName()).getSimpleName(); + } catch (ClassNotFoundException e) { + return "unknown"; + } + } + + /** + * Stores the generated proxyClass and its respective {@link BeanInfo} object. + * + * @param The type of the proxyClass. + */ + static class Registration { + private final Class proxyClass; + private final List propertyDescriptors; + + public Registration(Class proxyClass, List beanInfo) { + this.proxyClass = proxyClass; + this.propertyDescriptors = beanInfo; + } + + List getPropertyDescriptors() { + return propertyDescriptors; + } + + Class getProxyClass() { + return proxyClass; + } + } + + + private static final Logger LOG = LoggerFactory.getLogger(PipelineOptionsFactory.class); + private static final Class[] EMPTY_CLASS_ARRAY = new Class[0]; + private static final ObjectMapper MAPPER = new ObjectMapper(); + + // TODO: Add dynamic registration of pipeline runners. + private static final Map>> + SUPPORTED_PIPELINE_RUNNERS = + ImmutableMap.>>builder() + .put(DirectPipelineRunner.class.getSimpleName(), + DirectPipelineRunner.class) + .put(DataflowPipelineRunner.class.getSimpleName(), + DataflowPipelineRunner.class) + .put(BlockingDataflowPipelineRunner.class.getSimpleName(), + BlockingDataflowPipelineRunner.class) + .build(); + + /** Methods which are ignored when validating the proxy class */ + private static final Set IGNORED_METHODS; + + /** The set of options which have been registered and visible to the user. */ + private static final Set> REGISTERED_OPTIONS = + Sets.newConcurrentHashSet(); + + /** A cache storing a mapping from a given interface to its registration record. */ + private static final Map, Registration> INTERFACE_CACHE = + Maps.newConcurrentMap(); + + /** A cache storing a mapping from a set of interfaces to its registration record. 
*/ + private static final Map>, Registration> COMBINED_CACHE = + Maps.newConcurrentMap(); + + static { + try { + IGNORED_METHODS = ImmutableSet.builder() + .add(Object.class.getMethod("getClass")) + .add(Object.class.getMethod("wait")) + .add(Object.class.getMethod("wait", long.class)) + .add(Object.class.getMethod("wait", long.class, int.class)) + .add(Object.class.getMethod("notify")) + .add(Object.class.getMethod("notifyAll")) + .add(Proxy.class.getMethod("getInvocationHandler", Object.class)) + .build(); + } catch (NoSuchMethodException | SecurityException e) { + LOG.error("Unable to find expected method", e); + throw new ExceptionInInitializerError(e); + } + + // TODO Add support for dynamically loading and registering the options interfaces. + register(PipelineOptions.class); + register(DirectPipelineOptions.class); + register(DataflowPipelineOptions.class); + register(BlockingDataflowPipelineOptions.class); + register(TestDataflowPipelineOptions.class); + } + + /** + * This registers the interface with this factory. This interface must conform to the following + * restrictions: + *

    + *
  • Any property with the same name must have the same return type for all derived + * interfaces of {@link PipelineOptions}. + *
  • Every bean property of any interface derived from {@link PipelineOptions} must have a + * getter and setter method. + *
  • Every method must conform to being a getter or setter for a JavaBean. + *
  • The derived interface of {@link PipelineOptions} must be composable with every interface + * registered with this factory. + *
+ * + * @param iface The interface object to manually register. + */ + public static synchronized void register(Class iface) { + Preconditions.checkNotNull(iface); + Preconditions.checkArgument(iface.isInterface(), "Only interface types are supported."); + + if (REGISTERED_OPTIONS.contains(iface)) { + return; + } + validateWellFormed(iface, REGISTERED_OPTIONS); + REGISTERED_OPTIONS.add(iface); + } + + /** + * Validates that the interface conforms to the following: + *
    + *
  • Any property with the same name must have the same return type for all derived + * interfaces of {@link PipelineOptions}. + *
  • Every bean property of any interface derived from {@link PipelineOptions} must have a + * getter and setter method. + *
  • Every method must conform to being a getter or setter for a JavaBean. + *
  • The derived interface of {@link PipelineOptions} must be composable with every interface + * part of allPipelineOptionsClasses. + *
  • Only getters may be annotated with {@link JsonIgnore @JsonIgnore}. + *
  • If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for + * this property must be annotated with {@link JsonIgnore @JsonIgnore}. + *
+ * + * @param iface The interface to validate. + * @param validatedPipelineOptionsInterfaces The set of validated pipeline options interfaces to + * validate against. + * @return A registration record containing the proxy class and bean info for iface. + */ + static synchronized Registration validateWellFormed( + Class iface, Set> validatedPipelineOptionsInterfaces) { + Preconditions.checkArgument(iface.isInterface(), "Only interface types are supported."); + + Set> combinedPipelineOptionsInterfaces = + FluentIterable.from(validatedPipelineOptionsInterfaces).append(iface).toSet(); + // Validate that the view of all currently passed in options classes is well formed. + if (!COMBINED_CACHE.containsKey(combinedPipelineOptionsInterfaces)) { + Class allProxyClass = Proxy.getProxyClass(PipelineOptionsFactory.class.getClassLoader(), + combinedPipelineOptionsInterfaces.toArray(EMPTY_CLASS_ARRAY)); + try { + List propertyDescriptors = + getPropertyDescriptors(allProxyClass); + validateClass(iface, validatedPipelineOptionsInterfaces, + allProxyClass, propertyDescriptors); + COMBINED_CACHE.put(combinedPipelineOptionsInterfaces, + new Registration((Class) allProxyClass, propertyDescriptors)); + } catch (IntrospectionException e) { + throw Throwables.propagate(e); + } + } + + // Validate that the local view of the class is well formed. + if (!INTERFACE_CACHE.containsKey(iface)) { + Class proxyClass = Proxy.getProxyClass( + PipelineOptionsFactory.class.getClassLoader(), new Class[] {iface}); + try { + List propertyDescriptors = + getPropertyDescriptors(proxyClass); + validateClass(iface, validatedPipelineOptionsInterfaces, proxyClass, propertyDescriptors); + INTERFACE_CACHE.put(iface, + new Registration((Class) proxyClass, propertyDescriptors)); + } catch (IntrospectionException e) { + throw Throwables.propagate(e); + } + } + return (Registration) INTERFACE_CACHE.get(iface); + } + + public static Set> getRegisteredOptions() { + return Collections.unmodifiableSet(REGISTERED_OPTIONS); + } + + static List getPropertyDescriptors( + Set> interfaces) { + return COMBINED_CACHE.get(interfaces).getPropertyDescriptors(); + } + + + /** + * Creates a set of {@link DataflowWorkerHarnessOptions} based of a set of known system + * properties. This is meant to only be used from the {@link DataflowWorkerHarness} as a method to + * bootstrap the worker harness. + * + * @return A {@link DataflowWorkerHarnessOptions} object configured for the + * {@link DataflowWorkerHarness}. 
+ */ + @Deprecated + public static DataflowWorkerHarnessOptions createFromSystemProperties() { + DataflowWorkerHarnessOptions options = as(DataflowWorkerHarnessOptions.class); + options.setRunner(null); + if (System.getProperties().containsKey("root_url")) { + options.setApiRootUrl(System.getProperty("root_url")); + } + if (System.getProperties().containsKey("service_path")) { + options.setDataflowEndpoint(System.getProperty("service_path")); + } + if (System.getProperties().containsKey("temp_gcs_directory")) { + options.setTempLocation(System.getProperty("temp_gcs_directory")); + } + if (System.getProperties().containsKey("service_account_name")) { + options.setServiceAccountName(System.getProperty("service_account_name")); + } + if (System.getProperties().containsKey("service_account_keyfile")) { + options.setServiceAccountKeyfile(System.getProperty("service_account_keyfile")); + } + if (System.getProperties().containsKey("worker_id")) { + options.setWorkerId(System.getProperty("worker_id")); + } + if (System.getProperties().containsKey("project_id")) { + options.setProject(System.getProperty("project_id")); + } + if (System.getProperties().containsKey("job_id")) { + options.setJobId(System.getProperty("job_id")); + } + return options; + } + + /** + * Returns all the methods visible from the provided interfaces. + * + * @param interfaces The interfaces to use when searching for all their methods. + * @return An iterable of {@link Method}s which interfaces expose. + */ + static Iterable getClosureOfMethodsOnInterfaces( + Iterable> interfaces) { + return FluentIterable.from(interfaces).transformAndConcat( + new Function, Iterable>() { + @Override + public Iterable apply(Class input) { + return getClosureOfMethodsOnInterface(input); + } + }); + } + + /** + * Returns all the methods visible from {@code iface}. + * + * @param iface The interface to use when searching for all its methods. + * @return An iterable of {@link Method}s which {@code iface} exposes. + */ + static Iterable getClosureOfMethodsOnInterface(Class iface) { + Preconditions.checkNotNull(iface); + Preconditions.checkArgument(iface.isInterface()); + ImmutableList.Builder builder = ImmutableList.builder(); + Queue> interfacesToProcess = Queues.newArrayDeque(); + interfacesToProcess.add(iface); + while (!interfacesToProcess.isEmpty()) { + Class current = interfacesToProcess.remove(); + builder.add(current.getMethods()); + interfacesToProcess.addAll(Arrays.asList(current.getInterfaces())); + } + return builder.build(); + } + + /** + * This method is meant to emulate the behavior of {@link Introspector#getBeanInfo(Class, int)} + * to construct the list of {@link PropertyDescriptor}. + *

+ * TODO: Swap back to using Introspector once the proxy class issue with AppEngine is resolved. + */ + private static List getPropertyDescriptors(Class beanClass) + throws IntrospectionException { + // The sorting is important to make this method stable. + SortedSet methods = Sets.newTreeSet(MethodComparator.INSTANCE); + methods.addAll(Arrays.asList(beanClass.getMethods())); + // Build a map of property names to getters. + SortedMap propertyNamesToGetters = Maps.newTreeMap(); + for (Method method : methods) { + String methodName = method.getName(); + if ((!methodName.startsWith("get") + && !methodName.startsWith("is")) + || method.getParameterTypes().length != 0 + || method.getReturnType() == void.class) { + continue; + } + String propertyName = Introspector.decapitalize( + methodName.startsWith("is") ? methodName.substring(2) : methodName.substring(3)); + propertyNamesToGetters.put(propertyName, method); + } + + List descriptors = Lists.newArrayList(); + + /* + * Add all the getter/setter pairs to the list of descriptors removing the getter once + * it has been paired up. + */ + for (Method method : methods) { + String methodName = method.getName(); + if (!methodName.startsWith("set") + || method.getParameterTypes().length != 1 + || method.getReturnType() != void.class) { + continue; + } + String propertyName = Introspector.decapitalize(methodName.substring(3)); + descriptors.add(new PropertyDescriptor( + propertyName, propertyNamesToGetters.remove(propertyName), method)); + } + + // Add the remaining getters with missing setters. + for (Map.Entry getterToMethod : propertyNamesToGetters.entrySet()) { + descriptors.add(new PropertyDescriptor( + getterToMethod.getKey(), getterToMethod.getValue(), null)); + } + return descriptors; + } + + /** + * Validates that a given class conforms to the following properties: + *

    + *
  • Any property with the same name must have the same return type for all derived + * interfaces of {@link PipelineOptions}. + *
  • Every bean property of any interface derived from {@link PipelineOptions} must have a + * getter and setter method. + *
  • Every method must conform to being a getter or setter for a JavaBean. + *
  • Only getters may be annotated with {@link JsonIgnore @JsonIgnore}. + *
  • If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for + * this property must be annotated with {@link JsonIgnore @JsonIgnore}. + *
+ * + * @param iface The interface to validate. + * @param validatedPipelineOptionsInterfaces The set of validated pipeline options interfaces to + * validate against. + * @param klass The proxy class representing the interface. + * @param descriptors A list of {@link PropertyDescriptor}s to use when validating. + */ + private static void validateClass(Class iface, + Set> validatedPipelineOptionsInterfaces, + Class klass, List descriptors) { + Set methods = Sets.newHashSet(IGNORED_METHODS); + // Ignore static methods, "equals", "hashCode", "toString" and "as" on the generated class. + for (Method method : klass.getMethods()) { + if (Modifier.isStatic(method.getModifiers())) { + methods.add(method); + } + } + try { + methods.add(klass.getMethod("equals", Object.class)); + methods.add(klass.getMethod("hashCode")); + methods.add(klass.getMethod("toString")); + methods.add(klass.getMethod("as", Class.class)); + } catch (NoSuchMethodException | SecurityException e) { + throw Throwables.propagate(e); + } + + // Verify that there are no methods with the same name with two different return types. + Iterable interfaceMethods = FluentIterable + .from(getClosureOfMethodsOnInterface(iface)) + .toSortedSet(MethodComparator.INSTANCE); + SetMultimap, Method> methodNameToMethodMap = + HashMultimap.create(); + for (Method method : interfaceMethods) { + methodNameToMethodMap.put(MethodNameEquivalence.INSTANCE.wrap(method), method); + } + for (Map.Entry, Collection> entry + : methodNameToMethodMap.asMap().entrySet()) { + Set> returnTypes = FluentIterable.from(entry.getValue()) + .transform(ReturnTypeFetchingFunction.INSTANCE).toSet(); + SortedSet collidingMethods = FluentIterable.from(entry.getValue()) + .toSortedSet(MethodComparator.INSTANCE); + Preconditions.checkArgument(returnTypes.size() == 1, + "Method [%s] has multiple definitions %s with different return types for [%s].", + entry.getKey().get().getName(), + collidingMethods, + iface.getName()); + } + + // Verify that there is no getter with a mixed @JsonIgnore annotation and verify + // that no setter has @JsonIgnore. 
+ Iterable allInterfaceMethods = FluentIterable + .from(getClosureOfMethodsOnInterfaces(validatedPipelineOptionsInterfaces)) + .append(getClosureOfMethodsOnInterface(iface)) + .toSortedSet(MethodComparator.INSTANCE); + SetMultimap, Method> methodNameToAllMethodMap = + HashMultimap.create(); + for (Method method : allInterfaceMethods) { + methodNameToAllMethodMap.put(MethodNameEquivalence.INSTANCE.wrap(method), method); + } + for (PropertyDescriptor descriptor : descriptors) { + if (IGNORED_METHODS.contains(descriptor.getReadMethod()) + || IGNORED_METHODS.contains(descriptor.getWriteMethod())) { + continue; + } + Set getters = + methodNameToAllMethodMap.get( + MethodNameEquivalence.INSTANCE.wrap(descriptor.getReadMethod())); + Set gettersWithJsonIgnore = + FluentIterable.from(getters).filter(JsonIgnorePredicate.INSTANCE).toSet(); + + Iterable getterClassNames = FluentIterable.from(getters) + .transform(MethodToDeclaringClassFunction.INSTANCE) + .transform(ClassNameFunction.INSTANCE); + Iterable gettersWithJsonIgnoreClassNames = FluentIterable.from(gettersWithJsonIgnore) + .transform(MethodToDeclaringClassFunction.INSTANCE) + .transform(ClassNameFunction.INSTANCE); + + Preconditions.checkArgument(gettersWithJsonIgnore.isEmpty() + || getters.size() == gettersWithJsonIgnore.size(), + "Expected getter for property [%s] to be marked with @JsonIgnore on all %s, " + + "found only on %s", + descriptor.getName(), getterClassNames, gettersWithJsonIgnoreClassNames); + + Set settersWithJsonIgnore = FluentIterable.from( + methodNameToAllMethodMap.get( + MethodNameEquivalence.INSTANCE.wrap(descriptor.getWriteMethod()))) + .filter(JsonIgnorePredicate.INSTANCE).toSet(); + + Iterable settersWithJsonIgnoreClassNames = FluentIterable.from(settersWithJsonIgnore) + .transform(MethodToDeclaringClassFunction.INSTANCE) + .transform(ClassNameFunction.INSTANCE); + + Preconditions.checkArgument(settersWithJsonIgnore.isEmpty(), + "Expected setter for property [%s] to not be marked with @JsonIgnore on %s", + descriptor.getName(), settersWithJsonIgnoreClassNames); + } + + // Verify that each property has a matching read and write method. + for (PropertyDescriptor propertyDescriptor : descriptors) { + Preconditions.checkArgument( + IGNORED_METHODS.contains(propertyDescriptor.getWriteMethod()) + || propertyDescriptor.getReadMethod() != null, + "Expected getter for property [%s] of type [%s] on [%s].", + propertyDescriptor.getName(), + propertyDescriptor.getPropertyType().getName(), + iface.getName()); + Preconditions.checkArgument( + IGNORED_METHODS.contains(propertyDescriptor.getReadMethod()) + || propertyDescriptor.getWriteMethod() != null, + "Expected setter for property [%s] of type [%s] on [%s].", + propertyDescriptor.getName(), + propertyDescriptor.getPropertyType().getName(), + iface.getName()); + methods.add(propertyDescriptor.getReadMethod()); + methods.add(propertyDescriptor.getWriteMethod()); + } + + // Verify that no additional methods are on an interface that aren't a bean property. + Set unknownMethods = Sets.difference(Sets.newHashSet(klass.getMethods()), methods); + Preconditions.checkArgument(unknownMethods.isEmpty(), + "Methods %s on [%s] do not conform to being bean properties.", + FluentIterable.from(unknownMethods).transform(MethodFormatterFunction.INSTANCE), + iface.getName()); + } + + /** A {@link Comparator} which uses the generic method signature to sort them. 
*/ + private static class MethodComparator implements Comparator { + static final MethodComparator INSTANCE = new MethodComparator(); + @Override + public int compare(Method o1, Method o2) { + return o1.toGenericString().compareTo(o2.toGenericString()); + } + } + + /** A {@link Function} which gets the methods return type. */ + private static class ReturnTypeFetchingFunction implements Function> { + static final ReturnTypeFetchingFunction INSTANCE = new ReturnTypeFetchingFunction(); + @Override + public Class apply(Method input) { + return input.getReturnType(); + } + } + + /** A {@link Function} which turns a method into a simple method signature. */ + private static class MethodFormatterFunction implements Function { + static final MethodFormatterFunction INSTANCE = new MethodFormatterFunction(); + @Override + public String apply(Method input) { + String parameterTypes = FluentIterable.of(input.getParameterTypes()) + .transform(ClassNameFunction.INSTANCE) + .toSortedList(String.CASE_INSENSITIVE_ORDER) + .toString(); + return ClassNameFunction.INSTANCE.apply(input.getReturnType()) + " " + input.getName() + + "(" + parameterTypes.substring(1, parameterTypes.length() - 1) + ")"; + } + } + + /** A {@link Function} with returns the classes name. */ + private static class ClassNameFunction implements Function, String> { + static final ClassNameFunction INSTANCE = new ClassNameFunction(); + @Override + public String apply(Class input) { + return input.getName(); + } + } + + /** A {@link Function} with returns the declaring class for the method. */ + private static class MethodToDeclaringClassFunction implements Function> { + static final MethodToDeclaringClassFunction INSTANCE = new MethodToDeclaringClassFunction(); + @Override + public Class apply(Method input) { + return input.getDeclaringClass(); + } + } + + /** An {@link Equivalence} which considers two methods equivalent if they share the same name. */ + private static class MethodNameEquivalence extends Equivalence { + static final MethodNameEquivalence INSTANCE = new MethodNameEquivalence(); + @Override + protected boolean doEquivalent(Method a, Method b) { + return a.getName().equals(b.getName()); + } + + @Override + protected int doHash(Method t) { + return t.getName().hashCode(); + } + } + + /** + * A {@link Predicate} which returns true if the method is annotated with + * {@link JsonIgnore @JsonIgnore}. + */ + static class JsonIgnorePredicate implements Predicate { + static final JsonIgnorePredicate INSTANCE = new JsonIgnorePredicate(); + @Override + public boolean apply(Method input) { + return input.isAnnotationPresent(JsonIgnore.class); + } + } + + /** + * Splits string arguments based upon expected pattern of --argName=value. + *

+ * Example GNU style command line arguments: + *
+   * <pre>
+   *   --project=MyProject (simple property, will set the "project" property to "MyProject")
+   *   --readOnly=true (for boolean properties, will set the "readOnly" property to "true")
+   *   --readOnly (shorthand for boolean properties, will set the "readOnly" property to "true")
+   *   --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3])
+   *   --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3])
+   * </pre>
+ * Properties are able to be bound to {@link String} and Java primitives boolean, byte, + short, int, long, float, double and their primitive wrapper classes. + *

+ * List style properties are able to be bound to boolean[], char[], short[], + * int[], long[], float[], double[], String[] and List. + *

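+ * <p>As an illustration only (the property names are hypothetical), the argument list
+ * <pre>
+ *   --project=MyProject --readOnly --x=1,2,3
+ * </pre>
+ * is parsed into the multimap {"project" -> ["MyProject"], "readOnly" -> ["true"],
+ * "x" -> ["1,2,3"]}. Splitting of "1,2,3" on ',' is not done here; it happens later, when the
+ * parsed strings are converted to the return types of the matching getters.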
+ */ + private static ListMultimap parseCommandLine(String[] args) { + ImmutableListMultimap.Builder builder = ImmutableListMultimap.builder(); + for (String arg : args) { + Preconditions.checkArgument(arg.startsWith("--"), + "Unknown argument %s in command line %s", arg, Arrays.toString(args)); + int index = arg.indexOf("="); + // Make sure that '=' isn't the first character after '--' or the last character + Preconditions.checkArgument(index != 2 && index != arg.length() - 1, + "Unknown argument %s in command line %s", arg, Arrays.toString(args)); + if (index > 0) { + builder.put(arg.substring(2, index), arg.substring(index + 1, arg.length())); + } else { + builder.put(arg.substring(2), "true"); + } + } + return builder.build(); + } + + /** + * Using the parsed string arguments, we convert the strings to the expected + * return type of the methods which are found on the passed in class. + *

+ * For any return type that is expected to be an array or a collection, we further + * split up each string on ','. + *

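+ * <p>For example (illustrative values only), {@code --x=1,2,3} bound to a getter returning
+ * {@code List<Integer>} is first split into the strings "1", "2" and "3" and then converted
+ * to the list [1, 2, 3] using the {@code ObjectMapper}.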
+ * We special case the "runner" option. It is mapped to the class of the {@link PipelineRunner} + * based off of the {@link PipelineRunner}s simple class name. + */ + private static Map parseObjects( + Class klass, ListMultimap options) { + Map propertyNamesToGetters = Maps.newHashMap(); + PipelineOptionsFactory.validateWellFormed(klass, getRegisteredOptions()); + Iterable propertyDescriptors = + PipelineOptionsFactory.getPropertyDescriptors( + FluentIterable.from(getRegisteredOptions()).append(klass).toSet()); + for (PropertyDescriptor descriptor : propertyDescriptors) { + propertyNamesToGetters.put(descriptor.getName(), descriptor.getReadMethod()); + } + Map convertedOptions = Maps.newHashMap(); + for (Map.Entry> entry : options.asMap().entrySet()) { + if (!propertyNamesToGetters.containsKey(entry.getKey())) { + LOG.warn("Ignoring argument {}={}", entry.getKey(), entry.getValue()); + continue; + } + + Method method = propertyNamesToGetters.get(entry.getKey()); + JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType()); + if ("runner".equals(entry.getKey())) { + String runner = Iterables.getOnlyElement(entry.getValue()); + Preconditions.checkArgument(SUPPORTED_PIPELINE_RUNNERS.containsKey(runner), + "Unknown 'runner' specified %s, supported pipeline runners %s", + runner, SUPPORTED_PIPELINE_RUNNERS.keySet()); + convertedOptions.put("runner", SUPPORTED_PIPELINE_RUNNERS.get(runner)); + } else if (method.getReturnType().isArray() + || Collection.class.isAssignableFrom(method.getReturnType())) { + // Split any strings with "," + List values = FluentIterable.from(entry.getValue()) + .transformAndConcat(new Function>() { + @Override + public Iterable apply(String input) { + return Arrays.asList(input.split(",")); + } + }).toList(); + convertedOptions.put(entry.getKey(), MAPPER.convertValue(values, type)); + } else { + String value = Iterables.getOnlyElement(entry.getValue()); + convertedOptions.put(entry.getKey(), MAPPER.convertValue(value, type)); + } + } + return convertedOptions; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java new file mode 100644 index 0000000000000..bb7bcf3de831f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.common.base.Preconditions; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; + +/** + * Validates that the {@link PipelineOptions} conforms to all the {@link Validation} criteria. + */ +public class PipelineOptionsValidator { + /** + * Validates that the passed {@link PipelineOptions} conforms to all the validation criteria from + * the passed in interface. + *

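+ * <p>A minimal sketch (the {@code MyOptions} interface below is hypothetical and shown only
+ * to illustrate the contract):
+ * <pre>{@code
+ *   public interface MyOptions extends PipelineOptions {
+ *     @Validation.Required
+ *     String getInput();
+ *     void setInput(String value);
+ *   }
+ *
+ *   // Throws IllegalArgumentException if the "input" property has not been set.
+ *   MyOptions validated = PipelineOptionsValidator.validate(MyOptions.class, options);
+ * }</pre>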
+ * Note that the interface requested must conform to the validation criteria specified on + * {@link PipelineOptions#as(Class)}. + * + * @param klass The interface to fetch validation criteria from. + * @param options The {@link PipelineOptions} to validate. + * @return The type + */ + public static T validate(Class klass, PipelineOptions options) { + Preconditions.checkNotNull(klass); + Preconditions.checkNotNull(options); + Preconditions.checkArgument(Proxy.isProxyClass(options.getClass())); + Preconditions.checkArgument(Proxy.getInvocationHandler(options) + instanceof ProxyInvocationHandler); + + ProxyInvocationHandler handler = + (ProxyInvocationHandler) Proxy.getInvocationHandler(options); + for (Method method : PipelineOptionsFactory.getClosureOfMethodsOnInterface(klass)) { + for (Annotation annotation : method.getAnnotations()) { + if (annotation instanceof Validation.Required) { + Preconditions.checkArgument(handler.invoke(options, method, null) != null, + "Expected non-null property to be set for [" + method + "]."); + } + } + } + return options.as(klass); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java new file mode 100644 index 0000000000000..aefbe1dec294e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java @@ -0,0 +1,390 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.JsonIgnorePredicate; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.Registration; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.common.base.Defaults; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ClassToInstanceMap; +import com.google.common.collect.FluentIterable; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.common.collect.MutableClassToInstanceMap; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JavaType; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.beans.PropertyDescriptor; +import java.io.IOException; +import java.lang.annotation.Annotation; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.lang.reflect.Type; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +/** + * Represents and {@link InvocationHandler} for a {@link Proxy}. The invocation handler uses bean + * introspection of the proxy class to store and retrieve values based off of the property name. + *

+ * Unset properties use the {@link Default @Default} metadata on the getter to return values. If there + is no {@link Default @Default} annotation on the getter, then a default as + per the Java Language Specification for the expected return type is returned. + *

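+ * <p>For example (a hypothetical getter, shown only for illustration), a property declared as
+ * <pre>{@code
+ *   @Default.Integer(42)
+ *   Integer getCount();
+ *   void setCount(Integer value);
+ * }</pre>
+ * returns 42 until it is explicitly set, while an unannotated getter returning the primitive
+ * {@code int} would fall back to 0, the Java Language Specification default for {@code int}.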
+ * In addition to the getter/setter pairs, this proxy invocation handler supports + * {@link Object#equals(Object)}, {@link Object#hashCode()}, {@link Object#toString()} and + * {@link PipelineOptions#as(Class)}. + */ +class ProxyInvocationHandler implements InvocationHandler { + private static final ObjectMapper MAPPER = new ObjectMapper(); + /** + * No two instances of this class are considered equivalent hence we generate a random hash code + * between 0 and {@link Integer#MAX_VALUE}. + */ + private final int hashCode = (int) Math.random() * Integer.MAX_VALUE; + private final Set> knownInterfaces; + private final ClassToInstanceMap interfaceToProxyCache; + private final Map options; + private final Map jsonOptions; + private final Map gettersToPropertyNames; + private final Map settersToPropertyNames; + + ProxyInvocationHandler(Map options) { + this(options, Maps.newHashMap()); + } + + private ProxyInvocationHandler(Map options, Map jsonOptions) { + this.options = options; + this.jsonOptions = jsonOptions; + this.knownInterfaces = new HashSet<>(PipelineOptionsFactory.getRegisteredOptions()); + gettersToPropertyNames = Maps.newHashMap(); + settersToPropertyNames = Maps.newHashMap(); + interfaceToProxyCache = MutableClassToInstanceMap.create(); + } + + @Override + public Object invoke(Object proxy, Method method, Object[] args) { + if (args == null && "toString".equals(method.getName())) { + return toString(); + } else if (args != null && args.length == 1 && "equals".equals(method.getName())) { + return equals(args[0]); + } else if (args == null && "hashCode".equals(method.getName())) { + return hashCode(); + } else if (args != null && "as".equals(method.getName()) && args[0] instanceof Class) { + return as((Class) args[0]); + } + String methodName = method.getName(); + synchronized (this) { + if (gettersToPropertyNames.keySet().contains(methodName)) { + String propertyName = gettersToPropertyNames.get(methodName); + if (!options.containsKey(propertyName)) { + // Lazy bind the default to the method. + Object value = jsonOptions.containsKey(propertyName) + ? getValueFromJson(propertyName, method) + : getDefault((PipelineOptions) proxy, method); + options.put(propertyName, value); + } + return options.get(propertyName); + } else if (settersToPropertyNames.containsKey(methodName)) { + options.put(settersToPropertyNames.get(methodName), args[0]); + return Void.TYPE; + } + } + throw new RuntimeException("Unknown method [" + method + "] invoked with args [" + + Arrays.toString(args) + "]."); + } + + /** + * Backing implementation for {@link PipelineOptions#as(Class)}. + * + * @param iface The interface which the returned object needs to implement. + * @return An object which implements the interface . 
+ */ + synchronized T as(Class iface) { + Preconditions.checkNotNull(iface); + Preconditions.checkArgument(iface.isInterface()); + if (!interfaceToProxyCache.containsKey(iface)) { + Registration registration = + PipelineOptionsFactory.validateWellFormed(iface, knownInterfaces); + List propertyDescriptors = registration.getPropertyDescriptors(); + Class proxyClass = registration.getProxyClass(); + gettersToPropertyNames.putAll(generateGettersToPropertyNames(propertyDescriptors)); + settersToPropertyNames.putAll(generateSettersToPropertyNames(propertyDescriptors)); + knownInterfaces.add(iface); + interfaceToProxyCache.putInstance(iface, + InstanceBuilder.ofType(proxyClass) + .fromClass(proxyClass) + .withArg(InvocationHandler.class, this) + .build()); + } + return interfaceToProxyCache.getInstance(iface); + } + + + /** + * Returns true if the other object is a ProxyInvocationHandler or is a Proxy object and has the + * same ProxyInvocationHandler as this. + * + * @param obj The object to compare against this. + * @return true iff the other object is a ProxyInvocationHandler or is a Proxy object and has the + * same ProxyInvocationHandler as this. + */ + @Override + public boolean equals(Object obj) { + return obj != null && ((obj instanceof ProxyInvocationHandler && this == obj) + || (Proxy.isProxyClass(obj.getClass()) && this == Proxy.getInvocationHandler(obj))); + } + + /** + * Each instance of this ProxyInvocationHandler is unique and has a random hash code. + * + * @return A hash code that was generated randomly. + */ + @Override + public int hashCode() { + return hashCode; + } + + /** + * This will output all the currently set values. + * + * @return A string representation of this. + */ + @Override + public synchronized String toString() { + StringBuilder b = new StringBuilder(); + b.append("Current Settings:\n"); + for (Map.Entry entry : new TreeMap<>(options).entrySet()) { + b.append(" " + entry.getKey() + ": " + entry.getValue() + "\n"); + } + return b.toString(); + } + + /** + * Uses a Jackson {@link ObjectMapper} to attempt type conversion. + * + * @param method The method whose return type you would like to return. + * @param propertyName The name of the property which is being returned. + * @return An object matching the return type of the method passed in. + */ + private Object getValueFromJson(String propertyName, Method method) { + try { + JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType()); + JsonNode jsonNode = jsonOptions.get(propertyName); + return MAPPER.readValue(jsonNode.toString(), type); + } catch (IOException e) { + throw new RuntimeException("Unable to parse representation", e); + } + } + + /** + * Returns a default value for the method based upon {@Default} metadata on the getter + * to return values. If there is no {@Default} annotation on the getter, then a default as + * per the Java Language Specification for the expected return type is returned. + * + * @param proxy The proxy object for which we are attempting to get the default. + * @param method The getter method which was invoked. + * @return The default value from an {@link Default} annotation if present, otherwise a default + * value as per the Java Language Specification. 
+ */ + private Object getDefault(PipelineOptions proxy, Method method) { + for (Annotation annotation : method.getAnnotations()) { + if (annotation instanceof Default.Class) { + return ((Default.Class) annotation).value(); + } else if (annotation instanceof Default.String) { + return ((Default.String) annotation).value(); + } else if (annotation instanceof Default.Boolean) { + return ((Default.Boolean) annotation).value(); + } else if (annotation instanceof Default.Character) { + return ((Default.Character) annotation).value(); + } else if (annotation instanceof Default.Byte) { + return ((Default.Byte) annotation).value(); + } else if (annotation instanceof Default.Short) { + return ((Default.Short) annotation).value(); + } else if (annotation instanceof Default.Integer) { + return ((Default.Integer) annotation).value(); + } else if (annotation instanceof Default.Long) { + return ((Default.Long) annotation).value(); + } else if (annotation instanceof Default.Float) { + return ((Default.Float) annotation).value(); + } else if (annotation instanceof Default.Double) { + return ((Default.Double) annotation).value(); + } else if (annotation instanceof Default.String) { + return ((Default.String) annotation).value(); + } else if (annotation instanceof Default.String) { + return ((Default.String) annotation).value(); + } else if (annotation instanceof Default.String) { + return ((Default.String) annotation).value(); + } else if (annotation instanceof Default.InstanceFactory) { + return InstanceBuilder.ofType(((Default.InstanceFactory) annotation).value()) + .build() + .create(proxy); + } + } + + /* + * We need to make sure that we return something appropriate for the return type. Thus we return + * a default value as defined by the JLS. + */ + return Defaults.defaultValue(method.getReturnType()); + } + + /** + * Returns a map from the getters method name to the name of the property based upon the passed in + * {@link PropertyDescriptor}s property descriptors. + * + * @param propertyDescriptors A list of {@link PropertyDescriptor}s to use when generating the + * map. + * @return A map of getter method name to property name. + */ + private static Map generateGettersToPropertyNames( + List propertyDescriptors) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (PropertyDescriptor descriptor : propertyDescriptors) { + if (descriptor.getReadMethod() != null) { + builder.put(descriptor.getReadMethod().getName(), descriptor.getName()); + } + } + return builder.build(); + } + + /** + * Returns a map from the setters method name to its matching getters method name based upon the + * passed in {@link PropertyDescriptor}s property descriptors. + * + * @param propertyDescriptors A list of {@link PropertyDescriptor}s to use when generating the + * map. + * @return A map of setter method name to getter method name. 
+ */ + private static Map generateSettersToPropertyNames( + List propertyDescriptors) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (PropertyDescriptor descriptor : propertyDescriptors) { + if (descriptor.getWriteMethod() != null) { + builder.put(descriptor.getWriteMethod().getName(), descriptor.getName()); + } + } + return builder.build(); + } + + static class Serializer extends JsonSerializer { + @Override + public void serialize(PipelineOptions value, JsonGenerator jgen, SerializerProvider provider) + throws IOException, JsonProcessingException { + ProxyInvocationHandler handler = (ProxyInvocationHandler) Proxy.getInvocationHandler(value); + Map options = Maps.newHashMap(handler.jsonOptions); + options.putAll(handler.options); + removeIgnoredOptions(handler.knownInterfaces, options); + ensureSerializable(handler.knownInterfaces, options); + jgen.writeStartObject(); + jgen.writeFieldName("options"); + jgen.writeObject(options); + jgen.writeEndObject(); + } + + /** + * We remove all properties within the passed in options where there getter is annotated with + * {@link JsonIgnore @JsonIgnore} from the passed in options using the passed in interfaces. + */ + private void removeIgnoredOptions( + Set> interfaces, Map options) { + // Find all the method names which are annotated with JSON ignore. + Set jsonIgnoreMethodNames = FluentIterable.from( + PipelineOptionsFactory.getClosureOfMethodsOnInterfaces(interfaces)) + .filter(JsonIgnorePredicate.INSTANCE).transform(new Function() { + @Override + public String apply(Method input) { + return input.getName(); + } + }).toSet(); + + // Remove all options which have the same method name as the descriptor. + for (PropertyDescriptor descriptor + : PipelineOptionsFactory.getPropertyDescriptors(interfaces)) { + if (jsonIgnoreMethodNames.contains(descriptor.getReadMethod().getName())) { + options.remove(descriptor.getName()); + } + } + } + + /** + * We use an {@link ObjectMapper} to verify that the passed in options are serializable + * and deserializable. + */ + private void ensureSerializable(Set> interfaces, + Map options) throws IOException { + // Construct a map from property name to the return type of the getter. + Map propertyToReturnType = Maps.newHashMap(); + for (PropertyDescriptor descriptor + : PipelineOptionsFactory.getPropertyDescriptors(interfaces)) { + if (descriptor.getReadMethod() != null) { + propertyToReturnType.put(descriptor.getName(), + descriptor.getReadMethod().getGenericReturnType()); + } + } + + // Attempt to serialize and deserialize each property. 
+ for (Map.Entry entry : options.entrySet()) { + String serializedValue = MAPPER.writeValueAsString(entry.getValue()); + JavaType type = MAPPER.getTypeFactory() + .constructType(propertyToReturnType.get(entry.getKey())); + MAPPER.readValue(serializedValue, type); + } + } + } + + static class Deserializer extends JsonDeserializer { + @Override + public PipelineOptions deserialize(JsonParser jp, DeserializationContext ctxt) + throws IOException, JsonProcessingException { + ObjectNode objectNode = (ObjectNode) jp.readValueAsTree(); + ObjectNode optionsNode = (ObjectNode) objectNode.get("options"); + + Map fields = Maps.newHashMap(); + for (Iterator> iterator = optionsNode.fields(); + iterator.hasNext(); ) { + Map.Entry field = iterator.next(); + fields.put(field.getKey(), field.getValue()); + } + PipelineOptions options = + new ProxyInvocationHandler(Maps.newHashMap(), fields) + .as(PipelineOptions.class); + return options; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java new file mode 100644 index 0000000000000..725d845d5b9f2 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +/** + * [Whitelisting Required] Options used to configure the streaming backend. + * + *

Important: Streaming support is experimental. It is only supported in the + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a + * streaming early access program. + * + *

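+ * <p>For whitelisted users, streaming execution can be requested on the command line with
+ * {@code --streaming} (or {@code --streaming=true}), or programmatically, for example
+ * (sketch only):
+ * <pre>{@code
+ *   options.as(StreamingOptions.class).setStreaming(true);
+ * }</pre>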
You should expect this class to change significantly in future + * versions of the SDK or be removed entirely. + */ +public interface StreamingOptions extends + ApplicationNameOptions, GcpOptions, PipelineOptions { + /** + * Note that this feature is currently experimental and only available to users whitelisted in + * a streaming early access program. + */ + @Description("True if running in streaming mode (experimental)") + boolean isStreaming(); + void setStreaming(boolean value); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java new file mode 100644 index 0000000000000..10f205fcadb96 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * {@link Validation} represents a set of annotations which can be used to annotate getter + * properties on {@link PipelineOptions} with information representing the validation criteria to + * be used when validating with the {@link PipelineOptionsValidator}. + */ + +public @interface Validation { + /** + * This criteria specifies that the value must be not null. Note that this annotation + * should only be applied to methods which return nullable objects. + */ + @Target(value = ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Required { + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java new file mode 100644 index 0000000000000..557e377676b71 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} for + * configuring pipeline execution. + * + *

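+ * <p>A pipeline-specific configuration is typically declared as an interface extending
+ * {@link com.google.cloud.dataflow.sdk.options.PipelineOptions}; the interface below is a
+ * hypothetical sketch, not part of the SDK:
+ * <pre>{@code
+ *   public interface MyOptions extends PipelineOptions {
+ *     @Description("Path of the file to read from")
+ *     @Default.String("gs://my-bucket/input.txt")
+ *     String getInputFile();
+ *     void setInputFile(String value);
+ *   }
+ * }</pre>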
{@link com.google.cloud.dataflow.sdk.options.PipelineOptions} encapsulates the various + * parameters that describe how a pipeline should be run. {@code PipelineOptions} are created + * using a {@link com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory}. + */ +package com.google.cloud.dataflow.sdk.options; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java new file mode 100644 index 0000000000000..e27ac01476606 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Provides a simple, powerful model for building both batch and + * streaming parallel data processing + * {@link com.google.cloud.dataflow.sdk.Pipeline}s. + * + *

To use the Google Cloud Dataflow SDK, you build a + * {@link com.google.cloud.dataflow.sdk.Pipeline} which manages a graph of + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s + * and the {@link com.google.cloud.dataflow.sdk.values.PCollection}s that + * the PTransforms consume and produce. + * + *

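+ * <p>A minimal construction sketch (assuming {@code options} is a
+ * {@code DataflowPipelineOptions} instance; the file paths and the elided middle transform are
+ * placeholders, and {@code DataflowPipeline} is just one concrete {@code Pipeline} offered by
+ * the runners package):
+ * <pre>{@code
+ *   DataflowPipeline p = DataflowPipeline.create(options);
+ *   p.apply(TextIO.Read.from("gs://my-bucket/input.txt"))
+ *    .apply(...)                                          // processing PTransforms, elided
+ *    .apply(TextIO.Write.to("gs://my-bucket/output"));
+ *   p.run();
+ * }</pre>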
Each Pipeline has a + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to specify + * where and how it should run after pipeline construction is complete. + * + */ +package com.google.cloud.dataflow.sdk; + + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java new file mode 100644 index 0000000000000..61fb09746921d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobState; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +import javax.annotation.Nullable; + +/** + * A PipelineRunner that's like {@link DataflowPipelineRunner} + * but that waits for the launched job to finish. + * + *

Prints out job status updates and console messages while it waits. + * + *

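+ * <p>A usage sketch (assuming {@code options} has already been populated with valid
+ * {@link BlockingDataflowPipelineOptions} and {@code p} is a fully constructed {@link Pipeline}):
+ * <pre>{@code
+ *   BlockingDataflowPipelineRunner runner = BlockingDataflowPipelineRunner.fromOptions(options);
+ *   PipelineJobState state = runner.run(p);  // blocks until the job reaches a terminal state
+ * }</pre>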
Returns the final job state, or throws an exception if the job + * fails or cannot be monitored. + */ +public class BlockingDataflowPipelineRunner extends + PipelineRunner { + private static final Logger LOG = LoggerFactory.getLogger(BlockingDataflowPipelineRunner.class); + + /** + * Holds the status of a run request. + */ + public static class PipelineJobState implements PipelineResult { + private final JobState state; + + public PipelineJobState(JobState state) { + this.state = state; + } + + public JobState getJobState() { + return state; + } + } + + // Defaults to an infinite wait period. + // TODO: make this configurable after removal of option map. + private static final long BUILTIN_JOB_TIMEOUT_SEC = -1L; + + private DataflowPipelineRunner dataflowPipelineRunner = null; + private MonitoringUtil.JobMessagesHandler jobMessagesHandler; + + protected BlockingDataflowPipelineRunner( + DataflowPipelineRunner internalRunner, + MonitoringUtil.JobMessagesHandler jobMessagesHandler) { + this.dataflowPipelineRunner = internalRunner; + this.jobMessagesHandler = jobMessagesHandler; + } + + /** + * Constructs a runner from the provided options. + */ + public static BlockingDataflowPipelineRunner fromOptions( + PipelineOptions options) { + BlockingDataflowPipelineOptions dataflowOptions = + PipelineOptionsValidator.validate(BlockingDataflowPipelineOptions.class, options); + DataflowPipelineRunner dataflowPipelineRunner = + DataflowPipelineRunner.fromOptions(dataflowOptions); + + return new BlockingDataflowPipelineRunner(dataflowPipelineRunner, + new MonitoringUtil.PrintHandler(dataflowOptions.getJobMessageOutput())); + } + + @Override + public PipelineJobState run(Pipeline p) { + DataflowPipelineJob job = dataflowPipelineRunner.run(p); + + @Nullable JobState result; + try { + result = job.waitToFinish( + BUILTIN_JOB_TIMEOUT_SEC, TimeUnit.SECONDS, jobMessagesHandler); + } catch (IOException | InterruptedException ex) { + throw new RuntimeException("Exception caught during job execution", ex); + } + + if (result == null) { + throw new RuntimeException("No result provided: " + + "possible error requesting job status."); + } + + LOG.info("Job finished with status {}", result); + if (result.isTerminal()) { + return new PipelineJobState(result); + } + + // TODO: introduce an exception which can wrap a JobState, + // so that detailed error information can be retrieved. + throw new RuntimeException("Job failed with state " + result); + } + + @Override + public Output apply( + PTransform transform, Input input) { + return dataflowPipelineRunner.apply(transform, input); + } + + /** + * Sets callbacks to invoke during execution see {@code DataflowPipelineRunnerHooks}. + * Important: setHooks is experimental. Please consult with the Dataflow team before using it. + * You should expect this class to change significantly in future versions of the SDK or be + * removed entirely. + */ + public void setHooks(DataflowPipelineRunnerHooks hooks) { + this.dataflowPipelineRunner.setHooks(hooks); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java new file mode 100644 index 0000000000000..310b4d97a323f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; + +/** + * A DataflowPipeline, which returns a + * {@link DataflowPipelineJob} subclass of PipelineResult + * from {@link com.google.cloud.dataflow.sdk.Pipeline#run()}. + */ +public class DataflowPipeline extends Pipeline { + + /** + * Creates and returns a new DataflowPipeline instance for tests. + */ + public static DataflowPipeline create(DataflowPipelineOptions options) { + return new DataflowPipeline(options); + } + + private DataflowPipeline(DataflowPipelineOptions options) { + super(DataflowPipelineRunner.fromOptions(options), options); + } + + @Override + public DataflowPipelineJob run() { + return (DataflowPipelineJob) super.run(); + } + + @Override + public DataflowPipelineRunner getRunner() { + return (DataflowPipelineRunner) super.getRunner(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java new file mode 100644 index 0000000000000..c1facb0288b84 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.runners; + +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; + +import com.google.api.client.googleapis.json.GoogleJsonResponseException; +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.Job; +import com.google.api.services.dataflow.model.JobMessage; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobState; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.SocketTimeoutException; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import javax.annotation.Nullable; + +/** + * A DataflowPipelineJob represents a job submitted to Dataflow using + * {@link DataflowPipelineRunner}. + */ +public class DataflowPipelineJob implements PipelineResult { + private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineJob.class); + + /** + * The id for the job. + */ + private String jobId; + + /** + * Google cloud project to associate this pipeline with. 
+ */ + private String project; + + /** + * Client for the Dataflow service. This can be used to query the service + * for information about the job. + */ + private Dataflow dataflowClient; + + /** + * Construct the job. + * + * @param projectId the project id + * @param jobId the job id + * @param client the workflow client + */ + public DataflowPipelineJob( + String projectId, String jobId, Dataflow client) { + project = projectId; + this.jobId = jobId; + dataflowClient = client; + } + + public String getJobId() { + return jobId; + } + + public String getProjectId() { + return project; + } + + public Dataflow getDataflowClient() { + return dataflowClient; + } + + /** + * Wait for the job to finish and return the final status. + * + * @param timeToWait The time to wait in units timeUnit for the job to finish. + * @param timeUnit The unit of time for timeToWait. + * Provide a negative value for an infinite wait. + * @param messageHandler If non null this handler will be invoked for each + * batch of messages received. + * @return The final state of the job or null on timeout or if the + * thread is interrupted. + * @throws IOException If there is a persistent problem getting job + * information. + * @throws InterruptedException + */ + @Nullable + public JobState waitToFinish( + long timeToWait, + TimeUnit timeUnit, + MonitoringUtil.JobMessagesHandler messageHandler) + throws IOException, InterruptedException { + // The polling interval for job status information. + long interval = TimeUnit.SECONDS.toMillis(2); + + // The time at which to stop. + long endTime = timeToWait >= 0 + ? System.currentTimeMillis() + timeUnit.toMillis(timeToWait) + : Long.MAX_VALUE; + + MonitoringUtil monitor = new MonitoringUtil(project, dataflowClient); + + long lastTimestamp = 0; + int errorGettingMessages = 0; + int errorGettingJobStatus = 0; + while (true) { + if (System.currentTimeMillis() >= endTime) { + // Timed out. + return null; + } + + if (messageHandler != null) { + // Process all the job messages that have accumulated so far. + try { + List allMessages = monitor.getJobMessages( + jobId, lastTimestamp); + + if (!allMessages.isEmpty()) { + lastTimestamp = + fromCloudTime(allMessages.get(allMessages.size() - 1).getTime()).getMillis(); + messageHandler.process(allMessages); + } + } catch (GoogleJsonResponseException | SocketTimeoutException e) { + if (++errorGettingMessages > 5) { + // We want to continue to wait for the job to finish so + // we ignore this error, but warn occasionally if it keeps happening. + LOG.warn("There are problems accessing job messages: ", e); + errorGettingMessages = 0; + } + } + } + + // Check if the job is done. + try { + Job job = dataflowClient.v1b3().projects().jobs().get(project, jobId).execute(); + JobState state = JobState.toState(job.getCurrentState()); + if (state.isTerminal()) { + return state; + } + } catch (GoogleJsonResponseException | SocketTimeoutException e) { + if (++errorGettingJobStatus > 5) { + // We want to continue to wait for the job to finish so + // we ignore this error, but warn occasionally if it keeps happening. + LOG.warn("There were problems getting job status: ", e); + errorGettingJobStatus = 0; + } + } + + // Job not yet done. Wait a little, then check again. 
+ long sleepTime = Math.min( + endTime - System.currentTimeMillis(), interval); + TimeUnit.MILLISECONDS.sleep(sleepTime); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java new file mode 100644 index 0000000000000..ed01b8345c186 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -0,0 +1,315 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.api.client.googleapis.json.GoogleJsonResponseException; +import com.google.api.client.util.Joiner; +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.api.services.dataflow.model.Job; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; +import com.google.cloud.dataflow.sdk.util.PackageUtil; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; + +import com.fasterxml.jackson.core.JsonProcessingException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A {@link PipelineRunner} that executes the operations in the + * pipeline by first translating them to the Dataflow representation + * using the {@link DataflowPipelineTranslator} and then submitting + * them to a Dataflow service for execution. + */ +public class DataflowPipelineRunner extends PipelineRunner { + private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineRunner.class); + + /** Provided configuration options. 
*/ + private final DataflowPipelineOptions options; + + /** The directory on GCS where files should be uploaded. */ + private final GcsPath gcsStaging; + + /** The directory on GCS where temporary files are stored. */ + private final GcsPath gcsTemp; + + /** Client for the Dataflow service. This is used to actually submit jobs. */ + private final Dataflow dataflowClient; + + /** Translator for this DataflowPipelineRunner, based on options. */ + private final DataflowPipelineTranslator translator; + + /** A set of user defined functions to invoke at different points in execution. */ + private DataflowPipelineRunnerHooks hooks; + + // Environment version information + private static final String ENVIRONMENT_MAJOR_VERSION = "0"; + + /** + * Construct a runner from the provided options. + * + * @param options Properties which configure the runner. + * @return The newly created runner. + */ + public static DataflowPipelineRunner fromOptions(PipelineOptions options) { + DataflowPipelineOptions dataflowOptions = + PipelineOptionsValidator.validate(DataflowPipelineOptions.class, options); + ArrayList missing = new ArrayList<>(); + + if (dataflowOptions.getProject() == null) { + missing.add("project"); + } + if (dataflowOptions.getAppName() == null) { + missing.add("appName"); + } + if (missing.size() > 0) { + throw new IllegalArgumentException( + "Missing required values: " + Joiner.on(',').join(missing)); + } + + Preconditions.checkArgument(!(Strings.isNullOrEmpty(dataflowOptions.getTempLocation()) + && Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())), + "Missing required value: at least one of tempLocation or stagingLocation must be set."); + if (Strings.isNullOrEmpty(dataflowOptions.getTempLocation())) { + dataflowOptions.setTempLocation(dataflowOptions.getStagingLocation()); + } else if (Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())) { + dataflowOptions.setStagingLocation( + GcsPath.fromUri(dataflowOptions.getTempLocation()).resolve("staging").toString()); + } + + if (dataflowOptions.getFilesToStage() == null) { + dataflowOptions.setFilesToStage(detectClassPathResourcesToStage( + DataflowPipelineRunner.class.getClassLoader())); + LOG.info("No specified files to stage. Defaulting to files: {}", + dataflowOptions.getFilesToStage()); + } + + // Verify jobName according to service requirements. + String jobName = dataflowOptions.getJobName().toLowerCase(); + Preconditions.checkArgument( + jobName.matches("[a-z]([-a-z0-9]*[a-z0-9])?"), + "JobName invalid; the name must consist of only the characters " + + "[-a-z0-9], starting with a letter and ending with a letter " + + "or number"); + Preconditions.checkArgument(jobName.length() <= 40, + "JobName too long; must be no more than 40 characters in length"); + + return new DataflowPipelineRunner(dataflowOptions); + } + + private DataflowPipelineRunner(DataflowPipelineOptions options) { + this.options = options; + this.dataflowClient = options.getDataflowClient(); + this.gcsTemp = GcsPath.fromUri(options.getTempLocation()); + this.gcsStaging = GcsPath.fromUri(options.getStagingLocation()); + this.translator = DataflowPipelineTranslator.fromOptions(options); + + // (Re-)register standard IO factories. Clobbers any prior credentials. + IOChannelUtils.registerStandardIOFactories(options); + } + + @Override + @SuppressWarnings("unchecked") + public Output apply( + PTransform transform, Input input) { + if (transform instanceof Combine.GroupedValues) { + // TODO: Redundant with translator registration? 
+ return (Output) PCollection.createPrimitiveOutputInternal( + ((PCollection) input).getWindowingFn()); + } else if (transform instanceof GroupByKey) { + // The DataflowPipelineRunner implementation of GroupByKey will sort values by timestamp, + // so no need for an explicit sort transform. + boolean runnerSortsByTimestamp = true; + return (Output) ((GroupByKey) transform).applyHelper( + (PCollection) input, options.isStreaming(), runnerSortsByTimestamp); + } else { + return super.apply(transform, input); + } + } + + @Override + public DataflowPipelineJob run(Pipeline pipeline) { + LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications " + + "related to Google Compute Engine usage and other Google Cloud Services."); + + GcsUtil gcsUtil = options.getGcsUtil(); + List packages = + PackageUtil.stageClasspathElementsToGcs(gcsUtil, options.getFilesToStage(), gcsStaging); + + Job newJob = translator.translate(pipeline, packages); + + String version = DataflowReleaseInfo.getReleaseInfo().getVersion(); + System.out.println("Dataflow SDK version: " + version); + + newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo()); + // The Dataflow Service may write to the temporary directory directly, so + // must be verified. + newJob.getEnvironment().setTempStoragePrefix(verifyGcsPath(gcsTemp).toResourceName()); + newJob.getEnvironment().setDataset(options.getTempDatasetId()); + newJob.getEnvironment().setClusterManagerApiService( + options.getClusterManagerApi().getApiServiceName()); + newJob.getEnvironment().setExperiments(options.getExperiments()); + + // Requirements about the service. + Map environmentVersion = new HashMap<>(); + // TODO: Specify the environment major version. + // environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, + // ENVIRONMENT_MAJOR_VERSION); + newJob.getEnvironment().setVersion(environmentVersion); + // Default jobType is DATA_PARALLEL which is for java batch. + String jobType = "DATA_PARALLEL"; + + if (options.isStreaming()) { + jobType = "STREAMING"; + } + environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType); + + if (hooks != null) { + hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment()); + } + + if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) { + try (PrintWriter printWriter = new PrintWriter( + new File(options.getDataflowJobFile()))) { + String workSpecJson = DataflowPipelineTranslator.jobToString(newJob); + printWriter.print(workSpecJson); + LOG.info("Printed workflow specification to {}", options.getDataflowJobFile()); + } catch (JsonProcessingException ex) { + LOG.warn("Cannot translate workflow spec to json for debug."); + } catch (FileNotFoundException ex) { + LOG.warn("Cannot create workflow spec output file."); + } + } + + Job jobResult; + try { + jobResult = dataflowClient.v1b3().projects().jobs() + .create(options.getProject(), newJob) + .execute(); + } catch (GoogleJsonResponseException e) { + throw new RuntimeException( + "Failed to create a workflow job: " + + (e.getDetails() != null ? 
e.getDetails().getMessage() : e), e); + } catch (IOException e) { + throw new RuntimeException("Failed to create a workflow job", e); + } + + LOG.info("To access the Dataflow monitoring console, please navigate to {}", + MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId())); + System.out.println("Submitted job: " + jobResult.getId()); + + // Use a raw client for post-launch monitoring, as status calls may fail + // regularly and need not be retried automatically. + return new DataflowPipelineJob(options.getProject(), jobResult.getId(), + Transport.newRawDataflowClient(options).build()); + } + + /** + * Returns the DataflowPipelineTranslator associated with this object. + */ + public DataflowPipelineTranslator getTranslator() { + return translator; + } + + /** + * Sets callbacks to invoke during execution see {@code DataflowPipelineRunnerHooks}. + * Important: setHooks is experimental. Please consult with the Dataflow team before using it. + * You should expect this class to change significantly in future versions of the SDK or be + * removed entirely. + */ + public void setHooks(DataflowPipelineRunnerHooks hooks) { + this.hooks = hooks; + } + + + ///////////////////////////////////////////////////////////////////////////// + + @Override + public String toString() { return "DataflowPipelineRunner#" + hashCode(); } + + /** + * Verifies that a path can be used by the Dataflow Service API. + * @return the supplied path + */ + public static GcsPath verifyGcsPath(GcsPath path) { + Preconditions.checkArgument(path.isAbsolute(), + "Must provide absolute paths for Dataflow"); + Preconditions.checkArgument(!path.getObject().contains("//"), + "Dataflow Service does not allow objects with consecutive slashes"); + return path; + } + + /** + * Attempts to detect all the resources the class loader has access to. This does not recurse + * to class loader parents stopping it from pulling in resources from the system class loader. + * + * @param classLoader The URLClassLoader to use to detect resources to stage. + * @throws IllegalArgumentException If either the class loader is not a URLClassLoader or one + * of the resources the class loader exposes is not a file resource. + * @return A list of absolute paths to the resources the class loader uses. + */ + protected static List detectClassPathResourcesToStage(ClassLoader classLoader) { + if (!(classLoader instanceof URLClassLoader)) { + String message = String.format("Unable to use ClassLoader to detect classpath elements. " + + "Current ClassLoader is %s, only URLClassLoaders are supported.", classLoader); + LOG.error(message); + throw new IllegalArgumentException(message); + } + + List files = new ArrayList<>(); + for (URL url : ((URLClassLoader) classLoader).getURLs()) { + try { + files.add(new File(url.toURI()).getAbsolutePath()); + } catch (IllegalArgumentException | URISyntaxException e) { + String message = String.format("Unable to convert url (%s) to file.", url); + LOG.error(message); + throw new IllegalArgumentException(message, e); + } + } + return files; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java new file mode 100644 index 0000000000000..ba822e876e481 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.api.services.dataflow.model.Environment; + +/** + * An instance of this class can be passed to the + * DataflowPipeline runner to add user defined hooks to be + * invoked at various times during pipeline execution. + * + * Important: DataflowPipelineRunnerHooks is experimental. Please consult with + * the Dataflow team before using it. You should expect this class to change significantly + * in future versions of the SDK or be removed entirely. + * + */ +public class DataflowPipelineRunnerHooks { + /** + * Allows the user to modify the environment of their job before their job is submitted + * to the service for execution. + * + * @param environment The environment of the job. Users can make change to this instance in order + * to change the environment with which their job executes on the service. + */ + public void modifyEnvironmentBeforeSubmission(Environment environment) {} +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java new file mode 100644 index 0000000000000..6f39a2bae5b8a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -0,0 +1,963 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; +import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray; +import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary; +import static com.google.cloud.dataflow.sdk.util.Structs.addList; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addObject; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.client.util.Preconditions; +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.api.services.dataflow.model.Disk; +import com.google.api.services.dataflow.model.Environment; +import com.google.api.services.dataflow.model.Job; +import com.google.api.services.dataflow.model.Step; +import com.google.api.services.dataflow.model.TaskRunnerSettings; +import com.google.api.services.dataflow.model.WorkerPool; +import com.google.api.services.dataflow.model.WorkerSettings; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.io.AvroIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.DatastoreIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.runners.dataflow.AvroIOTranslator; +import com.google.cloud.dataflow.sdk.runners.dataflow.BigQueryIOTranslator; +import com.google.cloud.dataflow.sdk.runners.dataflow.DatastoreIOTranslator; +import com.google.cloud.dataflow.sdk.runners.dataflow.PubsubIOTranslator; +import com.google.cloud.dataflow.sdk.runners.dataflow.TextIOTranslator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey.GroupByKeyOnly; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.OutputReference; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.cloud.dataflow.sdk.values.TupleTag; 
+import com.google.cloud.dataflow.sdk.values.TypedPValue; +import com.google.common.base.Strings; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * DataflowPipelineTranslator knows how to translate Pipeline objects + * into Dataflow API Jobs. + */ +public class DataflowPipelineTranslator { + // Must be kept in sync with their internal counterparts. + public static final String HARNESS_WORKER_POOL = "harness"; + public static final String SHUFFLE_WORKER_POOL = "shuffle"; + private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineTranslator.class); + + /** + * A map from PTransform class to the corresponding + * TransformTranslator to use to translate that transform. + * + *
<p>
A static map that contains system-wide defaults. + */ + private static Map transformTranslators = + new HashMap<>(); + + /** Provided configuration options. */ + private final DataflowPipelineOptions options; + + /** + * Constructs a translator from the provided options. + * + * @param options Properties which configure the translator. + * + * @return The newly created translator. + */ + public static DataflowPipelineTranslator fromOptions( + DataflowPipelineOptions options) { + return new DataflowPipelineTranslator(options); + } + + private DataflowPipelineTranslator(DataflowPipelineOptions options) { + this.options = options; + } + + /** + * Translates a Pipeline into a Job + */ + public Job translate(Pipeline pipeline, List packages) { + Translator translator = new Translator(pipeline); + return translator.translate(packages); + } + + public static String jobToString(Job job) + throws JsonProcessingException { + return new ObjectMapper().writerWithDefaultPrettyPrinter() + .writeValueAsString(job); + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Records that instances of the specified PTransform class + * should be translated by default by the corresponding + * TransformTranslator. + */ + public static void registerTransformTranslator( + Class transformClass, + TransformTranslator transformTranslator) { + if (transformTranslators.put(transformClass, transformTranslator) != null) { + throw new IllegalArgumentException( + "defining multiple translators for " + transformClass); + } + } + + /** + * Returns the TransformTranslator to use for instances of the + * specified PTransform class, or null if none registered. + */ + @SuppressWarnings("unchecked") + public + TransformTranslator getTransformTranslator(Class transformClass) { + return transformTranslators.get(transformClass); + } + + /** + * An translator of a PTransform. + */ + public interface TransformTranslator { + public void translate(PT transform, + TranslationContext context); + } + + /** + * The interface provided to registered callbacks for interacting + * with the DataflowPipelineRunner, including reading and writing the + * values of PCollections and side inputs ({@link PCollectionViews}). + */ + public interface TranslationContext { + /** + * Returns the configured pipeline options. + */ + DataflowPipelineOptions getPipelineOptions(); + + /** + * Adds a step to the Dataflow workflow for the given transform, with + * the given Dataflow step type. + * This step becomes "current" for the purpose of {@link #addInput} and + * {@link #addOutput}. + */ + public void addStep(PTransform transform, String type); + + /** + * Adds a pre-defined step to the Dataflow workflow. The given PTransform should be + * consistent with the Step, in terms of input, output and coder types. + * + *
<p>
This is a low-level operation, when using this method it is up to + * the caller to ensure that names do not collide. + */ + public void addStep(PTransform transform, Step step); + + /** + * Sets the encoding for the current Dataflow step. + */ + public void addEncodingInput(Coder value); + + /** + * Adds an input with the given name and value to the current + * Dataflow step. + */ + public void addInput(String name, String value); + + /** + * Adds an input with the given name and value to the current + * Dataflow step. + */ + public void addInput(String name, Long value); + + /** + * Adds an input with the given name to the previously added Dataflow + * step, coming from the specified input PValue. + */ + public void addInput(String name, PInput value); + + /** + * Adds an input with the given name and value to the current + * Dataflow step. + * + *
<p>
This applies any verification of paths required by the Dataflow + * service. + */ + public void addInput(String name, GcsPath path); + + /** + * Adds an input which is a dictionary of strings to objects. + */ + public void addInput(String name, Map elements); + + /** + * Adds an input which is a list of objects. + */ + public void addInput(String name, List> elements); + + /** + * Adds an output with the given name to the previously added + * Dataflow step, producing the specified output {@code PValue}, + * including its {@code Coder} if a {@code TypedPValue}. If the + * {@code PValue} is a {@code PCollection}, wraps its coder inside + * a {@code WindowedValueCoder}. + */ + public void addOutput(String name, PValue value); + + /** + * Adds an output with the given name to the previously added + * Dataflow step, producing the specified output {@code PValue}, + * including its {@code Coder} if a {@code TypedPValue}. If the + * {@code PValue} is a {@code PCollection}, wraps its coder inside + * a {@code ValueOnlyCoder}. + */ + public void addValueOnlyOutput(String name, PValue value); + + /** + * Adds an output with the given name to the previously added + * CollectionToSingleton Dataflow step, consuming the specified + * input {@code PValue} and producing the specified output + * {@code PValue}. This step requires special treatment for its + * output encoding. + */ + public void addCollectionToSingletonOutput(String name, + PValue inputValue, + PValue outputValue); + + /** + * Encode a PValue reference as an output reference. + */ + public OutputReference asOutputReference(PValue value); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Translates a Pipeline into the Dataflow representation. + */ + class Translator implements PipelineVisitor, TranslationContext { + /** The Pipeline to translate. */ + private final Pipeline pipeline; + + /** The Cloud Dataflow Job representation. */ + private final Job job = new Job(); + + /** + * Translator is stateful, as addProperty calls refer to the current step. + */ + private Step currentStep; + + /** + * A Map from PTransforms to their unique Dataflow step names. + */ + private final Map stepNames = new HashMap<>(); + + /** + * A Map from PValues to their output names used by their producer + * Dataflow steps. + */ + private final Map outputNames = new HashMap<>(); + + /** + * A Map from PValues to the Coders used for them. + */ + private final Map> outputCoders = new HashMap<>(); + + /** + * Constructs a Translator that will translate the specified + * Pipeline into Dataflow objects. + */ + public Translator(Pipeline pipeline) { + this.pipeline = pipeline; + } + + /** + * Translates this Translator's pipeline onto its writer. + * @return a Job definition filled in with the type of job, the environment, + * and the job steps. + */ + public Job translate(List packages) { + job.setName(options.getJobName().toLowerCase()); + + Environment environment = new Environment(); + job.setEnvironment(environment); + + WorkerPool workerPool = new WorkerPool(); + + workerPool.setKind(HARNESS_WORKER_POOL); + + // Pass the URL and endpoint to use to the worker pool. 
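+      // Both values below come from DataflowPipelineOptions: getApiRootUrl()
+      // supplies the base URL and getDataflowEndpoint() the service path the
+      // worker pool will use.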
+ WorkerSettings workerSettings = new WorkerSettings(); + workerSettings.setBaseUrl(options.getApiRootUrl()); + workerSettings.setServicePath(options.getDataflowEndpoint()); + + TaskRunnerSettings taskRunnerSettings = new TaskRunnerSettings(); + taskRunnerSettings.setParallelWorkerSettings(workerSettings); + + workerPool.setTaskrunnerSettings(taskRunnerSettings); + + WorkerPool shufflePool = new WorkerPool(); + shufflePool.setKind(SHUFFLE_WORKER_POOL); + + if (options.isStreaming()) { + job.setType("JOB_TYPE_STREAMING"); + } else { + job.setType("JOB_TYPE_BATCH"); + } + + if (options.getWorkerMachineType() != null) { + workerPool.setMachineType(options.getWorkerMachineType()); + } + + workerPool.setPackages(packages); + workerPool.setNumWorkers(options.getNumWorkers()); + shufflePool.setNumWorkers(options.getNumWorkers()); + if (options.getDiskSourceImage() != null) { + workerPool.setDiskSourceImage(options.getDiskSourceImage()); + shufflePool.setDiskSourceImage(options.getDiskSourceImage()); + } + + if (options.getMachineType() != null) { + workerPool.setMachineType(options.getMachineType()); + } + if (options.isStreaming()) { + // Use separate data disk for streaming. + Disk disk = new Disk(); + disk.setSizeGb(10); + disk.setDiskType( + // TODO: Fill in the project and zone. + "compute.googleapis.com/projects//zones//diskTypes/pd-standard"); + // TODO: introduce a separate location for Windmill binary in the + // TaskRunner so it wouldn't interfere with the data disk mount point. + disk.setMountPoint("/windmill"); + workerPool.setDataDisks(Collections.singletonList(disk)); + } + if (!Strings.isNullOrEmpty(options.getZone())) { + workerPool.setZone(options.getZone()); + shufflePool.setZone(options.getZone()); + } + if (options.getDiskSizeGb() > 0) { + workerPool.setDiskSizeGb(options.getDiskSizeGb()); + shufflePool.setDiskSizeGb(options.getDiskSizeGb()); + } + + // Set up any specific shuffle pool parameters + if (options.getShuffleNumWorkers() > 0) { + shufflePool.setNumWorkers(options.getShuffleNumWorkers()); + } + if (options.getShuffleDiskSourceImage() != null) { + shufflePool.setDiskSourceImage(options.getShuffleDiskSourceImage()); + } + if (!Strings.isNullOrEmpty(options.getShuffleZone())) { + shufflePool.setZone(options.getShuffleZone()); + } + if (options.getShuffleDiskSizeGb() > 0) { + shufflePool.setDiskSizeGb(options.getShuffleDiskSizeGb()); + } + + List workerPools = new LinkedList<>(); + + workerPools.add(workerPool); + if (!options.isStreaming()) { + workerPools.add(shufflePool); + } + environment.setWorkerPools(workerPools); + + pipeline.traverseTopologically(this); + return job; + } + + @Override + public DataflowPipelineOptions getPipelineOptions() { + return options; + } + + @Override + public void enterCompositeTransform(TransformTreeNode node) { + } + + @Override + public void leaveCompositeTransform(TransformTreeNode node) { + } + + @SuppressWarnings("unchecked") + @Override + public void visitTransform(TransformTreeNode node) { + PTransform transform = node.getTransform(); + TransformTranslator translator = + getTransformTranslator(transform.getClass()); + if (translator == null) { + throw new IllegalStateException( + "no translator registered for " + transform); + } + LOG.debug("Translating {}", transform); + translator.translate(transform, this); + } + + @Override + public void visitValue(PValue value, TransformTreeNode producer) { + LOG.debug("Checking translation of {}", value); + if (options.isStreaming() + && value instanceof PCollectionView) { + throw new 
UnsupportedOperationException( + "PCollectionViews are not supported in streaming Dataflow."); + } + if (value.getProducingTransformInternal() == null) { + throw new RuntimeException( + "internal error: expecting a PValue " + + "to have a producingTransform"); + } + if (!producer.isCompositeNode()) { + // Primitive transforms are the only ones assigned step names. + asOutputReference(value); + } + } + + @Override + public void addStep(PTransform transform, String type) { + String stepName = genStepName(); + if (stepNames.put(transform, stepName) != null) { + throw new IllegalArgumentException( + transform + " already has a name specified"); + } + // Start the next "steps" list item. + List steps = job.getSteps(); + if (steps == null) { + steps = new LinkedList<>(); + job.setSteps(steps); + } + + currentStep = new Step(); + currentStep.setName(stepName); + currentStep.setKind(type); + steps.add(currentStep); + addInput(PropertyNames.USER_NAME, pipeline.getFullName(transform)); + } + + @Override + public void addStep(PTransform transform, Step original) { + Step step = original.clone(); + String stepName = step.getName(); + if (stepNames.put(transform, stepName) != null) { + throw new IllegalArgumentException(transform + " already has a name specified"); + } + + Map properties = step.getProperties(); + if (properties != null) { + @Nullable List> outputInfoList = null; + try { + // TODO: This should be done via a Structs accessor. + outputInfoList = (List>) properties.get(PropertyNames.OUTPUT_INFO); + } catch (Exception e) { + throw new RuntimeException("Inconsistent dataflow pipeline translation", e); + } + if (outputInfoList != null && outputInfoList.size() > 0) { + Map firstOutputPort = outputInfoList.get(0); + @Nullable String name; + try { + name = getString(firstOutputPort, PropertyNames.OUTPUT_NAME); + } catch (Exception e) { + name = null; + } + if (name != null) { + registerOutputName(pipeline.getOutput(transform), name); + } + } + } + + List steps = job.getSteps(); + if (steps == null) { + steps = new LinkedList<>(); + job.setSteps(steps); + } + currentStep = step; + steps.add(step); + } + + @Override + public void addEncodingInput(Coder coder) { + CloudObject encoding = SerializableUtils.ensureSerializable(coder); + addObject(getProperties(), PropertyNames.ENCODING, encoding); + } + + @Override + public void addInput(String name, String value) { + addString(getProperties(), name, value); + } + + @Override + public void addInput(String name, Long value) { + addLong(getProperties(), name, value); + } + + @Override + public void addInput(String name, Map elements) { + addDictionary(getProperties(), name, elements); + } + + @Override + public void addInput(String name, List> elements) { + addList(getProperties(), name, elements); + } + + @Override + public void addInput(String name, PInput value) { + if (value instanceof PValue) { + addInput(name, asOutputReference((PValue) value)); + } else { + throw new IllegalStateException("Input must be a PValue"); + } + } + + @Override + public void addInput(String name, GcsPath path) { + addInput(name, DataflowPipelineRunner.verifyGcsPath(path).toResourceName()); + } + + @Override + public void addOutput(String name, PValue value) { + Coder coder; + if (value instanceof TypedPValue) { + coder = ((TypedPValue) value).getCoder(); + if (value instanceof PCollection) { + // Wrap the PCollection element Coder inside a WindowedValueCoder. 
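+          // The full WindowedValueCoder carries each element's timestamp and
+          // window assignment along with the value itself, so that metadata
+          // survives encoding when the collection crosses the service boundary.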
+ coder = WindowedValue.getFullCoder( + coder, + ((PCollection) value).getWindowingFn().windowCoder()); + } + } else { + // No output coder to encode. + coder = null; + } + addOutput(name, value, coder); + } + + @Override + public void addValueOnlyOutput(String name, PValue value) { + Coder coder; + if (value instanceof TypedPValue) { + coder = ((TypedPValue) value).getCoder(); + if (value instanceof PCollection) { + // Wrap the PCollection element Coder inside a ValueOnly + // WindowedValueCoder. + coder = WindowedValue.getValueOnlyCoder(coder); + } + } else { + // No output coder to encode. + coder = null; + } + addOutput(name, value, coder); + } + + @Override + public void addCollectionToSingletonOutput(String name, + PValue inputValue, + PValue outputValue) { + Coder inputValueCoder = + Preconditions.checkNotNull(outputCoders.get(inputValue)); + // The inputValueCoder for the input PCollection should be some + // WindowedValueCoder of the input PCollection's element + // coder. + Preconditions.checkState( + inputValueCoder instanceof WindowedValue.WindowedValueCoder); + // The outputValueCoder for the output should be an + // IterableCoder of the inputValueCoder. This is a property + // of the backend "CollectionToSingleton" step. + Coder outputValueCoder = IterableCoder.of(inputValueCoder); + addOutput(name, outputValue, outputValueCoder); + } + + /** + * Adds an output with the given name to the previously added + * Dataflow step, producing the specified output {@code PValue} + * with the given {@code Coder} (if not {@code null}). + */ + @SuppressWarnings("unchecked") + private void addOutput(String name, PValue value, Coder valueCoder) { + registerOutputName(value, name); + + Map properties = getProperties(); + @Nullable List> outputInfoList = null; + try { + // TODO: This should be done via a Structs accessor. + outputInfoList = (List>) properties.get(PropertyNames.OUTPUT_INFO); + } catch (Exception e) { + throw new RuntimeException("Inconsistent dataflow pipeline translation", e); + } + if (outputInfoList == null) { + outputInfoList = new ArrayList<>(); + // TODO: This should be done via a Structs accessor. + properties.put(PropertyNames.OUTPUT_INFO, outputInfoList); + } + + Map outputInfo = new HashMap<>(); + addString(outputInfo, PropertyNames.OUTPUT_NAME, name); + addString(outputInfo, PropertyNames.USER_NAME, value.getName()); + + if (valueCoder != null) { + // Verify that encoding can be decoded, in order to catch serialization + // failures as early as possible. + CloudObject encoding = SerializableUtils.ensureSerializable(valueCoder); + addObject(outputInfo, PropertyNames.ENCODING, encoding); + outputCoders.put(value, valueCoder); + } + + outputInfoList.add(outputInfo); + } + + @Override + public OutputReference asOutputReference(PValue value) { + PTransform transform = + value.getProducingTransformInternal(); + String stepName = stepNames.get(transform); + if (stepName == null) { + throw new IllegalArgumentException(transform + " doesn't have a name specified"); + } + + String outputName = outputNames.get(value); + if (outputName == null) { + throw new IllegalArgumentException( + "output " + value + " doesn't have a name specified"); + } + + return new OutputReference(stepName, outputName); + } + + private Map getProperties() { + Map properties = currentStep.getProperties(); + if (properties == null) { + properties = new HashMap<>(); + currentStep.setProperties(properties); + } + return properties; + } + + /** + * Returns a fresh Dataflow step name. 
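+     * Names are generated as "s1", "s2", "s3", ... in the order steps are recorded.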
+ */ + private String genStepName() { + return "s" + (stepNames.size() + 1); + } + + /** + * Records the name of the given output PValue, + * within its producing transform. + */ + private void registerOutputName(POutput value, String name) { + if (outputNames.put(value, name) != null) { + throw new IllegalArgumentException( + "output " + value + " already has a name specified"); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + @Override + public String toString() { + return "DataflowPipelineTranslator#" + hashCode(); + } + + + /////////////////////////////////////////////////////////////////////////// + + static { + registerTransformTranslator( + View.CreatePCollectionView.class, + new TransformTranslator() { + @Override + public void translate( + View.CreatePCollectionView transform, + TranslationContext context) { + translateTyped(transform, context); + } + + private void translateTyped( + View.CreatePCollectionView transform, + TranslationContext context) { + context.addStep(transform, "CollectionToSingleton"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + context.addCollectionToSingletonOutput( + PropertyNames.OUTPUT, + transform.getInput(), + transform.getOutput()); + } + }); + + DataflowPipelineTranslator.registerTransformTranslator( + Combine.GroupedValues.class, + new DataflowPipelineTranslator.TransformTranslator() { + @SuppressWarnings("unchecked") + @Override + public void translate( + Combine.GroupedValues transform, + DataflowPipelineTranslator.TranslationContext context) { + translateHelper(transform, context); + } + + private void translateHelper( + final Combine.GroupedValues transform, + DataflowPipelineTranslator.TranslationContext context) { + context.addStep(transform, "CombineValues"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + context.addInput( + PropertyNames.SERIALIZED_FN, + byteArrayToJsonString(serializeToByteArray(transform.getFn()))); + context.addEncodingInput(transform.getAccumulatorCoder()); + context.addOutput(PropertyNames.OUTPUT, transform.getOutput()); + } + }); + + registerTransformTranslator( + Create.class, + new TransformTranslator() { + @Override + public void translate( + Create transform, + TranslationContext context) { + createHelper(transform, context); + } + + private void createHelper( + Create transform, + TranslationContext context) { + context.addStep(transform, "CreateCollection"); + + Coder coder = transform.getOutput().getCoder(); + List elements = new LinkedList<>(); + for (T elem : transform.getElements()) { + byte[] encodedBytes; + try { + encodedBytes = encodeToByteArray(coder, elem); + } catch (CoderException exn) { + // TODO: Put in better element printing: + // truncate if too long. 
+ throw new IllegalArgumentException( + "unable to encode element " + elem + " of " + transform + + " using " + coder, + exn); + } + String encodedJson = byteArrayToJsonString(encodedBytes); + assert Arrays.equals(encodedBytes, + jsonStringToByteArray(encodedJson)); + elements.add(CloudObject.forString(encodedJson)); + } + context.addInput(PropertyNames.ELEMENT, elements); + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + } + }); + + registerTransformTranslator( + Flatten.FlattenPCollectionList.class, + new TransformTranslator() { + @Override + public void translate( + Flatten.FlattenPCollectionList transform, + TranslationContext context) { + flattenHelper(transform, context); + } + + private void flattenHelper( + Flatten.FlattenPCollectionList transform, + TranslationContext context) { + context.addStep(transform, "Flatten"); + + List inputs = new LinkedList<>(); + for (PCollection input : transform.getInput().getAll()) { + inputs.add(context.asOutputReference(input)); + } + context.addInput(PropertyNames.INPUTS, inputs); + context.addOutput(PropertyNames.OUTPUT, transform.getOutput()); + // TODO: Need to specify orderedness. + } + }); + + registerTransformTranslator( + GroupByKeyOnly.class, + new TransformTranslator() { + @Override + public void translate( + GroupByKeyOnly transform, + TranslationContext context) { + groupByKeyHelper(transform, context); + } + + private void groupByKeyHelper( + GroupByKeyOnly transform, + TranslationContext context) { + context.addStep(transform, "GroupByKey"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + context.addOutput(PropertyNames.OUTPUT, transform.getOutput()); + // TODO: sortsValues + } + }); + + registerTransformTranslator( + ParDo.BoundMulti.class, + new TransformTranslator() { + @Override + public void translate( + ParDo.BoundMulti transform, + TranslationContext context) { + translateMultiHelper(transform, context); + } + + private void translateMultiHelper( + ParDo.BoundMulti transform, + TranslationContext context) { + context.addStep(transform, "ParallelDo"); + translateInputs(transform.getInput(), transform.getSideInputs(), context); + translateFn(transform.getFn(), context); + translateOutputs(transform.getOutput(), context); + } + }); + + registerTransformTranslator( + ParDo.Bound.class, + new TransformTranslator() { + @Override + public void translate( + ParDo.Bound transform, + TranslationContext context) { + translateSingleHelper(transform, context); + } + + private void translateSingleHelper( + ParDo.Bound transform, + TranslationContext context) { + context.addStep(transform, "ParallelDo"); + translateInputs(transform.getInput(), transform.getSideInputs(), context); + translateFn(transform.getFn(), context); + context.addOutput("out", transform.getOutput()); + } + }); + + /////////////////////////////////////////////////////////////////////////// + // IO Translation. 
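+    // Each supported IO transform delegates to a dedicated translator in the
+    // runners.dataflow package (AvroIOTranslator, BigQueryIOTranslator, etc.)
+    // registered below, rather than being translated inline in this class.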
+ + registerTransformTranslator( + AvroIO.Read.Bound.class, new AvroIOTranslator.ReadTranslator()); + registerTransformTranslator( + AvroIO.Write.Bound.class, new AvroIOTranslator.WriteTranslator()); + + registerTransformTranslator( + BigQueryIO.Read.Bound.class, new BigQueryIOTranslator.ReadTranslator()); + registerTransformTranslator( + BigQueryIO.Write.Bound.class, new BigQueryIOTranslator.WriteTranslator()); + + registerTransformTranslator( + DatastoreIO.Write.Bound.class, new DatastoreIOTranslator.WriteTranslator()); + + registerTransformTranslator( + PubsubIO.Read.Bound.class, new PubsubIOTranslator.ReadTranslator()); + registerTransformTranslator( + PubsubIO.Write.Bound.class, new PubsubIOTranslator.WriteTranslator()); + + registerTransformTranslator( + TextIO.Read.Bound.class, new TextIOTranslator.ReadTranslator()); + registerTransformTranslator( + TextIO.Write.Bound.class, new TextIOTranslator.WriteTranslator()); + } + + private static void translateInputs( + PCollection input, + List> sideInputs, + TranslationContext context) { + context.addInput(PropertyNames.PARALLEL_INPUT, input); + translateSideInputs(sideInputs, context); + } + + // Used for ParDo + private static void translateSideInputs( + List> sideInputs, + TranslationContext context) { + Map nonParInputs = new HashMap<>(); + + for (PCollectionView view : sideInputs) { + nonParInputs.put( + view.getTagInternal().getId(), + context.asOutputReference(view)); + } + + context.addInput(PropertyNames.NON_PARALLEL_INPUTS, nonParInputs); + } + + private static void translateFn( + Serializable fn, + TranslationContext context) { + context.addInput(PropertyNames.USER_FN, fn.getClass().getName()); + context.addInput( + PropertyNames.SERIALIZED_FN, + byteArrayToJsonString(serializeToByteArray(fn))); + if (fn instanceof DoFn.RequiresKeyedState) { + context.addInput(PropertyNames.USES_KEYED_STATE, "true"); + } + } + + private static void translateOutputs( + PCollectionTuple outputs, + TranslationContext context) { + for (Map.Entry, PCollection> entry + : outputs.getAll().entrySet()) { + TupleTag tag = entry.getKey(); + PCollection output = entry.getValue(); + context.addOutput(tag.getId(), output); + // TODO: Need to specify orderedness. + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java new file mode 100644 index 0000000000000..e3cd18ecfda3b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions; + +/** + * A DirectPipeline, which returns a + * {@link DirectPipelineRunner.EvaluationResults} subclass of PipelineResult + * from {@link com.google.cloud.dataflow.sdk.Pipeline#run()}. 
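+ *
+ * <p>Editor's illustrative sketch (not part of the original patch), using only
+ * classes defined in this SDK, of running a test pipeline and reading back its
+ * contents:
+ * <pre>{@code
+ * DirectPipeline p = DirectPipeline.createForTest();
+ * PCollection<String> words =
+ *     p.apply(Create.of("to", "be", "or", "not")).setCoder(StringUtf8Coder.of());
+ * DirectPipelineRunner.EvaluationResults results = p.run();
+ * List<String> contents = results.getPCollection(words);
+ * }</pre>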
+ */ +public class DirectPipeline extends Pipeline { + + /** + * Creates and returns a new DirectPipeline instance for tests. + */ + public static DirectPipeline createForTest() { + DirectPipelineRunner runner = DirectPipelineRunner.createForTest(); + return new DirectPipeline(runner, runner.getPipelineOptions()); + } + + private DirectPipeline(DirectPipelineRunner runner, DirectPipelineOptions options) { + super(runner, options); + } + + @Override + public DirectPipelineRunner.EvaluationResults run() { + return (DirectPipelineRunner.EvaluationResults) super.run(); + } + + @Override + public DirectPipelineRunner getRunner() { + return (DirectPipelineRunner) super.getRunner(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java new file mode 100644 index 0000000000000..a19b2055a0b99 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -0,0 +1,844 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.cloud.dataflow.sdk.values.TypedPValue; +import com.google.common.base.Function; +import com.google.common.collect.Lists; + +import 
org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +/** + * Executes the operations in the pipeline directly, in this process, without + * any optimization. Useful for small local execution and tests. + * + *
<p>
Throws an exception from {@link #run} if execution fails. + */ +public class DirectPipelineRunner + extends PipelineRunner { + private static final Logger LOG = LoggerFactory.getLogger(DirectPipelineRunner.class); + + /** + * A map from PTransform class to the corresponding + * TransformEvaluator to use to evaluate that transform. + * + *
<p>
A static map that contains system-wide defaults. + */ + private static Map defaultTransformEvaluators = + new HashMap<>(); + + /** + * A map from PTransform class to the corresponding + * TransformEvaluator to use to evaluate that transform. + * + *
<p>
An instance map that contains bindings for this DirectPipelineRunner. + * Bindings in this map override those in the default map. + */ + private Map localTransformEvaluators = + new HashMap<>(); + + /** + * Records that instances of the specified PTransform class + * should be evaluated by default by the corresponding + * TransformEvaluator. + */ + public static > + void registerDefaultTransformEvaluator( + Class transformClass, + TransformEvaluator transformEvaluator) { + if (defaultTransformEvaluators.put(transformClass, transformEvaluator) + != null) { + throw new IllegalArgumentException( + "defining multiple evaluators for " + transformClass); + } + } + + /** + * Records that instances of the specified PTransform class + * should be evaluated by the corresponding TransformEvaluator. + * Overrides any bindings specified by + * {@link #registerDefaultTransformEvaluator}. + */ + public > + void registerTransformEvaluator( + Class transformClass, + TransformEvaluator transformEvaluator) { + if (localTransformEvaluators.put(transformClass, transformEvaluator) + != null) { + throw new IllegalArgumentException( + "defining multiple evaluators for " + transformClass); + } + } + + /** + * Returns the TransformEvaluator to use for instances of the + * specified PTransform class, or null if none registered. + */ + @SuppressWarnings("unchecked") + public > + TransformEvaluator getTransformEvaluator(Class transformClass) { + TransformEvaluator transformEvaluator = + localTransformEvaluators.get(transformClass); + if (transformEvaluator == null) { + transformEvaluator = defaultTransformEvaluators.get(transformClass); + } + return transformEvaluator; + } + + /** + * Constructs a DirectPipelineRunner from the given options. + */ + public static DirectPipelineRunner fromOptions(PipelineOptions options) { + DirectPipelineOptions directOptions = + PipelineOptionsValidator.validate(DirectPipelineOptions.class, options); + LOG.debug("Creating DirectPipelineRunner"); + return new DirectPipelineRunner(directOptions); + } + + /** + * Constructs a runner with default properties for testing. + * + * @return The newly created runner. + */ + public static DirectPipelineRunner createForTest() { + DirectPipelineOptions options = PipelineOptionsFactory.as(DirectPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + return new DirectPipelineRunner(options); + } + + /** + * Enable runtime testing to verify that all functions and {@link Coder} + * instances can be serialized. + * + *
<p>
Enabled by default. + * + *
<p>
This method modifies the {@code DirectPipelineRunner} instance and + * returns itself. + */ + public DirectPipelineRunner withSerializabilityTesting(boolean enable) { + this.testSerializability = enable; + return this; + } + + /** + * Enable runtime testing to verify that all values can be encoded. + * + *
<p>
Enabled by default. + * + *
<p>
This method modifies the {@code DirectPipelineRunner} instance and + * returns itself. + */ + public DirectPipelineRunner withEncodabilityTesting(boolean enable) { + this.testEncodability = enable; + return this; + } + + /** + * Enable runtime testing to verify that functions do not depend on order + * of the elements. + * + *
<p>
This is accomplished by randomizing the order of elements. + * + *
<p>
Enabled by default. + * + *
<p>
This method modifies the {@code DirectPipelineRunner} instance and + * returns itself. + */ + public DirectPipelineRunner withUnorderednessTesting(boolean enable) { + this.testUnorderedness = enable; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public Output apply( + PTransform transform, Input input) { + if (transform instanceof Combine.GroupedValues) { + return (Output) applyTestCombine((Combine.GroupedValues) transform, (PCollection) input); + } else { + return super.apply(transform, input); + } + } + + private PCollection> applyTestCombine( + Combine.GroupedValues transform, + PCollection>> input) { + return input.apply(ParDo.of(TestCombineDoFn.create(transform, testSerializability))) + .setCoder(transform.getDefaultOutputCoder()); + } + + /** + * The implementation may split the KeyedCombineFn into ADD, MERGE + * and EXTRACT phases (see CombineValuesFn). In order to emulate + * this for the DirectPipelineRunner and provide an experience + * closer to the service, go through heavy seralizability checks for + * the equivalent of the results of the ADD phase, but after the + * GroupByKey shuffle, and the MERGE phase. Doing these checks + * ensure that not only is the accumulator coder serializable, but + * the accumulator coder can actually serialize the data in + * question. + */ + // @VisibleForTesting + public static class TestCombineDoFn + extends DoFn>, KV> { + private final KeyedCombineFn fn; + private final Coder accumCoder; + private final boolean testSerializability; + + @SuppressWarnings({"unchecked", "rawtypes"}) + public static TestCombineDoFn create( + Combine.GroupedValues transform, + boolean testSerializability) { + return new TestCombineDoFn( + transform.getFn(), transform.getAccumulatorCoder(), testSerializability); + } + + public TestCombineDoFn( + KeyedCombineFn fn, + Coder accumCoder, + boolean testSerializability) { + this.fn = fn; + this.accumCoder = accumCoder; + this.testSerializability = testSerializability; + } + + @Override + public void processElement(ProcessContext c) throws Exception { + K key = c.element().getKey(); + Iterable values = c.element().getValue(); + List groupedPostShuffle = + ensureSerializableByCoder(ListCoder.of(accumCoder), + addInputsRandomly(fn, key, values, new Random()), + "After addInputs of KeyedCombineFn " + fn.toString()); + VA merged = + ensureSerializableByCoder(accumCoder, + fn.mergeAccumulators(key, groupedPostShuffle), + "After mergeAccumulators of KeyedCombineFn " + fn.toString()); + // Note: The serializability of KV is ensured by the + // runner itself, since it's a transform output. + c.output(KV.of(key, fn.extractOutput(key, merged))); + } + + // Create a random list of accumulators from the given list of values + // @VisibleForTesting + public static List addInputsRandomly( + KeyedCombineFn fn, + K key, + Iterable values, + Random random) { + List out = new ArrayList(); + int i = 0; + VA accumulator = fn.createAccumulator(key); + boolean hasInput = false; + + for (VI value : values) { + fn.addInput(key, accumulator, value); + hasInput = true; + + // For each index i, flip a 1/2^i weighted coin for whether to + // create a new accumulator after index i is added, i.e. [0] + // is guaranteed, [1] is an even 1/2, [2] is 1/4, etc. The + // goal is to partition the inputs into accumulators, and make + // the accumulators potentially lumpy. 
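+        // Together with the Collections.shuffle(out, random) further down, this
+        // mimics the unpredictable way the service may partition and order
+        // pre-combined accumulators after the shuffle.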
+ if (i == 0 || random.nextInt(1 << Math.min(i, 30)) == 0) { + out.add(accumulator); + accumulator = fn.createAccumulator(key); + hasInput = false; + } + i++; + } + if (hasInput) { + out.add(accumulator); + } + + Collections.shuffle(out, random); + return out; + } + + public T ensureSerializableByCoder( + Coder coder, T value, String errorContext) { + if (testSerializability) { + return SerializableUtils.ensureSerializableByCoder( + coder, value, errorContext); + } + return value; + } + } + + @Override + public EvaluationResults run(Pipeline pipeline) { + Evaluator evaluator = new Evaluator(); + evaluator.run(pipeline); + + // Log all counter values for debugging purposes. + for (Counter counter : evaluator.getCounters()) { + LOG.debug("Final aggregator value: {}", counter); + } + + return evaluator; + } + + /** + * An evaluator of a PTransform. + */ + public interface TransformEvaluator { + public void evaluate(PT transform, + EvaluationContext context); + } + + /** + * The interface provided to registered callbacks for interacting + * with the {@code DirectPipelineRunner}, including reading and writing the + * values of {@link PCollection}s and {@link PCollectionView}s. + */ + public interface EvaluationResults extends PipelineResult { + /** + * Retrieves the value of the given PCollection. + * Throws an exception if the PCollection's value hasn't already been set. + */ + List getPCollection(PCollection pc); + + /** + * Retrieves the windowed value of the given PCollection. + * Throws an exception if the PCollection's value hasn't already been set. + */ + List> getPCollectionWindowedValues(PCollection pc); + + /** + * Retrieves the values of each PCollection in the given + * PCollectionList. Throws an exception if the PCollectionList's + * value hasn't already been set. + */ + List> getPCollectionList(PCollectionList pcs); + + /** + * Retrieves the values indicated by the given {@link PCollectionView}. + * Note that within the {@link DoFnContext} a {@link PCollectionView} + * converts from this representation to a suitable side input value. + */ + Iterable> getPCollectionView(PCollectionView view); + } + + /** + * An immutable (value, timestamp) pair, along with other metadata necessary + * for the implementation of {@code DirectPipelineRunner}. + */ + public static class ValueWithMetadata { + /** + * Returns a new {@code ValueWithMetadata} with the {@code WindowedValue}. + * Key is null. + */ + public static ValueWithMetadata of(WindowedValue windowedValue) { + return new ValueWithMetadata<>(windowedValue, null); + } + + /** + * Returns a new {@code ValueWithMetadata} with the implicit key associated + * with this value set. The key is the last key grouped by in the chain of + * productions that produced this element. + * These keys are used internally by {@link DirectPipelineRunner} for keeping + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.KeyedState} separate + * across keys. + */ + public ValueWithMetadata withKey(Object key) { + return new ValueWithMetadata<>(windowedValue, key); + } + + /** + * Returns a new {@code ValueWithMetadata} that is a copy of this one, but with + * a different value. + */ + public ValueWithMetadata withValue(T value) { + return new ValueWithMetadata(windowedValue.withValue(value), getKey()); + } + + /** + * Returns the {@code WindowedValue} associated with this element. + */ + public WindowedValue getWindowedValue() { + return windowedValue; + } + + /** + * Returns the value associated with this element. 
+ * + * @see #withValue + */ + public V getValue() { + return windowedValue.getValue(); + } + + /** + * Returns the timestamp associated with this element. + */ + public Instant getTimestamp() { + return windowedValue.getTimestamp(); + } + + /** + * Returns the collection of windows this element has been placed into. May + * be null if the {@code PCollection} this element is in has not yet been + * windowed. + * + * @see #getWindows() + */ + public Collection getWindows() { + return windowedValue.getWindows(); + } + + + /** + * Returns the key associated with this element. May be null if the + * {@code PCollection} this element is in is not keyed. + * + * @see #withKey + */ + public Object getKey() { + return key; + } + + //////////////////////////////////////////////////////////////////////////// + + private final Object key; + private final WindowedValue windowedValue; + + private ValueWithMetadata(WindowedValue windowedValue, + Object key) { + this.windowedValue = windowedValue; + this.key = key; + } + } + + /** + * The interface provided to registered callbacks for interacting + * with the {@code DirectPipelineRunner}, including reading and writing the + * values of {@link PCollection}s and {@link PCollectionView}s. + */ + public interface EvaluationContext extends EvaluationResults { + /** + * Returns the configured pipeline options. + */ + DirectPipelineOptions getPipelineOptions(); + + /** + * Sets the value of the given PCollection, where each element also has a timestamp + * and collection of windows. + * Throws an exception if the PCollection's value has already been set. + */ + void setPCollectionValuesWithMetadata( + PCollection pc, List> elements); + + /** + * Shorthand for setting the value of a PCollection where the elements do not have + * timestamps or windows. + * Throws an exception if the PCollection's value has already been set. + */ + void setPCollection(PCollection pc, List elements); + + /** + * Retrieves the value of the given PCollection, along with element metadata + * such as timestamps and windows. + * Throws an exception if the PCollection's value hasn't already been set. + */ + List> getPCollectionValuesWithMetadata(PCollection pc); + + /** + * Sets the value associated with the given {@link PCollectionView}. + * Throws an exception if the {@link PCollectionView}'s value has already been set. + */ + void setPCollectionView( + PCollectionView pc, + Iterable> value); + + /** + * Ensures that the element is encodable and decodable using the + * TypePValue's coder, by encoding it and decoding it, and + * returning the result. + */ + T ensureElementEncodable(TypedPValue pvalue, T element); + + /** + * If the evaluation context is testing unorderedness and + * !isOrdered, randomly permutes the order of the elements, in a + * copy if !inPlaceAllowed, and returns the permuted list, + * otherwise returns the argument unchanged. + */ + List randomizeIfUnordered(boolean isOrdered, + List elements, + boolean inPlaceAllowed); + + /** + * If the evaluation context is testing serializability, ensures + * that the argument function is serializable and deserializable + * by encoding it and then decoding it, and returning the result. + * Otherwise returns the argument unchanged. + */ + Fn ensureSerializable(Fn fn); + + /** + * If the evaluation context is testing serializability, ensures + * that the argument Coder is serializable and deserializable + * by encoding it and then decoding it, and returning the result. + * Otherwise returns the argument unchanged. 
+ */ + Coder ensureCoderSerializable(Coder coder); + + /** + * If the evaluation context is testing serializability, ensures + * that the given data is serializable and deserializable with the + * given Coder by encoding it and then decoding it, and returning + * the result. Otherwise returns the argument unchanged. + * + *
<p>
Error context is prefixed to any thrown exceptions. + */ + T ensureSerializableByCoder(Coder coder, + T data, String errorContext); + + /** + * Returns a mutator, which can be used to add additional counters to + * this EvaluationContext. + */ + CounterSet.AddCounterMutator getAddCounterMutator(); + + /** + * Gets the step name for this transform. + */ + public String getStepName(PTransform transform); + } + + + ///////////////////////////////////////////////////////////////////////////// + + class Evaluator implements PipelineVisitor, EvaluationContext { + private final Map stepNames = new HashMap<>(); + private final Map store = new HashMap<>(); + private final CounterSet counters = new CounterSet(); + + // Use a random number generator with a fixed seed, so execution + // using this evaluator is deterministic. (If the user-defined + // functions, transforms, and coders are deterministic.) + Random rand = new Random(0); + + public Evaluator() {} + + public void run(Pipeline pipeline) { + pipeline.traverseTopologically(this); + } + + @Override + public DirectPipelineOptions getPipelineOptions() { + return options; + } + + @Override + public void enterCompositeTransform(TransformTreeNode node) { + } + + @Override + public void leaveCompositeTransform(TransformTreeNode node) { + } + + @SuppressWarnings("unchecked") + @Override + public void visitTransform(TransformTreeNode node) { + PTransform transform = node.getTransform(); + TransformEvaluator evaluator = + getTransformEvaluator(transform.getClass()); + if (evaluator == null) { + throw new IllegalStateException( + "no evaluator registered for " + transform); + } + LOG.debug("Evaluating {}", transform); + evaluator.evaluate(transform, this); + } + + @Override + public void visitValue(PValue value, TransformTreeNode producer) { + LOG.debug("Checking evaluation of {}", value); + if (value.getProducingTransformInternal() == null) { + throw new RuntimeException( + "internal error: expecting a PValue " + + "to have a producingTransform"); + } + if (!producer.isCompositeNode()) { + // Verify that primitive transform outputs are already computed. + getPValue(value); + } + } + + /** + * Sets the value of the given PValue. + * Throws an exception if the PValue's value has already been set. + */ + void setPValue(PValue pvalue, Object contents) { + if (store.containsKey(pvalue)) { + throw new IllegalStateException( + "internal error: setting the value of " + pvalue + + " more than once"); + } + store.put(pvalue, contents); + } + + /** + * Retrieves the value of the given PValue. + * Throws an exception if the PValue's value hasn't already been set. + */ + Object getPValue(PValue pvalue) { + if (!store.containsKey(pvalue)) { + throw new IllegalStateException( + "internal error: getting the value of " + pvalue + + " before it has been computed"); + } + return store.get(pvalue); + } + + /** + * Convert a list of T to a list of {@code ValueWithMetadata}, with a timestamp of 0 + * and null windows. 
+ */ + List> toValuesWithMetadata(List values) { + List> result = new ArrayList<>(values.size()); + for (T value : values) { + result.add(ValueWithMetadata.of(WindowedValue.valueInGlobalWindow(value))); + } + return result; + } + + @Override + public void setPCollection(PCollection pc, List elements) { + setPCollectionValuesWithMetadata(pc, toValuesWithMetadata(elements)); + } + + @Override + public void setPCollectionValuesWithMetadata( + PCollection pc, List> elements) { + LOG.debug("Setting {} = {}", pc, elements); + setPValue(pc, ensurePCollectionEncodable(pc, elements)); + } + + @Override + public void setPCollectionView( + PCollectionView view, + Iterable> value) { + LOG.debug("Setting {} = {}", view, value); + setPValue(view, value); + } + + /** + * Retrieves the value of the given PCollection. + * Throws an exception if the PCollection's value hasn't already been set. + */ + @Override + public List getPCollection(PCollection pc) { + List result = new ArrayList<>(); + for (ValueWithMetadata elem : getPCollectionValuesWithMetadata(pc)) { + result.add(elem.getValue()); + } + return result; + } + + @Override + public List> getPCollectionWindowedValues(PCollection pc) { + return Lists.transform( + getPCollectionValuesWithMetadata(pc), + new Function, WindowedValue>() { + @Override + public WindowedValue apply(ValueWithMetadata input) { + return input.getWindowedValue(); + }}); + } + + @Override + public List> getPCollectionValuesWithMetadata(PCollection pc) { + @SuppressWarnings("unchecked") + List> elements = (List>) getPValue(pc); + elements = randomizeIfUnordered( + pc.isOrdered(), elements, false /* not inPlaceAllowed */); + LOG.debug("Getting {} = {}", pc, elements); + return elements; + } + + @Override + public List> getPCollectionList(PCollectionList pcs) { + List> elementsList = new ArrayList<>(); + for (PCollection pc : pcs.getAll()) { + elementsList.add(getPCollection(pc)); + } + return elementsList; + } + + /** + * Retrieves the value indicated by the given {@link PCollectionView}. + * Note that within the {@link DoFnContext} a {@link PCollectionView} + * converts from this representation to a suitable side input value. + */ + @Override + public Iterable> getPCollectionView(PCollectionView view) { + @SuppressWarnings("unchecked") + Iterable> value = (Iterable>) getPValue(view); + LOG.debug("Getting {} = {}", view, value); + return value; + } + + /** + * If testEncodability, ensures that the PCollection's coder and elements + * are encodable and decodable by encoding them and decoding them, + * and returning the result. Otherwise returns the argument elements. 
+ */ + List> ensurePCollectionEncodable( + PCollection pc, List> elements) { + ensureCoderSerializable(pc.getCoder()); + if (!testEncodability) { + return elements; + } + List> elementsCopy = new ArrayList<>(elements.size()); + for (ValueWithMetadata element : elements) { + elementsCopy.add( + element.withValue(ensureElementEncodable(pc, element.getValue()))); + } + return elementsCopy; + } + + @Override + public T ensureElementEncodable(TypedPValue pvalue, T element) { + return ensureSerializableByCoder( + pvalue.getCoder(), element, "Within " + pvalue.toString()); + } + + @Override + public List randomizeIfUnordered(boolean isOrdered, + List elements, + boolean inPlaceAllowed) { + if (!testUnorderedness || isOrdered) { + return elements; + } + List elementsCopy = new ArrayList<>(elements); + Collections.shuffle(elementsCopy, rand); + return elementsCopy; + } + + @Override + public Fn ensureSerializable(Fn fn) { + if (!testSerializability) { + return fn; + } + return SerializableUtils.ensureSerializable(fn); + } + + @Override + public Coder ensureCoderSerializable(Coder coder) { + if (testSerializability) { + SerializableUtils.ensureSerializable(coder); + } + return coder; + } + + @Override + public T ensureSerializableByCoder( + Coder coder, T value, String errorContext) { + if (testSerializability) { + return SerializableUtils.ensureSerializableByCoder( + coder, value, errorContext); + } + return value; + } + + @Override + public CounterSet.AddCounterMutator getAddCounterMutator() { + return counters.getAddCounterMutator(); + } + + @Override + public String getStepName(PTransform transform) { + String stepName = stepNames.get(transform); + if (stepName == null) { + stepName = "s" + (stepNames.size() + 1); + stepNames.put(transform, stepName); + } + return stepName; + } + + /** + * Returns the CounterSet generated during evaluation, which includes + * user-defined Aggregators and may include system-defined counters. + */ + public CounterSet getCounters() { + return counters; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + private final DirectPipelineOptions options; + private boolean testSerializability = true; + private boolean testEncodability = true; + private boolean testUnorderedness = true; + + /** Returns a new DirectPipelineRunner. */ + private DirectPipelineRunner(DirectPipelineOptions options) { + this.options = options; + // (Re-)register standard IO factories. Clobbers any prior credentials. + IOChannelUtils.registerStandardIOFactories(options); + } + + public DirectPipelineOptions getPipelineOptions() { + return options; + } + + @Override + public String toString() { return "DirectPipelineRunner#" + hashCode(); } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java new file mode 100644 index 0000000000000..8b134e98601ce --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.common.base.Preconditions; + +/** + * A PipelineRunner can execute, translate, or otherwise process a + * Pipeline. + * + * @param the type of the result of {@link #run}. + */ +public abstract class PipelineRunner { + + /** + * Constructs a runner from the provided options. + * + * @return The newly created runner. + */ + public static PipelineRunner fromOptions(PipelineOptions options) { + GcsOptions gcsOptions = PipelineOptionsValidator.validate(GcsOptions.class, options); + Preconditions.checkNotNull(options); + + // (Re-)register standard IO factories. Clobbers any prior credentials. + IOChannelUtils.registerStandardIOFactories(gcsOptions); + + @SuppressWarnings("unchecked") + PipelineRunner result = + InstanceBuilder.ofType(PipelineRunner.class) + .fromClass(options.getRunner()) + .fromFactoryMethod("fromOptions") + .withArg(PipelineOptions.class, options) + .build(); + return result; + } + + /** + * Processes the given Pipeline, returning the results. + */ + public abstract Results run(Pipeline pipeline); + + /** + * Applies a transform to the given input, returning the output. + * + *
The default implementation calls PTransform.apply(input), but can be overridden + * to customize behavior for a particular runner. + */ + public Output apply( + PTransform transform, Input input) { + return transform.apply(input); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java new file mode 100644 index 0000000000000..cb1850d654bf4 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.values.PValue; + +import java.util.ArrayList; +import java.util.List; + +/** + * Provides a simple PipelineVisitor which records the transformation tree. + * + *
Provided for internal unit tests. + */ +public class RecordingPipelineVisitor implements Pipeline.PipelineVisitor { + + public final List> transforms = new ArrayList<>(); + public final List values = new ArrayList<>(); + + @Override + public void enterCompositeTransform(TransformTreeNode node) { + } + + @Override + public void leaveCompositeTransform(TransformTreeNode node) { + } + + @Override + public void visitTransform(TransformTreeNode node) { + transforms.add(node.getTransform()); + } + + @Override + public void visitValue(PValue value, TransformTreeNode producer) { + values.add(value); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java new file mode 100644 index 0000000000000..53a90b2b80121 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.common.base.Preconditions; + +import java.util.Deque; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import java.util.Set; + +/** + * Captures information about a collection of transformations and their + * associated PValues. + */ +public class TransformHierarchy { + private final Deque transformStack = new LinkedList<>(); + private final Map producingTransformNode = new HashMap<>(); + private final Map, TransformTreeNode> transformToNode = new HashMap<>(); + + public TransformHierarchy() { + // First element in the stack is the root node, holding all child nodes. + transformStack.add(new TransformTreeNode(null, null, "", null)); + } + + /** + * Returns the last TransformTreeNode on the stack. + */ + public TransformTreeNode getCurrent() { + return transformStack.peek(); + } + + /** + * Add a TransformTreeNode to the stack. + */ + public void pushNode(TransformTreeNode current) { + transformStack.push(current); + transformToNode.put(current.getTransform(), current); + } + + /** + * Removes the last TransformTreeNode from the stack. + */ + public void popNode() { + transformStack.pop(); + Preconditions.checkState(!transformStack.isEmpty()); + } + + /** + * Adds an input to the given node. + * + *
This forces the producing node to be finished. + */ + public void addInput(TransformTreeNode node, PInput input) { + for (PValue i : input.expand()) { + TransformTreeNode producer = producingTransformNode.get(i); + if (producer == null) { + throw new IllegalStateException("Producer unknown for input: " + i); + } + + producer.finishSpecifying(); + node.addInputProducer(i, producer); + } + } + + /** + * Sets the output of a transform node. + */ + public void setOutput(TransformTreeNode producer, POutput output) { + producer.setOutput(output); + + for (PValue o : output.expand()) { + producingTransformNode.put(o, producer); + } + } + + /** + * Returns the TransformTreeNode associated with a given transform. + */ + public TransformTreeNode getNode(PTransform transform) { + return transformToNode.get(transform); + } + + /** + * Visits all nodes in the transform hierarchy, in transitive order. + */ + public void visit(Pipeline.PipelineVisitor visitor, + Set visitedNodes) { + transformStack.peekFirst().visit(visitor, visitedNodes); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java new file mode 100644 index 0000000000000..efd28b354f073 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java @@ -0,0 +1,237 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.common.base.Preconditions; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import javax.annotation.Nullable; + +/** + * Provides internal tracking of transform relationships with helper methods + * for initialization and ordered visitation. + */ +public class TransformTreeNode { + private final TransformTreeNode enclosingNode; + + // The transform. If composite.isEmpty(), then this is a + // PrimitivePTransform, otherwise a composite PTransform. + private final PTransform transform; + + private final String fullName; + + // Nodes of a composite transform. + private final Collection parts = new ArrayList<>(); + + // Inputs to the transform, in expanded form and mapped to the producer + // of the input. + private final Map inputs = new HashMap<>(); + + // Input to the transform, in unexpanded form. + private final PInput input; + + // TODO: track which outputs need to be exported to parent. + // Output of the transform, in unexpanded form. 
+ private POutput output; + + private boolean finishedSpecifying = false; + + /** + * Creates a new TransformTreeNode with the given parent and transform. + * + *
EnclosingNode and transform may both be null for a root-level node + * which holds all other nodes. + * + * @param enclosingNode the composite node containing this node + * @param transform the PTransform tracked by this node + * @param fullName the fully qualified name of the transform + * @param input the unexpanded input to the transform + */ + public TransformTreeNode(@Nullable TransformTreeNode enclosingNode, + @Nullable PTransform transform, + String fullName, + @Nullable PInput input) { + this.enclosingNode = enclosingNode; + this.transform = transform; + Preconditions.checkArgument((enclosingNode == null && transform == null) + || (enclosingNode != null && transform != null), + "EnclosingNode and transform must both be specified, or both be null"); + this.fullName = fullName; + this.input = input; + } + + /** + * Returns the transform associated with this transform node. + */ + public PTransform getTransform() { + return transform; + } + + /** + * Returns the enclosing composite transform node, or null if there is none. + */ + public TransformTreeNode getEnclosingNode() { + return enclosingNode; + } + + /** + * Adds a composite operation to the transform node. + * + *
As soon as a node is added, the transform node is considered a + * composite operation instead of a primitive transform. + */ + public void addComposite(TransformTreeNode node) { + parts.add(node); + } + + /** + * Returns true if this node represents a composite transform. + */ + public boolean isCompositeNode() { + return !parts.isEmpty(); + } + + public String getFullName() { + return fullName; + } + + /** + * Adds an input to the transform node. + */ + public void addInputProducer(PValue expandedInput, TransformTreeNode producer) { + Preconditions.checkState(!finishedSpecifying); + inputs.put(expandedInput, producer); + } + + /** + * Returns the transform input, in unexpanded form. + */ + public PInput getInput() { + return input; + } + + /** + * Returns a mapping of inputs to the producing nodes for all inputs to + * the transform. + */ + public Map getInputs() { + return Collections.unmodifiableMap(inputs); + } + + /** + * Adds an output to the transform node. + */ + public void setOutput(POutput output) { + Preconditions.checkState(!finishedSpecifying); + Preconditions.checkState(this.output == null); + this.output = output; + } + + /** + * Returns the transform output, in unexpanded form. + */ + public POutput getOutput() { + return output; + } + + /** + * Returns the transform outputs, in expanded form. + */ + public Collection getExpandedOutputs() { + if (output != null) { + return output.expand(); + } else { + return Collections.emptyList(); + } + } + + /** + * Visit the transform node. + * + *
Provides an ordered visit of the input values, the primitive + * transform (or child nodes for composite transforms), then the + * output values. + */ + public void visit(Pipeline.PipelineVisitor visitor, + Set visitedValues) { + if (!finishedSpecifying) { + finishSpecifying(); + } + + // Visit inputs. + for (Map.Entry entry : inputs.entrySet()) { + if (visitedValues.add(entry.getKey())) { + visitor.visitValue(entry.getKey(), entry.getValue()); + } + } + + if (isCompositeNode()) { + visitor.enterCompositeTransform(this); + for (TransformTreeNode child : parts) { + child.visit(visitor, visitedValues); + } + visitor.leaveCompositeTransform(this); + } else { + visitor.visitTransform(this); + } + + // Visit outputs. + for (PValue pValue : getExpandedOutputs()) { + if (visitedValues.add(pValue)) { + visitor.visitValue(pValue, this); + } + } + } + + /** + * Finish specifying a transform. + * + *
All inputs are finished first, then the transform, then + * all outputs. + */ + public void finishSpecifying() { + if (finishedSpecifying) { + return; + } + finishedSpecifying = true; + + for (TransformTreeNode input : inputs.values()) { + if (input != null) { + input.finishSpecifying(); + } + } + + if (transform != null) { + transform.finishSpecifying(); + } + + if (output != null) { + output.finishSpecifyingOutput(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java new file mode 100644 index 0000000000000..d7e36c54fc050 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.io.AvroIO; +import com.google.cloud.dataflow.sdk.io.ShardNameTemplate; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; + +/** + * Avro transform support code for the Dataflow backend. + */ +public class AvroIOTranslator { + + /** + * Implements AvroIO Read translation for the Dataflow backend. + */ + public static class ReadTranslator implements TransformTranslator { + + @Override + public void translate( + AvroIO.Read.Bound transform, + TranslationContext context) { + translateReadHelper(transform, context); + } + + private void translateReadHelper( + AvroIO.Read.Bound transform, + TranslationContext context) { + if (context.getPipelineOptions().isStreaming()) { + throw new IllegalArgumentException("AvroIO not supported in streaming mode."); + } + + // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. + GcsPath gcsPath = GcsPath.fromUri(transform.getFilepattern()); + context.addStep(transform, "ParallelRead"); + context.addInput(PropertyNames.FORMAT, "avro"); + context.addInput(PropertyNames.FILEPATTERN, gcsPath); + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + // TODO: Orderedness? + } + } + + /** + * Implements AvroIO Write translation for the Dataflow backend. 
+ */ + public static class WriteTranslator implements TransformTranslator { + + @Override + public void translate( + AvroIO.Write.Bound transform, + TranslationContext context) { + translateWriteHelper(transform, context); + } + + private void translateWriteHelper( + AvroIO.Write.Bound transform, + TranslationContext context) { + // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. + GcsPath gcsPath = GcsPath.fromUri(transform.getFilenamePrefix()); + context.addStep(transform, "ParallelWrite"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + + // TODO: drop this check when server supports alternative templates. + switch (transform.getShardTemplate()) { + case ShardNameTemplate.INDEX_OF_MAX: + break; // supported by server + case "": + // Empty shard template allowed - forces single output. + Preconditions.checkArgument(transform.getNumShards() <= 1, + "Num shards must be <= 1 when using an empty sharding template"); + break; + default: + throw new UnsupportedOperationException("Shard template " + + transform.getShardTemplate() + + " not yet supported by Dataflow service"); + } + + context.addInput(PropertyNames.FORMAT, "avro"); + context.addInput(PropertyNames.FILENAME_PREFIX, gcsPath); + context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, transform.getShardTemplate()); + context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); + + long numShards = transform.getNumShards(); + if (numShards > 0) { + context.addInput(PropertyNames.NUM_SHARDS, numShards); + } + + context.addEncodingInput( + WindowedValue.getValueOnlyCoder( + AvroCoder.of(transform.getType(), transform.getSchema()))); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java new file mode 100644 index 0000000000000..fd2731949c414 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.api.client.json.JsonFactory; +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.TableReference; +import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator; +import com.google.cloud.dataflow.sdk.util.ApiErrorExtractor; +import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.WindowedValue; + +import java.io.IOException; + +/** + * BigQuery transform support code for the Dataflow backend. + */ +public class BigQueryIOTranslator { + private static final JsonFactory JSON_FACTORY = Transport.getJsonFactory(); + + /** + * Implements BigQueryIO Read translation for the Dataflow backend. + */ + public static class ReadTranslator + implements DataflowPipelineTranslator.TransformTranslator { + + @Override + public void translate(BigQueryIO.Read.Bound transform, + DataflowPipelineTranslator.TranslationContext context) { + TableReference table = transform.getTable(); + if (table.getProjectId() == null) { + table.setProjectId(context.getPipelineOptions().getProject()); + } + + // Check for source table presence for early failure notification. + // Note that a presence check can fail if the table or dataset are created by earlier stages + // of the pipeline. For these cases the withoutValidation method can be used to disable + // the check. + if (transform.getValidate()) { + verifyDatasetPresence(context.getPipelineOptions(), table); + verifyTablePresence(context.getPipelineOptions(), table); + } + + // Actual translation. + context.addStep(transform, "ParallelRead"); + context.addInput(PropertyNames.FORMAT, "bigquery"); + context.addInput(PropertyNames.BIGQUERY_TABLE, table.getTableId()); + context.addInput(PropertyNames.BIGQUERY_DATASET, table.getDatasetId()); + if (table.getProjectId() != null) { + context.addInput(PropertyNames.BIGQUERY_PROJECT, table.getProjectId()); + } + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + } + } + + /** + * Implements BigQueryIO Write translation for the Dataflow backend. + */ + public static class WriteTranslator + implements DataflowPipelineTranslator.TransformTranslator { + + @Override + public void translate(BigQueryIO.Write.Bound transform, + DataflowPipelineTranslator.TranslationContext context) { + if (context.getPipelineOptions().isStreaming()) { + // Streaming is handled by the streaming runner. + throw new AssertionError( + "BigQueryIO is specified to use streaming write in batch mode."); + } + + TableReference table = transform.getTable(); + if (table.getProjectId() == null) { + table.setProjectId(context.getPipelineOptions().getProject()); + } + + // Check for destination table presence and emptiness for early failure notification. + // Note that a presence check can fail if the table or dataset are created by earlier stages + // of the pipeline. For these cases the withoutValidation method can be used to disable + // the check. 
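+      // Illustrative only: a pipeline author whose table or dataset is created by an
+      // earlier stage would typically construct the write along the lines of
+      //   BigQueryIO.Write.to(tableSpec).withoutValidation()
+      // (builder method names assumed) so that these presence/emptiness checks are skipped.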
+ if (transform.getValidate()) { + verifyDatasetPresence(context.getPipelineOptions(), table); + if (transform.getCreateDisposition() == BigQueryIO.Write.CreateDisposition.CREATE_NEVER) { + verifyTablePresence(context.getPipelineOptions(), table); + } + if (transform.getWriteDisposition() == BigQueryIO.Write.WriteDisposition.WRITE_EMPTY) { + verifyTableEmpty(context.getPipelineOptions(), table); + } + } + + // Actual translation. + context.addStep(transform, "ParallelWrite"); + context.addInput(PropertyNames.FORMAT, "bigquery"); + context.addInput(PropertyNames.BIGQUERY_TABLE, + table.getTableId()); + context.addInput(PropertyNames.BIGQUERY_DATASET, + table.getDatasetId()); + if (table.getProjectId() != null) { + context.addInput(PropertyNames.BIGQUERY_PROJECT, table.getProjectId()); + } + if (transform.getSchema() != null) { + try { + context.addInput(PropertyNames.BIGQUERY_SCHEMA, + JSON_FACTORY.toString(transform.getSchema())); + } catch (IOException exn) { + throw new IllegalArgumentException("Invalid table schema.", exn); + } + } + context.addInput( + PropertyNames.BIGQUERY_CREATE_DISPOSITION, + transform.getCreateDisposition().name()); + context.addInput( + PropertyNames.BIGQUERY_WRITE_DISPOSITION, + transform.getWriteDisposition().name()); + // Set sink encoding to TableRowJsonCoder. + context.addEncodingInput( + WindowedValue.getValueOnlyCoder(TableRowJsonCoder.of())); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + private static void verifyDatasetPresence( + BigQueryOptions options, + TableReference table) { + try { + Bigquery client = Transport.newBigQueryClient(options).build(); + client.datasets().get(table.getProjectId(), table.getDatasetId()) + .execute(); + } catch (IOException e) { + ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); + if (errorExtractor.itemNotFound(e)) { + throw new IllegalArgumentException( + "BigQuery dataset not found for table: " + BigQueryIO.toTableSpec(table), e); + } else { + throw new RuntimeException( + "unable to confirm BigQuery dataset presence", e); + } + } + } + + private static void verifyTablePresence( + BigQueryOptions options, + TableReference table) { + try { + Bigquery client = Transport.newBigQueryClient(options).build(); + client.tables().get(table.getProjectId(), table.getDatasetId(), table.getTableId()) + .execute(); + } catch (IOException e) { + ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); + if (errorExtractor.itemNotFound(e)) { + throw new IllegalArgumentException( + "BigQuery table not found: " + BigQueryIO.toTableSpec(table), e); + } else { + throw new RuntimeException( + "unable to confirm BigQuery table presence", e); + } + } + } + + private static void verifyTableEmpty( + BigQueryOptions options, + TableReference table) { + try { + Bigquery client = Transport.newBigQueryClient(options).build(); + BigQueryTableInserter inserter = new BigQueryTableInserter(client, table); + if (!inserter.isEmpty()) { + throw new IllegalArgumentException( + "BigQuery table is not empty: " + BigQueryIO.toTableSpec(table)); + } + } catch (IOException e) { + ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); + if (errorExtractor.itemNotFound(e)) { + // Nothing to do. If the table does not exist, it is considered empty. 
+ } else { + throw new RuntimeException( + "unable to confirm BigQuery table emptiness", e); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java new file mode 100644 index 0000000000000..4292199174a14 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.cloud.dataflow.sdk.io.DatastoreIO; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; + +/** + * Datastore transform support code for the Dataflow backend. + */ +public class DatastoreIOTranslator { + + /** + * Implements DatastoreIO Write translation for the Dataflow backend. + */ + public static class WriteTranslator implements TransformTranslator { + @Override + public void translate( + DatastoreIO.Write.Bound transform, + TranslationContext context) { + // TODO: Not implemented yet. + // translateWriteHelper(transform, context); + throw new UnsupportedOperationException("Write only supports direct mode now."); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java new file mode 100644 index 0000000000000..706397bddd37b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; + +/** + * Pubsub transform support code for the Dataflow backend. + */ +public class PubsubIOTranslator { + + /** + * Implements PubsubIO Read translation for the Dataflow backend. 
+ */ + public static class ReadTranslator implements TransformTranslator { + @Override + public void translate( + PubsubIO.Read.Bound transform, + TranslationContext context) { + translateReadHelper(transform, context); + } + + /* + private static void translateReadHelper( + */ + + private void translateReadHelper( + PubsubIO.Read.Bound transform, + TranslationContext context) { + if (!context.getPipelineOptions().isStreaming()) { + throw new IllegalArgumentException("PubsubIO can only be used in streaming mode."); + } + + context.addStep(transform, "ParallelRead"); + context.addInput(PropertyNames.FORMAT, "pubsub"); + if (transform.getTopic() != null) { + context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic()); + } + if (transform.getSubscription() != null) { + context.addInput(PropertyNames.PUBSUB_SUBSCRIPTION, transform.getSubscription()); + } + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + // TODO: Orderedness? + } + } + + /** + * Implements PubsubIO Write translation for the Dataflow backend. + */ + public static class WriteTranslator implements TransformTranslator { + @Override + public void translate( + PubsubIO.Write.Bound transform, + TranslationContext context) { + translateWriteHelper(transform, context); + } + + private void translateWriteHelper( + PubsubIO.Write.Bound transform, + TranslationContext context) { + if (!context.getPipelineOptions().isStreaming()) { + throw new IllegalArgumentException("PubsubIO can only be used in streaming mode."); + } + + context.addStep(transform, "ParallelWrite"); + context.addInput(PropertyNames.FORMAT, "pubsub"); + context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic()); + context.addEncodingInput( + WindowedValue.getValueOnlyCoder(transform.getInput().getCoder())); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java new file mode 100644 index 0000000000000..05a44648eba95 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.io.ShardNameTemplate; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; + +/** + * TextIO transform support code for the Dataflow backend. + */ +public class TextIOTranslator { + + /** + * Implements TextIO Read translation for the Dataflow backend. + */ + public static class ReadTranslator implements TransformTranslator { + @Override + public void translate( + TextIO.Read.Bound transform, + TranslationContext context) { + translateReadHelper(transform, context); + } + + private void translateReadHelper( + TextIO.Read.Bound transform, + TranslationContext context) { + if (context.getPipelineOptions().isStreaming()) { + throw new IllegalArgumentException("TextIO not supported in streaming mode."); + } + + // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. + GcsPath gcsPath = GcsPath.fromUri(transform.getFilepattern()); + // Furthermore, on the service there is currently a limitation + // that the first wildcard character must occur after the last + // delimiter, and that the delimiter is fixed to '/' + if (!GcsUtil.GCS_READ_PATTERN.matcher(gcsPath.getObject()).matches()) { + throw new IllegalArgumentException( + "Unsupported wildcard usage in \"" + gcsPath + "\": " + + " all wildcards must occur after the final '/' delimiter."); + } + + context.addStep(transform, "ParallelRead"); + // TODO: How do we want to specify format and + // format-specific properties? + context.addInput(PropertyNames.FORMAT, "text"); + context.addInput(PropertyNames.FILEPATTERN, gcsPath); + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + + // TODO: Orderedness? + } + } + + /** + * Implements TextIO Write translation for the Dataflow backend. + */ + public static class WriteTranslator implements TransformTranslator { + @Override + public void translate( + TextIO.Write.Bound transform, + TranslationContext context) { + translateWriteHelper(transform, context); + } + + private void translateWriteHelper( + TextIO.Write.Bound transform, + TranslationContext context) { + if (context.getPipelineOptions().isStreaming()) { + throw new IllegalArgumentException("TextIO not supported in streaming mode."); + } + + // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. + GcsPath gcsPath = GcsPath.fromUri(transform.getFilenamePrefix()); + context.addStep(transform, "ParallelWrite"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + + // TODO: drop this check when server supports alternative templates. + switch (transform.getShardTemplate()) { + case ShardNameTemplate.INDEX_OF_MAX: + break; // supported by server + case "": + // Empty shard template allowed - forces single output. 
+ Preconditions.checkArgument(transform.getNumShards() <= 1, + "Num shards must be <= 1 when using an empty sharding template"); + break; + default: + throw new UnsupportedOperationException("Shard template " + + transform.getShardTemplate() + + " not yet supported by Dataflow service"); + } + + // TODO: How do we want to specify format and + // format-specific properties? + context.addInput(PropertyNames.FORMAT, "text"); + context.addInput(PropertyNames.FILENAME_PREFIX, gcsPath); + context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, + transform.getShardNameTemplate()); + context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); + + long numShards = transform.getNumShards(); + if (numShards > 0) { + context.addInput(PropertyNames.NUM_SHARDS, numShards); + } + + context.addEncodingInput( + WindowedValue.getValueOnlyCoder(transform.getCoder())); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java new file mode 100644 index 0000000000000..c2fcc288cf3c4 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Implementation of the {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}. + */ +package com.google.cloud.dataflow.sdk.runners.dataflow; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java new file mode 100644 index 0000000000000..c75fe2f8348e0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines runners for executing Pipelines in different modes, including + * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}. + * + *
{@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} executes a {@code Pipeline} + * locally, without contacting the Dataflow service. + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} submits a + * {@code Pipeline} to the Dataflow service, which executes it on Dataflow-managed Compute Engine + * instances. {@code DataflowPipelineRunner} returns + * as soon as the {@code Pipeline} has been submitted. Use + * {@link com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner} to have execution + * updates printed to the console. + * + *
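 * <p>For example, selecting the local runner programmatically might look roughly
 * like the following sketch. The {@code setRunner} call is assumed to mirror the
 * {@code getRunner} bean property consulted by
 * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner#fromOptions}; how the
 * options object itself is constructed is not shown here.
 * <pre> {@code
 * PipelineOptions options = ...;   // however the options are constructed
 * options.setRunner(DirectPipelineRunner.class);
 * PipelineRunner<?> runner = PipelineRunner.fromOptions(options);
 * } </pre>
 *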
The runner is specified as part {@link com.google.cloud.dataflow.sdk.options.PipelineOptions}. + */ +package com.google.cloud.dataflow.sdk.runners; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleReader.java new file mode 100644 index 0000000000000..912c570f8efaa --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleReader.java @@ -0,0 +1,63 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import java.io.IOException; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * ApplianceShuffleReader reads chunks of data from a shuffle dataset + * for a position range. + * + * It is a JNI wrapper of an equivalent C++ class. + */ +@ThreadSafe +public final class ApplianceShuffleReader implements ShuffleReader { + static { + ShuffleLibrary.load(); + } + + /** + * Pointer to the underlying native shuffle reader object. + */ + private long nativePointer; + + /** + * @param shuffleReaderConfig opaque configuration for creating a + * shuffle reader + */ + public ApplianceShuffleReader(byte[] shuffleReaderConfig) { + this.nativePointer = createFromConfig(shuffleReaderConfig); + } + + @Override + public void finalize() { + destroy(); + } + + /** + * Native methods for interacting with the underlying native shuffle client + * code. + */ + private native long createFromConfig(byte[] shuffleReaderConfig); + private native void destroy(); + + @Override + public native ReadChunkResult readIncludingPosition( + byte[] startPosition, byte[] endPosition) throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleWriter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleWriter.java new file mode 100644 index 0000000000000..d6b3c7518e3e0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ApplianceShuffleWriter.java @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import java.io.IOException; +import javax.annotation.concurrent.ThreadSafe; + +/** + * ApplianceShuffleWriter writes chunks of data to a shuffle dataset. + * + * It is a JNI wrapper of an equivalent C++ class. + */ +@ThreadSafe +public final class ApplianceShuffleWriter implements ShuffleWriter { + static { + ShuffleLibrary.load(); + } + + /** + * Pointer to the underlying native shuffle writer code. + */ + private long nativePointer; + + /** + * @param shuffleWriterConfig opaque configuration for creating a + * shuffle writer + * @param bufferSize the writer buffer size + */ + public ApplianceShuffleWriter(byte[] shuffleWriterConfig, + long bufferSize) { + this.nativePointer = createFromConfig(shuffleWriterConfig, bufferSize); + } + + @Override + public void finalize() { + destroy(); + } + + /** + * Native methods for interacting with the underlying native shuffle + * writer code. + */ + private native long createFromConfig(byte[] shuffleWriterConfig, + long bufferSize); + private native void destroy(); + + @Override + public native void write(byte[] chunk) throws IOException; + + @Override + public native void close() throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java new file mode 100644 index 0000000000000..f1ae7f11b9374 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBytes; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * A wrapper around an AssignWindowsDoFn. This class is the same as + * NormalParDoFn, except that it gets deserialized differently. 
+ */ +class AssignWindowsParDoFn extends NormalParDoFn { + public static AssignWindowsParDoFn create( + PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + @Nullable List sideInputInfos, + @Nullable List multiOutputInfos, + Integer numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler sampler /* unused */) + throws Exception { + Object windowingFn = + SerializableUtils.deserializeFromByteArray( + getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), + "serialized window fn"); + if (!(windowingFn instanceof WindowingFn)) { + throw new Exception( + "unexpected kind of WindowingFn: " + windowingFn.getClass().getName()); + } + + DoFn assignWindowsDoFn = new AssignWindowsDoFn((WindowingFn) windowingFn); + + return new AssignWindowsParDoFn( + options, assignWindowsDoFn, stepName, executionContext, addCounterMutator); + } + + private AssignWindowsParDoFn( + PipelineOptions options, + DoFn fn, + String stepName, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator) { + super( + options, + fn, + PTuple.empty(), + Arrays.asList("output"), + stepName, + executionContext, + addCounterMutator); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java new file mode 100644 index 0000000000000..404b2d261fc98 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * A sink that writes Avro files. Records are written to the Avro file as a + * series of byte arrays. The coder provided is used to serialize each record + * into a byte array. 
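+ *
+ * <p>Minimal usage sketch (the output prefix is a placeholder, and
+ * {@code StringUtf8Coder} stands in for whatever coder matches the element type):
+ * <pre> {@code
+ * AvroByteSink<String> sink = new AvroByteSink<>("/tmp/lines", StringUtf8Coder.of());
+ * Sink.SinkWriter<String> writer = sink.writer();
+ * writer.add("some record");
+ * writer.close();
+ * } </pre>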
+ * + * @param the type of the elements written to the sink + */ +public class AvroByteSink extends Sink { + + final AvroSink avroSink; + final Coder coder; + private final Schema schema = Schema.create(Schema.Type.BYTES); + + public AvroByteSink(String filenamePrefix, Coder coder) { + this(filenamePrefix, "", "", 1, coder); + } + + public AvroByteSink(String filenamePrefix, String shardFormat, String filenameSuffix, + int shardCount, Coder coder) { + this.coder = coder; + avroSink = new AvroSink( + filenamePrefix, shardFormat, filenameSuffix, shardCount, + WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema))); + } + + @Override + public SinkWriter writer() throws IOException { + return new AvroByteFileWriter(); + } + + /** The SinkWriter for an AvroByteSink. */ + class AvroByteFileWriter implements SinkWriter { + + private final SinkWriter> avroFileWriter; + + public AvroByteFileWriter() throws IOException { + avroFileWriter = avroSink.writer(new GenericDatumWriter(schema)); + } + + @Override + public long add(T value) throws IOException { + byte[] encodedElem = CoderUtils.encodeToByteArray(coder, value); + ByteBuffer encodedBuffer = ByteBuffer.wrap(encodedElem); + avroFileWriter.add(WindowedValue.valueInGlobalWindow(encodedBuffer)); + return encodedElem.length; + } + + @Override + public void close() throws IOException { + avroFileWriter.close(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java new file mode 100644 index 0000000000000..b71700a08fcab --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * A source that reads Avro files. Records are read from the Avro file as a + * series of byte arrays. The coder provided is used to deserialize each record + * from a byte array. 
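+ *
+ * <p>Minimal usage sketch (the file name and coder are placeholders; passing
+ * {@code null} for the start and end positions reads the whole file):
+ * <pre> {@code
+ * AvroByteSource<String> source =
+ *     new AvroByteSource<>("/tmp/lines-00000-of-00001", null, null, StringUtf8Coder.of());
+ * Source.SourceIterator<String> it = source.iterator();
+ * while (it.hasNext()) {
+ *   String record = it.next();
+ *   // ... process record ...
+ * }
+ * it.close();
+ * } </pre>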
+ * + * @param the type of the elements read from the source + */ +public class AvroByteSource extends Source { + + final AvroSource avroSource; + final Coder coder; + private final Schema schema = Schema.create(Schema.Type.BYTES); + + public AvroByteSource(String filename, + @Nullable Long startPosition, + @Nullable Long endPosition, + Coder coder) { + this.coder = coder; + avroSource = new AvroSource( + filename, startPosition, endPosition, + WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema))); + } + + @Override + public SourceIterator iterator() throws IOException { + return new AvroByteFileIterator(); + } + + class AvroByteFileIterator extends AbstractSourceIterator { + + private final SourceIterator> avroFileIterator; + + public AvroByteFileIterator() throws IOException { + avroFileIterator = avroSource.iterator( + new GenericDatumReader(schema)); + } + + @Override + public boolean hasNext() throws IOException { + return avroFileIterator.hasNext(); + } + + @Override + public T next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + ByteBuffer inBuffer = avroFileIterator.next().getValue(); + byte[] encodedElem = new byte[inBuffer.remaining()]; + inBuffer.get(encodedElem); + assert inBuffer.remaining() == 0; + inBuffer.clear(); + notifyElementRead(encodedElem.length); + return CoderUtils.decodeFromByteArray(coder, encodedElem); + } + + @Override + public void close() throws IOException { + avroFileIterator.close(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java new file mode 100644 index 0000000000000..64fe691aa41f9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; +import static com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MimeTypes; +import com.google.cloud.dataflow.sdk.util.ShardingWritableByteChannel; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.io.DatumWriter; + +import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; +import java.util.Random; + +/** + * A sink that writes Avro files. 
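+ *
+ * <p>Construction sketch (the output prefix is a placeholder), mirroring how
+ * {@code AvroByteSink} wraps this class: the coder must be a value-only
+ * {@code WindowedValueCoder} whose value coder is an {@code AvroCoder}, as the
+ * constructor checks:
+ * <pre> {@code
+ * Schema schema = Schema.create(Schema.Type.BYTES);
+ * AvroSink<ByteBuffer> sink = new AvroSink<>(
+ *     "/tmp/records", WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema)));
+ * Sink.SinkWriter<WindowedValue<ByteBuffer>> writer = sink.writer();
+ * writer.add(WindowedValue.valueInGlobalWindow(ByteBuffer.wrap(new byte[] {1, 2, 3})));
+ * writer.close();
+ * } </pre>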
+ * + * @param the type of the elements written to the sink + */ +public class AvroSink extends Sink> { + + final String filenamePrefix; + final String shardFormat; + final String filenameSuffix; + final int shardCount; + final AvroCoder avroCoder; + final Schema schema; + + public AvroSink(String filename, WindowedValueCoder coder) { + this(filename, "", "", 1, coder); + } + + public AvroSink(String filenamePrefix, String shardFormat, String filenameSuffix, int shardCount, + WindowedValueCoder coder) { + if (!(coder instanceof ValueOnlyWindowedValueCoder)) { + throw new IllegalArgumentException("Expected ValueOnlyWindowedValueCoder"); + } + + if (!(coder.getValueCoder() instanceof AvroCoder)) { + throw new IllegalArgumentException("AvroSink requires an AvroCoder"); + } + + this.filenamePrefix = filenamePrefix; + this.shardFormat = shardFormat; + this.filenameSuffix = filenameSuffix; + this.shardCount = shardCount; + this.avroCoder = (AvroCoder) coder.getValueCoder(); + this.schema = this.avroCoder.getSchema(); + } + + public SinkWriter> writer(DatumWriter datumWriter) throws IOException { + WritableByteChannel writer = IOChannelUtils.create( + filenamePrefix, shardFormat, filenameSuffix, shardCount, MimeTypes.BINARY); + + if (writer instanceof ShardingWritableByteChannel) { + return new AvroShardingFileWriter(datumWriter, (ShardingWritableByteChannel) writer); + } else { + return new AvroFileWriter(datumWriter, writer); + } + } + + @Override + public SinkWriter> writer() throws IOException { + return writer(avroCoder.createDatumWriter()); + } + + /** The SinkWriter for an AvroSink. */ + class AvroFileWriter implements SinkWriter> { + DataFileWriter fileWriter; + + public AvroFileWriter(DatumWriter datumWriter, WritableByteChannel outputChannel) + throws IOException { + fileWriter = new DataFileWriter<>(datumWriter); + fileWriter.create(schema, Channels.newOutputStream(outputChannel)); + } + + @Override + public long add(WindowedValue value) throws IOException { + fileWriter.append(value.getValue()); + // DataFileWriter doesn't support returning the length written. Use the + // coder instead. + return CoderUtils.encodeToByteArray(avroCoder, value.getValue()).length; + } + + @Override + public void close() throws IOException { + fileWriter.close(); + } + } + + /** The SinkWriter for an AvroSink, which supports sharding. */ + class AvroShardingFileWriter implements SinkWriter> { + private ArrayList fileWriters = new ArrayList<>(); + private final Random random = new Random(); + + public AvroShardingFileWriter( + DatumWriter datumWriter, ShardingWritableByteChannel outputChannel) throws IOException { + for (int i = 0; i < outputChannel.getNumShards(); i++) { + fileWriters.add(new AvroFileWriter(datumWriter, outputChannel.getChannel(i))); + } + } + + @Override + public long add(WindowedValue value) throws IOException { + return fileWriters.get(random.nextInt(fileWriters.size())).add(value); + } + + @Override + public void close() throws IOException { + for (AvroFileWriter fileWriter : fileWriters) { + fileWriter.close(); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java new file mode 100644 index 0000000000000..9a20d17aee220 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +/** + * Creates an AvroSink from a CloudObject spec. + */ +public final class AvroSinkFactory { + // Do not instantiate. + private AvroSinkFactory() {} + + public static Sink create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(spec, coder); + } + + static Sink create(CloudObject spec, Coder coder) + throws Exception { + String filename = getString(spec, PropertyNames.FILENAME); + + if (!(coder instanceof WindowedValueCoder)) { + return new AvroByteSink<>(filename, coder); + //throw new IllegalArgumentException("Expected WindowedValueCoder"); + } + + WindowedValueCoder windowedCoder = (WindowedValueCoder) coder; + if (windowedCoder.getValueCoder() instanceof AvroCoder) { + return new AvroSink(filename, windowedCoder); + } else { + return new AvroByteSink<>(filename, windowedCoder); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java new file mode 100644 index 0000000000000..3f071cff2c7a1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; +import static com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelFactory; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.SeekableInput; +import org.apache.avro.io.DatumReader; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.util.Collection; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * A source that reads Avro files. + * + * @param the type of the elements read from the source + */ +public class AvroSource extends Source> { + private static final int BUF_SIZE = 200; + final String filename; + @Nullable final Long startPosition; + @Nullable final Long endPosition; + final AvroCoder avroCoder; + private final Schema schema; + + public AvroSource(String filename, + @Nullable Long startPosition, + @Nullable Long endPosition, + WindowedValueCoder coder) { + if (!(coder instanceof ValueOnlyWindowedValueCoder)) { + throw new IllegalArgumentException("Expected ValueOnlyWindowedValueCoder"); + } + + if (!(coder.getValueCoder() instanceof AvroCoder)) { + throw new IllegalArgumentException("AvroSource requires an AvroCoder"); + } + + this.filename = filename; + this.startPosition = startPosition; + this.endPosition = endPosition; + this.avroCoder = (AvroCoder) coder.getValueCoder(); + this.schema = this.avroCoder.getSchema(); + } + + public SourceIterator> iterator(DatumReader datumReader) throws IOException { + IOChannelFactory factory = IOChannelUtils.getFactory(filename); + Collection inputs = factory.match(filename); + + if (inputs.size() == 1) { + String input = inputs.iterator().next(); + ReadableByteChannel reader = factory.open(input); + return new AvroFileIterator(datumReader, input, reader, startPosition, endPosition); + + } else { + if (startPosition != null || endPosition != null) { + throw new UnsupportedOperationException( + "Unable to apply range limits to multiple-input stream: " + + filename); + } + return new AvroFileMultiIterator(datumReader, factory, inputs.iterator()); + } + } + + @Override + public SourceIterator> iterator() throws IOException { + return iterator(avroCoder.createDatumReader()); + } + + class AvroFileMultiIterator extends LazyMultiSourceIterator> { + private final IOChannelFactory factory; + private final DatumReader datumReader; + + public AvroFileMultiIterator(DatumReader datumReader, + IOChannelFactory factory, + Iterator inputs) { + super(inputs); + this.factory = factory; + this.datumReader = datumReader; + } + + @Override + protected SourceIterator> open(String input) throws IOException { + return new AvroFileIterator(datumReader, input, factory.open(input), null, null); + } + } + + class AvroFileIterator extends AbstractSourceIterator> { + final DataFileReader fileReader; + final Long endOffset; + + public AvroFileIterator(DatumReader datumReader, + String filename, + ReadableByteChannel reader, 
+ @Nullable Long startOffset, + @Nullable Long endOffset) + throws IOException { + if (!(reader instanceof SeekableByteChannel)) { + throw new UnsupportedOperationException( + "Unable to seek to offset in stream for " + filename); + } + SeekableByteChannel inChannel = (SeekableByteChannel) reader; + SeekableInput seekableInput = new SeekableByteChannelInput(inChannel); + this.fileReader = new DataFileReader<>(seekableInput, datumReader); + this.endOffset = endOffset; + if (startOffset != null && startOffset > 0) { + // Sync to the first record at or after startOffset. + fileReader.sync(startOffset); + } + } + + @Override + public boolean hasNext() throws IOException { + return fileReader.hasNext() + && (endOffset == null || !fileReader.pastSync(endOffset)); + } + + @Override + public WindowedValue next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T next = fileReader.next(); + // DataFileReader doesn't seem to support getting the current position. + // The difference between tell() calls seems to be zero. Use the coder + // instead. + notifyElementRead(CoderUtils.encodeToByteArray(avroCoder, next).length); + return WindowedValue.valueInGlobalWindow(next); + } + + @Override + public void close() throws IOException { + fileReader.close(); + } + } + + /** + * An implementation of an Avro SeekableInput wrapping a + * SeekableByteChannel. + */ + static class SeekableByteChannelInput implements SeekableInput { + final SeekableByteChannel channel; + + public SeekableByteChannelInput(SeekableByteChannel channel) { + this.channel = channel; + } + + @Override + public void seek(long position) throws IOException { + channel.position(position); + } + + @Override + public long tell() throws IOException { + return channel.position(); + } + + @Override + public long length() throws IOException { + return channel.size(); + } + + @Override + public int read(byte[] b, int offset, int length) throws IOException { + return channel.read(ByteBuffer.wrap(b, offset, length)); + } + + @Override + public void close() throws IOException { + channel.close(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java new file mode 100644 index 0000000000000..329d8b66e2ee1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getLong; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +/** + * Creates an AvroSource from a CloudObject spec. + */ +public class AvroSourceFactory { + // Do not instantiate. + private AvroSourceFactory() {} + + public static Source create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(spec, coder); + } + + static Source create(CloudObject spec, + Coder coder) + throws Exception { + String filename = getString(spec, PropertyNames.FILENAME); + Long startOffset = getLong(spec, PropertyNames.START_OFFSET, null); + Long endOffset = getLong(spec, PropertyNames.END_OFFSET, null); + + if (!(coder instanceof WindowedValueCoder)) { + return new AvroByteSource<>(filename, startOffset, endOffset, coder); + //throw new IllegalArgumentException("Expected WindowedValueCoder"); + } + + WindowedValueCoder windowedCoder = (WindowedValueCoder) coder; + if (windowedCoder.getValueCoder() instanceof AvroCoder) { + return new AvroSource(filename, startOffset, endOffset, windowedCoder); + } else { + return new AvroByteSource<>(filename, startOffset, endOffset, windowedCoder); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java new file mode 100644 index 0000000000000..b43c942b3ed98 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java @@ -0,0 +1,114 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; + +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.io.IOException; +import java.util.NoSuchElementException; +import java.util.logging.Logger; + +/** + * A source that reads a BigQuery table and yields TableRow objects. + * + *

The source is a wrapper over the {@code BigQueryTableRowIterator} class which issues a + * query for all rows of a table and then iterates over the result. There is no support for + * progress reporting because the source is used only in situations where the entire table must be + * read by each worker (i.e. the source is used as a side input). + */ +public class BigQuerySource extends Source { + private static final Logger LOG = + Logger.getLogger(BigQuerySource.class.getName()); + + final TableReference tableRef; + final BigQueryOptions bigQueryOptions; + final Bigquery bigQueryClient; + + /** Builds a BigQuery source using pipeline options to instantiate a Bigquery client. */ + public BigQuerySource(BigQueryOptions bigQueryOptions, TableReference tableRef) { + // Save pipeline options so that we can construct the BigQuery client on-demand whenever an + // iterator gets created. + this.bigQueryOptions = bigQueryOptions; + this.tableRef = tableRef; + this.bigQueryClient = null; + } + + /** Builds a BigQuerySource directly using a BigQuery client. */ + public BigQuerySource(Bigquery bigQueryClient, TableReference tableRef) { + this.bigQueryOptions = null; + this.tableRef = tableRef; + this.bigQueryClient = bigQueryClient; + } + + @Override + public SourceIterator iterator() throws IOException { + return new BigQuerySourceIterator( + bigQueryClient != null + ? bigQueryClient + : Transport.newBigQueryClient(bigQueryOptions).build(), + tableRef); + } + + /** + * A SourceIterator that yields TableRow objects for each row of a BigQuery table. + */ + class BigQuerySourceIterator extends AbstractSourceIterator { + + private BigQueryTableRowIterator rowIterator; + + public BigQuerySourceIterator(Bigquery bigQueryClient, TableReference tableRef) { + rowIterator = new BigQueryTableRowIterator(bigQueryClient, tableRef); + } + + @Override + public boolean hasNext() { + return rowIterator.hasNext(); + } + + @Override + public TableRow next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return rowIterator.next(); + } + + @Override + public Progress getProgress() { + // For now reporting progress is not supported because this source is used only when + // an entire table needs to be read by each worker (used as a side input for instance). + throw new UnsupportedOperationException(); + } + + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + // For now updating the stop position is not supported because this source + // is used only when an entire table needs to be read by each worker (used + // as a side input for instance). + checkNotNull(proposedStopPosition); + throw new UnsupportedOperationException(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java new file mode 100644 index 0000000000000..682b7faa1400b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java @@ -0,0 +1,46 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.services.bigquery.model.TableReference; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +/** + * Creates a BigQuerySource from a {@link CloudObject} spec. + */ +public class BigQuerySourceFactory { + // Do not instantiate. + private BigQuerySourceFactory() {} + + public static BigQuerySource create( + PipelineOptions options, CloudObject spec, Coder coder, + ExecutionContext executionContext) throws Exception { + return new BigQuerySource( + options.as(BigQueryOptions.class), + new TableReference() + .setProjectId(getString(spec, PropertyNames.BIGQUERY_PROJECT)) + .setDatasetId(getString(spec, PropertyNames.BIGQUERY_DATASET)) + .setTableId(getString(spec, PropertyNames.BIGQUERY_TABLE))); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ByteArrayShufflePosition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ByteArrayShufflePosition.java new file mode 100644 index 0000000000000..881f61b730207 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ByteArrayShufflePosition.java @@ -0,0 +1,95 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.decodeBase64; +import static com.google.api.client.util.Base64.encodeBase64URLSafeString; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.util.common.worker.ShufflePosition; +import com.google.common.primitives.UnsignedBytes; + +import java.util.Arrays; + +/** + * Represents a ShufflePosition as an array of bytes. 
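 * <p>Positions round-trip through URL-safe base64 and order lexicographically by
 * unsigned byte value, for example:
 * <pre>{@code
 * ByteArrayShufflePosition first = ByteArrayShufflePosition.of(new byte[] {0x00, 0x7F});
 * ByteArrayShufflePosition second = ByteArrayShufflePosition.of(new byte[] {(byte) 0x80});
 * ByteArrayShufflePosition decoded =
 *     ByteArrayShufflePosition.fromBase64(first.encodeBase64());
 * // decoded.equals(first) is true; first.compareTo(second) < 0.
 * }</pre>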
+ */ +public class ByteArrayShufflePosition implements Comparable, ShufflePosition { + private final byte[] position; + + public ByteArrayShufflePosition(byte[] position) { + this.position = position; + } + + public static ByteArrayShufflePosition fromBase64(String position) { + return ByteArrayShufflePosition.of(decodeBase64(position)); + } + + public static ByteArrayShufflePosition of(byte[] position) { + if (position == null) { + return null; + } + return new ByteArrayShufflePosition(position); + } + + public static byte[] getPosition(ShufflePosition shufflePosition) { + if (shufflePosition == null) { + return null; + } + Preconditions.checkArgument( + shufflePosition instanceof ByteArrayShufflePosition); + ByteArrayShufflePosition adapter = (ByteArrayShufflePosition) shufflePosition; + return adapter.getPosition(); + } + + public byte[] getPosition() { return position; } + + public String encodeBase64() { + return encodeBase64URLSafeString(position); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof ByteArrayShufflePosition) { + ByteArrayShufflePosition that = (ByteArrayShufflePosition) o; + return Arrays.equals(this.position, that.position); + } + return false; + } + + @Override + public int hashCode() { + return Arrays.hashCode(position); + } + + @Override + public String toString() { + return "ShufflePosition(" + (new String(position)) + ")"; + } + + @Override + public int compareTo(Object o) { + if (this == o) { + return 0; + } + return UnsignedBytes.lexicographicalComparator().compare( + position, ((ByteArrayShufflePosition) o).position); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleBatchReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleBatchReader.java new file mode 100644 index 0000000000000..6f746ffec8c59 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleBatchReader.java @@ -0,0 +1,97 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleBatchReader; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.ShufflePosition; +import com.google.common.io.ByteStreams; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; + +import javax.annotation.Nullable; + +/** + * ChunkingShuffleBatchReader reads data from a shuffle dataset using a + * ShuffleReader. 
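 * <p>Each field of a {@link ShuffleEntry} is encoded as a 4-byte big-endian length
 * prefix followed by that many bytes; {@code getShuffleEntry} expects four such fields
 * (position, key, secondary key, value) in order. A round-trip sketch of the helper:
 * <pre>{@code
 * ByteArrayOutputStream bytes = new ByteArrayOutputStream();
 * DataOutputStream out = new DataOutputStream(bytes);
 * byte[] field = "key".getBytes(StandardCharsets.UTF_8);
 * out.writeInt(field.length);
 * out.write(field);
 * byte[] parsed = getFixedLengthPrefixedByteArray(
 *     new ByteArrayInputStream(bytes.toByteArray()));
 * // Arrays.equals(parsed, field) is true.
 * }</pre>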
+ */ +final class ChunkingShuffleBatchReader implements ShuffleBatchReader { + private ShuffleReader reader; + + /** + * @param reader used to read from a shuffle dataset + */ + public ChunkingShuffleBatchReader(ShuffleReader reader) throws IOException { + this.reader = reader; + } + + @Override + public ShuffleBatchReader.Batch read( + @Nullable ShufflePosition startShufflePosition, + @Nullable ShufflePosition endShufflePosition) throws IOException { + @Nullable byte[] startPosition = + ByteArrayShufflePosition.getPosition(startShufflePosition); + @Nullable byte[] endPosition = + ByteArrayShufflePosition.getPosition(endShufflePosition); + + ShuffleReader.ReadChunkResult result = + reader.readIncludingPosition(startPosition, endPosition); + InputStream input = new ByteArrayInputStream(result.chunk); + ArrayList entries = new ArrayList<>(); + while (input.available() > 0) { + entries.add(getShuffleEntry(input)); + } + return new Batch(entries, result.nextStartPosition == null ? null + : ByteArrayShufflePosition.of(result.nextStartPosition)); + } + + /** + * Extracts a ShuffleEntry by parsing bytes from a given InputStream. + * + * @param input stream to read from + * @return parsed ShuffleEntry + */ + static ShuffleEntry getShuffleEntry(InputStream input) throws IOException { + byte[] position = getFixedLengthPrefixedByteArray(input); + byte[] key = getFixedLengthPrefixedByteArray(input); + byte[] skey = getFixedLengthPrefixedByteArray(input); + byte[] value = getFixedLengthPrefixedByteArray(input); + return new ShuffleEntry(position, key, skey, value); + } + + /** + * Extracts a length-prefix-encoded byte array from a given InputStream. + * + * @param input stream to read from + * @return parsed byte array + */ + static byte[] getFixedLengthPrefixedByteArray(InputStream input) + throws IOException { + DataInputStream dataInputStream = new DataInputStream(input); + int length = dataInputStream.readInt(); + if (length < 0) { + throw new IOException("invalid length: " + length); + } + byte[] data = new byte[(int) length]; + ByteStreams.readFully(dataInputStream, data); + return data; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleEntryWriter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleEntryWriter.java new file mode 100644 index 0000000000000..9c55c181aebfe --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ChunkingShuffleEntryWriter.java @@ -0,0 +1,87 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; + +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * ChunkingShuffleEntryWriter buffers ShuffleEntries and writes them + * in batches to a shuffle dataset using a given writer. + */ +@NotThreadSafe +final class ChunkingShuffleEntryWriter implements ShuffleEntryWriter { + // Approximate maximum size of a chunk in bytes. + private static final int MAX_CHUNK_SIZE = 1 << 20; + + private static final byte[] EMPTY_BYTES = new byte[0]; + + private ByteArrayOutputStream chunk = new ByteArrayOutputStream(); + + private final ShuffleWriter writer; + + /** + * @param writer used to write chunks created by this writer + */ + public ChunkingShuffleEntryWriter(ShuffleWriter writer) { + this.writer = checkNotNull(writer); + } + + @Override + public long put(ShuffleEntry entry) throws IOException { + if (chunk.size() >= MAX_CHUNK_SIZE) { + writeChunk(); + } + + DataOutputStream output = new DataOutputStream(chunk); + return putFixedLengthPrefixedByteArray(entry.getKey(), output) + + putFixedLengthPrefixedByteArray(entry.getSecondaryKey(), output) + + putFixedLengthPrefixedByteArray(entry.getValue(), output); + } + + @Override + public void close() throws IOException { + writeChunk(); + writer.close(); + } + + private void writeChunk() throws IOException { + if (chunk.size() > 0) { + writer.write(chunk.toByteArray()); + chunk.reset(); + } + } + + static int putFixedLengthPrefixedByteArray(byte[] data, + DataOutputStream output) + throws IOException { + if (data == null) { + data = EMPTY_BYTES; + } + int bytesWritten = output.size(); + output.writeInt(data.length); + output.write(data, 0, data.length); + return output.size() - bytesWritten; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java new file mode 100644 index 0000000000000..16230571fae12 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -0,0 +1,219 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBytes; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.base.Preconditions; + +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * A wrapper around a decoded user value combining function. + */ +public class CombineValuesFn extends NormalParDoFn { + /** + * The optimizer may split run the user combiner in 3 separate + * phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees + * fit. The CombinerPhase dictates which DoFn is actually running in + * the worker. + * + * TODO: These strings are part of the service definition, and + * should be added into the definition of the ParDoInstruction, + * but the protiary definitions don't allow for enums yet. + */ + public static class CombinePhase { + public static final String ALL = "all"; + public static final String ADD = "add"; + public static final String MERGE = "merge"; + public static final String EXTRACT = "extract"; + } + + public static CombineValuesFn create( + PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + @Nullable List sideInputInfos, + @Nullable List multiOutputInfos, + Integer numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler /* unused */) + throws Exception { + Object deserializedFn = + SerializableUtils.deserializeFromByteArray( + getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), + "serialized user fn"); + Preconditions.checkArgument( + deserializedFn instanceof Combine.KeyedCombineFn); + Combine.KeyedCombineFn combineFn = (Combine.KeyedCombineFn) deserializedFn; + + // Get the combine phase, default to ALL. (The implementation + // doesn't have to split the combiner). 
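    // For reference, the element types handled by each phase's DoFn chosen in the
    // switch below (K = key, VI = input, VA = accumulator, VO = output):
    //   ALL:     KV<K, Iterable<VI>> -> KV<K, VO>   (add + merge + extract, unsplit)
    //   ADD:     KV<K, Iterable<VI>> -> KV<K, VA>
    //   MERGE:   KV<K, Iterable<VA>> -> KV<K, VA>
    //   EXTRACT: KV<K, VA>           -> KV<K, VO>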
+ String phase = getString(cloudUserFn, PropertyNames.PHASE, CombinePhase.ALL); + + Preconditions.checkArgument( + sideInputInfos == null || sideInputInfos.size() == 0, + "unexpected side inputs for CombineValuesFn"); + Preconditions.checkArgument( + numOutputs == 1, "expected exactly one output for CombineValuesFn"); + + DoFn doFn = null; + switch (phase) { + case CombinePhase.ALL: + doFn = new CombineValuesDoFn(combineFn); + break; + case CombinePhase.ADD: + doFn = new AddInputsDoFn(combineFn); + break; + case CombinePhase.MERGE: + doFn = new MergeAccumulatorsDoFn(combineFn); + break; + case CombinePhase.EXTRACT: + doFn = new ExtractOutputDoFn(combineFn); + break; + default: + throw new IllegalArgumentException( + "phase must be one of 'all', 'add', 'merge', 'extract'"); + } + return new CombineValuesFn(options, doFn, stepName, executionContext, addCounterMutator); + } + + private CombineValuesFn( + PipelineOptions options, + DoFn doFn, + String stepName, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator) { + super( + options, + doFn, + PTuple.empty(), + Arrays.asList("output"), + stepName, + executionContext, + addCounterMutator); + } + + /** + * The ALL phase is the unsplit combiner, in case combiner lifting + * is disabled or the optimizer chose not to lift this combiner. + */ + private static class CombineValuesDoFn + extends DoFn>, KV>{ + private final Combine.KeyedCombineFn combineFn; + + private CombineValuesDoFn( + Combine.KeyedCombineFn combineFn) { + this.combineFn = combineFn; + } + + @Override + public void processElement(ProcessContext c) { + KV> kv = (KV>) c.element(); + K key = (K) kv.getKey(); + + c.output(KV.of(key, this.combineFn.apply(key, kv.getValue()))); + } + } + + /** + * ADD phase: KV> -> KV + */ + private static class AddInputsDoFn + extends DoFn>, KV>{ + private final Combine.KeyedCombineFn combineFn; + + private AddInputsDoFn( + Combine.KeyedCombineFn combineFn) { + this.combineFn = combineFn; + } + + @Override + public void processElement(ProcessContext c) { + KV> kv = (KV>) c.element(); + K key = kv.getKey(); + VA accum = this.combineFn.createAccumulator(key); + for (VI input : kv.getValue()) { + this.combineFn.addInput(key, accum, input); + } + + c.output(KV.of(key, accum)); + } + } + + /** + * MERGE phase: KV> -> KV + */ + private static class MergeAccumulatorsDoFn + extends DoFn>, KV>{ + private final Combine.KeyedCombineFn combineFn; + + private MergeAccumulatorsDoFn( + Combine.KeyedCombineFn combineFn) { + this.combineFn = combineFn; + } + + @Override + public void processElement(ProcessContext c) { + KV> kv = (KV>) c.element(); + K key = kv.getKey(); + VA accum = this.combineFn.mergeAccumulators(key, kv.getValue()); + + c.output(KV.of(key, accum)); + } + } + + /** + * EXTRACT phase: KV> -> KV + */ + private static class ExtractOutputDoFn + extends DoFn, KV>{ + private final Combine.KeyedCombineFn combineFn; + + private ExtractOutputDoFn( + Combine.KeyedCombineFn combineFn) { + this.combineFn = combineFn; + } + + @Override + public void processElement(ProcessContext c) { + KV kv = (KV) c.element(); + K key = kv.getKey(); + VO output = this.combineFn.extractOutput(key, kv.getValue()); + + c.output(KV.of(key, output)); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannel.java new file mode 100644 index 0000000000000..660b374665572 --- /dev/null +++ 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannel.java @@ -0,0 +1,270 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; +import static com.google.api.client.util.Preconditions.checkState; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.SeekableByteChannel; + +import javax.annotation.concurrent.GuardedBy; + +/** + * A {@link SeekableByteChannel} that adds copy semantics. + * + *

<p>This implementation uses a lock to ensure that only one thread accesses + * the underlying {@code SeekableByteChannel} at any given time. + * + *

<p>{@link SeekableByteChannel#close} is called on the underlying channel once + * all {@code CopyableSeekableByteChannel} objects copied from the initial + * {@code CopyableSeekableByteChannel} are closed. + * + *
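 * <p>A usage sketch ({@code base} is assumed to be an open {@link SeekableByteChannel}
 * positioned at offset 0):
 * <pre>{@code
 * CopyableSeekableByteChannel first = new CopyableSeekableByteChannel(base);
 * CopyableSeekableByteChannel second = first.copy();
 * second.position(100);                 // does not move 'first'
 * first.read(ByteBuffer.allocate(16));  // reads bytes 0..15
 * second.read(ByteBuffer.allocate(16)); // reads bytes 100..115
 * first.close();                        // base stays open
 * second.close();                       // last copy closed; base is closed too
 * }</pre>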

The implementation keeps track of the position of each + * {@code CopyableSeekableByteChannel}; on access, it synchronizes with the + * other {@code CopyableSeekableByteChannel} instances accessing the underlying + * channel, seeks to its own position, performs the operation, updates its local + * position, and returns the result. + */ +final class CopyableSeekableByteChannel implements SeekableByteChannel { + /** This particular stream's position in the base stream. */ + private long pos; + + /** + * The synchronization object keeping track of the base + * {@link SeekableByteChannel}, its reference count, and its current position. + * This also doubles as the lock shared by all + * {@link CopyableSeekableByteChannel} instances derived from some original + * instance. + */ + private final Sync sync; + + /** + * Indicates whether this {@link CopyableSeekableByteChannel} is closed. + * + *

<p>Invariant: Unclosed channels own a reference to the base channel, + * allowing us to make {@link #close} idempotent. + * + *

This is only modified under the sync lock. + */ + private boolean closed; + + /** + * Constructs a new {@link CopyableSeekableByteChannel}. The supplied base + * channel will be closed when this channel and all derived channels are + * closed. + */ + public CopyableSeekableByteChannel(SeekableByteChannel base) throws IOException { + this(new Sync(base), 0); + + // Update the position to match the original stream's position. + // + // This doesn't actually need to be synchronized, but it's a little more + // obviously correct to always access sync.position while holding sync's + // internal monitor. + synchronized (sync) { + sync.position = base.position(); + pos = sync.position; + } + } + + /** + * The internal constructor used when deriving a new + * {@link CopyableSeekableByteChannel}. + * + *

<p>N.B. This signature is deliberately incompatible with the public + * constructor. + * + *

Ordinarily, one would implement copy using a copy constructor, and pass + * the object being copied -- but that signature would be compatible with the + * public constructor creating a new set of + * {@code CopyableSeekableByteChannel} objects for some base channel. The + * copy constructor would still be the one called, since its type is more + * specific, but that's fragile; it'd be easy to tweak the signature of the + * constructor used for copies without changing callers, which would silently + * fall back to using the public constructor. So instead, we're careful to + * give this internal constructor its own unique signature. + */ + private CopyableSeekableByteChannel(Sync sync, long pos) { + this.sync = checkNotNull(sync); + checkState(sync.base.isOpen(), + "the base SeekableByteChannel is not open"); + synchronized (sync) { + sync.refCount++; + } + this.pos = pos; + this.closed = false; + } + + /** + * Creates a new {@link CopyableSeekableByteChannel} derived from an existing + * channel, referencing the same base channel. + */ + public CopyableSeekableByteChannel copy() throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + return new CopyableSeekableByteChannel(sync, pos); + } + } + + // SeekableByteChannel implementation + + @Override + public long position() throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + return pos; + } + } + + @Override + public CopyableSeekableByteChannel position(long newPosition) + throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + // Verify that the position is valid for the base channel. + sync.base.position(newPosition); + this.pos = newPosition; + this.sync.position = newPosition; + } + return this; + } + + @Override + public int read(ByteBuffer dst) throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + reposition(); + int bytesRead = sync.base.read(dst); + notePositionAdded(bytesRead); + return bytesRead; + } + } + + @Override + public long size() throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + return sync.base.size(); + } + } + + @Override + public CopyableSeekableByteChannel truncate(long size) throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + sync.base.truncate(size); + return this; + } + } + + @Override + public int write(ByteBuffer src) throws IOException { + synchronized (sync) { + if (closed) { + throw new ClosedChannelException(); + } + reposition(); + int bytesWritten = sync.base.write(src); + notePositionAdded(bytesWritten); + return bytesWritten; + } + } + + @Override + public boolean isOpen() { + synchronized (sync) { + if (closed) { + return false; + } + return sync.base.isOpen(); + } + } + + @Override + public void close() throws IOException { + synchronized (sync) { + if (closed) { + return; + } + closed = true; + sync.refCount--; + if (sync.refCount == 0) { + sync.base.close(); + } + } + } + + /** + * Updates the base stream's position to match the position required by this + * {@link CopyableSeekableByteChannel}. + */ + @GuardedBy("sync") + private void reposition() throws IOException { + if (pos != sync.position) { + sync.base.position(pos); + sync.position = pos; + } + } + + /** + * Notes that the specified amount has been logically added to the current + * stream's position. 
+ */ + @GuardedBy("sync") + private void notePositionAdded(int amount) { + if (amount < 0) { + return; // Handles EOF indicators. + } + pos += amount; + sync.position += amount; + } + + /** + * A simple value type used to synchronize a set of + * {@link CopyableSeekableByteChannel} instances referencing a single + * underlying channel. + */ + private static final class Sync { + // N.B. Another way to do this would be to implement something like a + // RefcountingForwardingSeekableByteChannel. Doing so would have the + // advantage of clearly isolating the mutable state, at the cost of a lot + // more code. + public final SeekableByteChannel base; + @GuardedBy("this") public long refCount = 0; + @GuardedBy("this") public long position = 0; + + public Sync(SeekableByteChannel base) throws IOException { + this.base = checkNotNull(base); + position = base.position(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java new file mode 100644 index 0000000000000..1bb3db228a730 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; + +import java.util.Map; + +/** + * Creates {@code CustomSourceFormat} objects from {@code Source}. + */ +public class CustomSourceFormatFactory { + private CustomSourceFormatFactory() {} + + public static CustomSourceFormat create(Source source) throws Exception { + Map spec = source.getSpec(); + + try { + return InstanceBuilder.ofType(CustomSourceFormat.class) + .fromClassName(getString(spec, PropertyNames.OBJECT_TYPE_NAME)) + .build(); + + } catch (ClassNotFoundException exn) { + throw new Exception( + "unable to create a custom source format from " + source, exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java new file mode 100644 index 0000000000000..f2d41cfcbc45d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -0,0 +1,121 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.buildStatus; +import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.uniqueId; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.WorkItem; +import com.google.api.services.dataflow.model.WorkItemServiceState; +import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.WorkProgressUpdater; + +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * DataflowWorkProgressUpdater implements the WorkProgressUpdater + * interface for the Cloud Dataflow system. + */ +@NotThreadSafe +public class DataflowWorkProgressUpdater extends WorkProgressUpdater { + private static final Logger LOG = LoggerFactory.getLogger(DataflowWorkProgressUpdater.class); + + /** The Dataflow Worker WorkItem client */ + private final DataflowWorker.WorkUnitClient workUnitClient; + + /** The WorkItem for which work progress updates are sent. 
*/ + private final WorkItem workItem; + + /** Options specifying information about the pipeline run by the worker.*/ + private final DataflowWorkerHarnessOptions options; + + public DataflowWorkProgressUpdater( + WorkItem workItem, + WorkExecutor worker, + DataflowWorker.WorkUnitClient workUnitClient, + DataflowWorkerHarnessOptions options) { + super(worker); + this.workItem = workItem; + this.workUnitClient = workUnitClient; + this.options = options; + } + + @Override + protected String workString() { + return uniqueId(workItem); + } + + @Override + protected long getWorkUnitLeaseExpirationTimestamp() { + return getLeaseExpirationTimestamp(workItem); + } + + @Override + protected void reportProgressHelper() throws Exception { + WorkItemStatus status = buildStatus( + workItem, false /*completed*/, + worker.getOutputCounters(), worker.getOutputMetrics(), options, + worker.getWorkerProgress(), stopPositionToService, + null /*sourceOperationResponse*/, null /*errors*/); + status.setRequestedLeaseDuration(toCloudDuration(Duration.millis(requestedLeaseDurationMs))); + + WorkItemServiceState result = workUnitClient.reportWorkItemStatus(status); + if (result != null) { + // Resets state after a successful progress report. + stopPositionToService = null; + + progressReportIntervalMs = nextProgressReportInterval( + fromCloudDuration(workItem.getReportStatusInterval()).getMillis(), + leaseRemainingTime(getLeaseExpirationTimestamp(result))); + + ApproximateProgress suggestedStopPoint = result.getSuggestedStopPoint(); + if (suggestedStopPoint == null && result.getSuggestedStopPosition() != null) { + suggestedStopPoint = new ApproximateProgress() + .setPosition(result.getSuggestedStopPosition()); + } + + if (suggestedStopPoint != null) { + LOG.info("Proposing stop progress on work unit {} at proposed stopping point {}", + workString(), suggestedStopPoint); + stopPositionToService = + worker.proposeStopPosition( + cloudProgressToSourceProgress(suggestedStopPoint)); + } + } + } + + /** Returns the given work unit's lease expiration timestamp. */ + private long getLeaseExpirationTimestamp(WorkItem workItem) { + return fromCloudTime(workItem.getLeaseExpireTime()).getMillis(); + } + + /** Returns the given work unit service state lease expiration timestamp. */ + private long getLeaseExpirationTimestamp(WorkItemServiceState workItemServiceState) { + return fromCloudTime(workItemServiceState.getLeaseExpireTime()).getMillis(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java new file mode 100644 index 0000000000000..5175d15aa882e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -0,0 +1,330 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceOperationResponseToSourceOperationResponse; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceOperationResponseToCloudSourceOperationResponse; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; + +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.api.services.dataflow.model.Status; +import com.google.api.services.dataflow.model.WorkItem; +import com.google.api.services.dataflow.model.WorkItemServiceState; +import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudCounterUtils; +import com.google.cloud.dataflow.sdk.util.CloudMetricUtils; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.UserCodeException; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.Metric; +import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.WorkProgressUpdater; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * This is a semi-abstract harness for executing WorkItem tasks in + * Java workers. Concrete implementations need to implement a + * WorkUnitClient. + * + *
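 * <p>A usage sketch ({@code workUnitClient} and {@code options} are assumed to be
 * supplied by the surrounding worker harness):
 * <pre>{@code
 * DataflowWorker worker = new DataflowWorker(workUnitClient, options);
 * boolean didWork = worker.getAndPerformWork();
 * // didWork is false if no work item was available or the work item failed.
 * }</pre>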

DataflowWorker presents one public interface, + * getAndPerformWork(), which uses the WorkUnitClient to get work, + * execute it, and update the work. + */ +public class DataflowWorker { + + private static final Logger LOG = LoggerFactory.getLogger(DataflowWorker.class); + + /** + * A client to get and update work items. + */ + private final WorkUnitClient workUnitClient; + + /** + * Pipeline options, initially provided via the constructor and + * partially provided via each work work unit. + */ + private final DataflowWorkerHarnessOptions options; + + public DataflowWorker(WorkUnitClient workUnitClient, + DataflowWorkerHarnessOptions options) { + this.workUnitClient = workUnitClient; + this.options = options; + } + + /** + * Gets WorkItem and performs it; returns true if work was + * successfully completed. + * + * getAndPerformWork may throw if there is a failure of the + * WorkUnitClient. + */ + public boolean getAndPerformWork() throws IOException { + WorkItem work = workUnitClient.getWorkItem(); + if (work == null) { + return false; + } + return doWork(work); + } + + /** + * Performs the given work; returns true if successful. + * + * @throws IOException Only if the WorkUnitClient fails. + */ + private boolean doWork(WorkItem workItem) throws IOException { + LOG.info("Executing: {}", workItem); + + WorkExecutor worker = null; + try { + // Populate PipelineOptions with data from work unit. + options.setProject(workItem.getProjectId()); + + ExecutionContext executionContext = new BatchModeExecutionContext(); + + if (workItem.getMapTask() != null) { + worker = MapTaskExecutorFactory.create(options, + workItem.getMapTask(), + executionContext); + + } else if (workItem.getSourceOperationTask() != null) { + worker = SourceOperationExecutorFactory.create( + workItem.getSourceOperationTask()); + + } else { + throw new RuntimeException("unknown kind of work item: " + workItem.toString()); + } + + WorkProgressUpdater progressUpdater = new DataflowWorkProgressUpdater( + workItem, worker, workUnitClient, options); + progressUpdater.startReportingProgress(); + + // Blocks while executing the work. + // TODO: refactor to allow multiple work unit + // processing threads. + worker.execute(); + + // Log all counter values for debugging purposes. + CounterSet counters = worker.getOutputCounters(); + for (Counter counter : counters) { + LOG.info("COUNTER {}.", counter); + } + + // Log all metrics for debugging purposes. + Collection> metrics = worker.getOutputMetrics(); + for (Metric metric : metrics) { + LOG.info("METRIC {}: {}", metric.getName(), metric.getValue()); + } + + // stopReportingProgress can throw an exception if the final progress + // update fails. For correctness, the task must then be marked as failed. + progressUpdater.stopReportingProgress(); + + // Report job success. + + // TODO: Find out a generic way for the WorkExecutor to report work-specific results + // into the work update. + CustomSourceFormat.SourceOperationResponse sourceOperationResponse = + (worker instanceof SourceOperationExecutor) + ? 
cloudSourceOperationResponseToSourceOperationResponse( + ((SourceOperationExecutor) worker).getResponse()) + : null; + reportStatus(options, "Success", workItem, counters, metrics, sourceOperationResponse, + null /*errors*/); + + return true; + + } catch (Throwable e) { + handleWorkError(workItem, worker, e); + return false; + + } finally { + if (worker != null) { + try { + worker.close(); + } catch (Exception exn) { + LOG.warn("Uncaught exception occurred during work unit shutdown:", exn); + } + } + } + } + + /** Handles the exception thrown when reading and executing the work. */ + private void handleWorkError( + WorkItem workItem, WorkExecutor worker, Throwable e) + throws IOException { + LOG.warn("Uncaught exception occurred during work unit execution:", e); + + // TODO: Look into moving the stack trace thinning + // into the client. + Throwable t = e instanceof UserCodeException ? e.getCause() : e; + Status error = new Status(); + error.setCode(2); // Code.UNKNOWN. TODO: Replace with a generated definition. + // TODO: Attach the stack trace as exception details, not to the message. + error.setMessage(buildCloudStackTrace(t)); + + reportStatus(options, "Failure", workItem, + worker == null ? null : worker.getOutputCounters(), + worker == null ? null : worker.getOutputMetrics(), + null /*sourceOperationResponse*/, + error == null ? null : Collections.singletonList(error)); + } + + /** + * Recursively goes through an exception, pulling out the stack trace. If the + * exception is a chained exception, it recursively goes through any causes + * and appends them to the stack trace. + */ + private static String buildCloudStackTrace(Throwable t) { + StringWriter result = new StringWriter(); + PrintWriter printResult = new PrintWriter(result); + + printResult.print("Exception: "); + for (;;) { + printResult.println(t.toString()); + for (StackTraceElement frame : t.getStackTrace()) { + printResult.println(frame.toString()); + } + t = t.getCause(); + if (t == null) { + break; + } + printResult.print("Caused by: "); + } + return result.toString(); + } + + private void reportStatus(DataflowWorkerHarnessOptions options, + String status, + WorkItem workItem, + @Nullable CounterSet counters, + @Nullable Collection> metrics, + @Nullable CustomSourceFormat.SourceOperationResponse + sourceOperationResponse, + @Nullable List errors) + throws IOException { + LOG.info("{} processing work item {}", status, uniqueId(workItem)); + WorkItemStatus workItemStatus = buildStatus(workItem, true /*completed*/, + counters, metrics, options, null, null, sourceOperationResponse, errors); + workUnitClient.reportWorkItemStatus(workItemStatus); + } + + static WorkItemStatus buildStatus( + WorkItem workItem, + boolean completed, + @Nullable CounterSet counters, + @Nullable Collection> metrics, + DataflowWorkerHarnessOptions options, + @Nullable Source.Progress progress, + @Nullable Source.Position stopPosition, + @Nullable CustomSourceFormat.SourceOperationResponse sourceOperationResponse, + @Nullable List errors) { + WorkItemStatus status = new WorkItemStatus(); + status.setWorkItemId(Long.toString(workItem.getId())); + status.setCompleted(completed); + + List counterUpdates = null; + List metricUpdates = null; + + if (counters != null) { + // Currently we lack a reliable exactly-once delivery mechanism for + // work updates, i.e. they can be retried or reordered, so sending + // delta updates could lead to double-counted or missed contributions. + // However, delta updates may be beneficial for performance. 
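// (Hedged illustration, not part of the original comment: suppose a counter has
// reached a cumulative value of 10, built up from increments of 4 and 6. If the
// report carrying the cumulative value 10 is retried, the service still sees 10;
// if a delta report of +6 is retried, the total over-counts to 16, and if that
// delta is dropped, its contribution is silently lost. Cumulative reports thus
// tolerate retries and reordering, at the cost of resending the full value.)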
+ // TODO: Implement exactly-once delivery and use deltas, + // if it ever becomes clear that deltas are necessary for performance. + boolean delta = false; + counterUpdates = CloudCounterUtils.extractCounters(counters, delta); + } + if (metrics != null) { + metricUpdates = CloudMetricUtils.extractCloudMetrics(metrics, options.getWorkerId()); + } + List updates = null; + if (counterUpdates == null) { + updates = metricUpdates; + } else if (metrics == null) { + updates = counterUpdates; + } else { + updates = new ArrayList<>(); + updates.addAll(counterUpdates); + updates.addAll(metricUpdates); + } + status.setMetricUpdates(updates); + + // TODO: Provide more structure representation of error, + // e.g., the serialized exception object. + if (errors != null) { + status.setErrors(errors); + } + + if (progress != null) { + status.setProgress(sourceProgressToCloudProgress(progress)); + } + if (stopPosition != null) { + status.setStopPosition(sourcePositionToCloudPosition(stopPosition)); + } + + if (workItem.getSourceOperationTask() != null) { + status.setSourceOperationResponse( + sourceOperationResponseToCloudSourceOperationResponse(sourceOperationResponse)); + } + + return status; + } + + static String uniqueId(WorkItem work) { + return work.getProjectId() + ";" + work.getJobId() + ";" + work.getId(); + } + + /** + * Abstract base class describing a client for WorkItem work units. + */ + public abstract static class WorkUnitClient { + /** + * Returns a new WorkItem unit for this Worker to work on or null + * if no work item is available. + */ + public abstract WorkItem getWorkItem() throws IOException; + + /** + * Reports a {@link WorkItemStatus} for an assigned {@link WorkItem}. + * + * @param workItemStatus the status to report + * @return a {@link WorkServiceState} (e.g. a new stop position) + */ + public abstract WorkItemServiceState reportWorkItemStatus( + WorkItemStatus workItemStatus) + throws IOException; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java new file mode 100644 index 0000000000000..fa17cf67390d0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -0,0 +1,231 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; + +import com.google.api.client.util.Preconditions; +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.LeaseWorkItemRequest; +import com.google.api.services.dataflow.model.LeaseWorkItemResponse; +import com.google.api.services.dataflow.model.ReportWorkItemStatusRequest; +import com.google.api.services.dataflow.model.ReportWorkItemStatusResponse; +import com.google.api.services.dataflow.model.WorkItem; +import com.google.api.services.dataflow.model.WorkItemServiceState; +import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingInitializer; +import com.google.cloud.dataflow.sdk.util.Credentials; +import com.google.cloud.dataflow.sdk.util.GcsIOChannelFactory; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.common.collect.ImmutableList; + +import org.joda.time.DateTime; +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.MDC; + +import java.io.IOException; +import java.lang.Thread.UncaughtExceptionHandler; +import java.util.Collections; +import java.util.List; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * This is a harness for executing WorkItem tasks in Java workers. + *

+ * The worker fetches WorkItem units from the Dataflow Service. + * When the work is complete, the program sends results via the worker service API. + *

+ * Returns status code 0 on successful completion, 1 on any uncaught failures. + *

+ * TODO: add support for VM initialization via config. + * During initialization, we should take a configuration which specifies + * an initialization function, allowing user code to run on VM startup. + */ +public class DataflowWorkerHarness { + private static final Logger LOG = LoggerFactory.getLogger(DataflowWorkerHarness.class); + + private static final String APPLICATION_NAME = "DataflowWorkerHarness"; + + /** + * This uncaught exception handler logs the {@link Throwable} to the logger, {@link System#err} + * and exits the application with status code 1. + */ + static class WorkerUncaughtExceptionHandler implements UncaughtExceptionHandler { + static final WorkerUncaughtExceptionHandler INSTANCE = new WorkerUncaughtExceptionHandler(); + + @Override + public void uncaughtException(Thread t, Throwable e) { + LOG.error("Uncaught exception in main thread. Exiting with status code 1.", e); + System.err.println("Uncaught exception in main thread. Exiting with status code 1."); + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Fetches and processes work units from the Dataflow service. + */ + public static void main(String[] args) throws Exception { + Thread.currentThread().setUncaughtExceptionHandler(WorkerUncaughtExceptionHandler.INSTANCE); + new DataflowWorkerLoggingInitializer().initialize(); + + DataflowWorker worker = createFromSystemProperties(); + processWork(worker); + } + + // Visible for testing. + static void processWork(DataflowWorker worker) throws IOException { + worker.getAndPerformWork(); + } + + static DataflowWorker createFromSystemProperties() { + return create(PipelineOptionsFactory.createFromSystemProperties()); + } + + static DataflowWorker create(DataflowWorkerHarnessOptions options) { + MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_JOB_ID, options.getJobId()); + MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORKER_ID, options.getWorkerId()); + options.setAppName(APPLICATION_NAME); + + if (options.getGcpCredential() == null) { + try { + // Load the worker credential, otherwise the default is to load user + // credentials. + options.setGcpCredential(Credentials.getWorkerCredential(options)); + Preconditions.checkState(options.getGcpCredential() != null, + "Failed to obtain worker credential"); + } catch (Throwable e) { + LOG.warn("Unable to obtain any valid credentials. Worker inoperable.", e); + return null; + } + } + + // Configure standard IO factories. + IOChannelUtils.setIOFactory("gs", new GcsIOChannelFactory(options)); + + DataflowWorkUnitClient client = DataflowWorkUnitClient.fromOptions(options); + return new DataflowWorker(client, options); + } + + /** + * A Dataflow WorkUnit client that fetches WorkItems from the Dataflow service. + */ + @ThreadSafe + static class DataflowWorkUnitClient extends DataflowWorker.WorkUnitClient { + private final Dataflow dataflow; + private final DataflowWorkerHarnessOptions options; + + /** + * Creates a client that fetches WorkItems from the Dataflow service. + * + * @param options The pipeline options. + * @return A WorkItemClient that fetches WorkItems from the Dataflow service. + */ + static DataflowWorkUnitClient fromOptions(DataflowWorkerHarnessOptions options) { + return new DataflowWorkUnitClient( + Transport.newDataflowClient(options).build(), + options); + } + + /** + * Package private constructor for testing. 
+ */ + DataflowWorkUnitClient(Dataflow dataflow, DataflowWorkerHarnessOptions options) { + this.dataflow = dataflow; + this.options = options; + } + + /** + * Gets a WorkItem from the Dataflow service. + */ + @Override + public WorkItem getWorkItem() throws IOException { + LeaseWorkItemRequest request = new LeaseWorkItemRequest(); + request.setFactory(Transport.getJsonFactory()); + request.setWorkItemTypes(ImmutableList.of( + "map_task", "seq_map_task", "remote_source_task")); + // All remote sources require the "remote_source" capability. Dataflow's + // custom sources are further tagged with the format "custom_source". + request.setWorkerCapabilities(ImmutableList.of( + options.getWorkerId(), "remote_source", PropertyNames.CUSTOM_SOURCE_FORMAT)); + request.setWorkerId(options.getWorkerId()); + request.setCurrentWorkerTime(toCloudTime(DateTime.now())); + + // This shouldn't be necessary, but a valid cloud duration string is + // required by the Google API parsing framework. TODO: Fix the framework + // so that an empty or not-present string can be used as a default value. + request.setRequestedLeaseDuration(toCloudDuration(Duration.standardSeconds(60))); + + LOG.debug("Leasing work: {}", request); + + LeaseWorkItemResponse response = dataflow.v1b3().projects().jobs().workItems().lease( + options.getProject(), options.getJobId(), request).execute(); + LOG.debug("Lease work response: {}", response); + + List workItems = response.getWorkItems(); + if (workItems == null || workItems.isEmpty()) { + // We didn't lease any work + return null; + } else if (workItems.size() > 1){ + throw new IOException( + "This version of the SDK expects no more than one work item from the service: " + + response); + } + + WorkItem work = response.getWorkItems().get(0); + if (work == null || work.getId() == null) { + return null; + } + + MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORK_ID, Long.toString(work.getId())); + // Looks like the work's a'ight. + return work; + } + + @Override + public WorkItemServiceState reportWorkItemStatus(WorkItemStatus workItemStatus) + throws IOException { + workItemStatus.setFactory(Transport.getJsonFactory()); + LOG.debug("Reporting work status: {}", workItemStatus); + ReportWorkItemStatusResponse result = + dataflow.v1b3().projects().jobs().workItems().reportStatus( + options.getProject(), options.getJobId(), + new ReportWorkItemStatusRequest() + .setWorkerId(options.getWorkerId()) + .setWorkItemStatuses(Collections.singletonList(workItemStatus)) + .setCurrentWorkerTime(toCloudTime(DateTime.now()))) + .execute(); + if (result == null || result.getWorkItemServiceStates() == null + || result.getWorkItemServiceStates().size() != 1) { + throw new IOException( + "This version of the SDK expects exactly one work item service state from the service"); + } + WorkItemServiceState state = result.getWorkItemServiceStates().get(0); + LOG.debug("ReportWorkItemStatus result: {}", state); + return state; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java new file mode 100644 index 0000000000000..beea88747c1cf --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java @@ -0,0 +1,259 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelFactory; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTracker; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PushbackInputStream; +import java.nio.channels.Channels; +import java.util.Collection; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * Abstract base class for sources that read from files. + * + * @param the type of the elements read from the source + */ +public abstract class FileBasedSource extends Source { + protected static final int BUF_SIZE = 200; + protected final String filename; + @Nullable protected final Long startPosition; + @Nullable protected final Long endPosition; + protected final Coder coder; + protected final boolean useDefaultBufferSize; + + private static final Logger LOG = LoggerFactory.getLogger(FileBasedSource.class); + + protected FileBasedSource(String filename, + @Nullable Long startPosition, + @Nullable Long endPosition, + Coder coder, + boolean useDefaultBufferSize) { + this.filename = filename; + this.startPosition = startPosition; + this.endPosition = endPosition; + this.coder = coder; + this.useDefaultBufferSize = useDefaultBufferSize; + } + + /** + * Returns a new iterator for elements in the given range in the + * given file. If the range starts in the middle an element, this + * element is skipped as it is considered part of the previous + * range; if the last element that starts in the range finishes + * beyond the end position, it is still considered part of this + * range. In other words, the start position and the end position + * are "rounded up" to element boundaries. + * + * @param endPosition offset of the end position; null means end-of-file + */ + protected abstract SourceIterator newSourceIteratorForRangeInFile( + IOChannelFactory factory, String oneFile, long startPosition, + @Nullable Long endPosition) + throws IOException; + + /** + * Returns a new iterator for elements in the given files. 
Caller + * must ensure that the file collection is not empty. + */ + protected abstract SourceIterator newSourceIteratorForFiles( + IOChannelFactory factory, Collection files) throws IOException; + + @Override + public SourceIterator iterator() throws IOException { + IOChannelFactory factory = IOChannelUtils.getFactory(filename); + Collection inputs = factory.match(filename); + if (inputs.isEmpty()) { + throw new IOException("No match for file pattern '" + filename + "'"); + } + + if (startPosition != null || endPosition != null) { + if (inputs.size() != 1) { + throw new UnsupportedOperationException( + "Unable to apply range limits to multiple-input stream: " + + filename); + } + + return newSourceIteratorForRangeInFile( + factory, inputs.iterator().next(), + startPosition == null ? 0 : startPosition, endPosition); + } else { + return newSourceIteratorForFiles(factory, inputs); + } + } + + /** + * Abstract base class for file-based source iterators. + */ + protected abstract class FileBasedIterator extends AbstractSourceIterator { + protected final CopyableSeekableByteChannel seeker; + protected final PushbackInputStream stream; + protected final Long startOffset; + protected Long endOffset; + protected final ProgressTracker tracker; + protected ByteArrayOutputStream nextElement; + protected boolean nextElementComputed = false; + protected long offset; + + FileBasedIterator(CopyableSeekableByteChannel seeker, + long startOffset, + long offset, + @Nullable Long endOffset, + ProgressTracker tracker) throws IOException { + this.seeker = checkNotNull(seeker); + this.seeker.position(startOffset); + BufferedInputStream bufferedStream = useDefaultBufferSize + ? new BufferedInputStream(Channels.newInputStream(seeker)) + : new BufferedInputStream(Channels.newInputStream(seeker), BUF_SIZE); + this.stream = new PushbackInputStream(bufferedStream, BUF_SIZE); + this.startOffset = startOffset; + this.offset = offset; + this.endOffset = endOffset; + this.tracker = checkNotNull(tracker); + } + + /** + * Reads the next element. + * + * @return a {@code ByteArrayOutputStream} containing the contents + * of the element, or {@code null} if the end of the stream + * has been reached. + * @throws IOException if an I/O error occurs + */ + protected abstract ByteArrayOutputStream readElement() + throws IOException; + + @Override + public boolean hasNext() throws IOException { + computeNextElement(); + return nextElement != null; + } + + @Override + public T next() throws IOException { + advance(); + return CoderUtils.decodeFromByteArray(coder, nextElement.toByteArray()); + } + + void advance() throws IOException { + computeNextElement(); + if (nextElement == null) { + throw new NoSuchElementException(); + } + nextElementComputed = false; + } + + @Override + public Progress getProgress() { + // Currently we assume that only a offset position is reported as + // current progress. Source writer can override this method to update + // other metrics, e.g. completion percentage or remaining time. 
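// A minimal sketch (not part of this patch) of such an override, assuming the
// subclass knows its full byte range (startOffset and endOffset non-null), so
// it can also fill in the percentComplete field of ApproximateProgress:
//
//   @Override
//   public Progress getProgress() {
//     com.google.api.services.dataflow.model.Position position =
//         new com.google.api.services.dataflow.model.Position();
//     position.setByteOffset(offset);
//     ApproximateProgress progress = new ApproximateProgress();
//     progress.setPosition(position);
//     if (endOffset != null && endOffset > startOffset) {
//       progress.setPercentComplete(
//           (float) (offset - startOffset) / (endOffset - startOffset));
//     }
//     return cloudProgressToSourceProgress(progress);
//   }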
+ com.google.api.services.dataflow.model.Position currentPosition = + new com.google.api.services.dataflow.model.Position(); + currentPosition.setByteOffset(offset); + + ApproximateProgress progress = new ApproximateProgress(); + progress.setPosition(currentPosition); + + return cloudProgressToSourceProgress(progress); + } + + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + checkNotNull(proposedStopPosition); + + // Currently we only support stop position in byte offset of + // CloudPosition in a file-based Source. If stop position in + // other types is proposed, the end position in iterator will + // not be updated, and return null. + com.google.api.services.dataflow.model.ApproximateProgress stopPosition = + sourceProgressToCloudProgress(proposedStopPosition); + if (stopPosition == null) { + LOG.warn( + "A stop position other than CloudPosition is not supported now."); + return null; + } + + Long byteOffset = stopPosition.getPosition().getByteOffset(); + if (byteOffset == null) { + LOG.warn( + "A stop position other than byte offset is not supported in a " + + "file-based Source."); + return null; + } + if (byteOffset <= offset) { + // Proposed stop position is not after the current position: + // No stop position update. + return null; + } + + if (endOffset != null && byteOffset >= endOffset) { + // Proposed stop position is after the current stop (end) position: No + // stop position update. + return null; + } + + this.endOffset = byteOffset; + return cloudPositionToSourcePosition(stopPosition.getPosition()); + } + + /** + * Returns the end offset of the iterator. + * The method is called for test ONLY. + */ + Long getEndOffset() { + return this.endOffset; + } + + @Override + public void close() throws IOException { + stream.close(); + } + + private void computeNextElement() throws IOException { + if (nextElementComputed) { + return; + } + + if (endOffset == null || offset < endOffset) { + nextElement = readElement(); + } else { + nextElement = null; + } + nextElementComputed = true; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java new file mode 100644 index 0000000000000..adf0435e6e981 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBytes; +import static com.google.cloud.dataflow.sdk.util.Structs.getObject; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.util.StreamingGroupAlsoByWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A wrapper around a GroupAlsoByWindowsDoFn. This class is the same as + * NormalParDoFn, except that it gets deserialized differently. + */ +class GroupAlsoByWindowsParDoFn extends NormalParDoFn { + public static GroupAlsoByWindowsParDoFn create( + PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + @Nullable List sideInputInfos, + @Nullable List multiOutputInfos, + Integer numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler sampler /* unused */) + throws Exception { + Object windowingFn = + SerializableUtils.deserializeFromByteArray( + getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), + "serialized window fn"); + if (!(windowingFn instanceof WindowingFn)) { + throw new Exception( + "unexpected kind of WindowingFn: " + windowingFn.getClass().getName()); + } + + byte[] serializedCombineFn = getBytes(cloudUserFn, PropertyNames.COMBINE_FN, null); + Object combineFn = null; + if (serializedCombineFn != null) { + combineFn = + SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn"); + if (!(combineFn instanceof KeyedCombineFn)) { + throw new Exception("unexpected kind of KeyedCombineFn: " + combineFn.getClass().getName()); + } + } + + Map inputCoderObject = getObject(cloudUserFn, PropertyNames.INPUT_CODER); + + Coder inputCoder = Serializer.deserialize(inputCoderObject, Coder.class); + if (!(inputCoder instanceof WindowedValueCoder)) { + throw new Exception( + "Expected WindowedValueCoder for inputCoder, got: " + + inputCoder.getClass().getName()); + } + Coder elemCoder = ((WindowedValueCoder) inputCoder).getValueCoder(); + if (!(elemCoder instanceof KvCoder)) { + throw new Exception( + "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName()); + } + + DoFn windowingDoFn = StreamingGroupAlsoByWindowsDoFn.create( + (WindowingFn) windowingFn, + ((KvCoder) elemCoder).getValueCoder()); + + return new GroupAlsoByWindowsParDoFn( + options, windowingDoFn, stepName, executionContext, addCounterMutator); + } + + private GroupAlsoByWindowsParDoFn( + 
PipelineOptions options, + DoFn fn, + String stepName, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator) { + super( + options, + fn, + PTuple.empty(), + Arrays.asList("output"), + stepName, + executionContext, + addCounterMutator); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java new file mode 100644 index 0000000000000..2d168879a21b7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java @@ -0,0 +1,368 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; + +import com.google.api.client.util.Preconditions; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.Reiterable; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; +import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.GroupingShuffleEntryIterator; +import com.google.cloud.dataflow.sdk.util.common.worker.KeyGroupedShuffleEntries; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.joda.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * A source that reads from a shuffled dataset and yields key-grouped data. 
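 * For example (an illustrative sketch, not from the original javadoc): shuffle
 * entries ("a", 1), ("a", 2), ("b", 3) arriving in key order would be yielded
 * as two grouped records, KV("a", [1, 2]) followed by KV("b", [3]), each
 * wrapped in a WindowedValue with empty windows.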
+ * + * @param the type of the keys read from the shuffle + * @param the type of the values read from the shuffle + */ +public class GroupingShuffleSource + extends Source>>> { + private static final Logger LOG = + LoggerFactory.getLogger(GroupingShuffleSource.class); + + final byte[] shuffleReaderConfig; + final String startShufflePosition; + final String stopShufflePosition; + final BatchModeExecutionContext executionContext; + + Coder keyCoder; + Coder valueCoder; + + public GroupingShuffleSource(PipelineOptions options, + byte[] shuffleReaderConfig, + String startShufflePosition, + String stopShufflePosition, + Coder>>> coder, + BatchModeExecutionContext executionContext) + throws Exception { + this.shuffleReaderConfig = shuffleReaderConfig; + this.startShufflePosition = startShufflePosition; + this.stopShufflePosition = stopShufflePosition; + this.executionContext = executionContext; + initCoder(coder); + } + + @Override + public SourceIterator>>> iterator() + throws IOException { + Preconditions.checkArgument(shuffleReaderConfig != null); + return iterator(new BatchingShuffleEntryReader( + new ChunkingShuffleBatchReader(new ApplianceShuffleReader( + shuffleReaderConfig)))); + } + + private void initCoder(Coder>>> coder) throws Exception { + if (!(coder instanceof WindowedValueCoder)) { + throw new Exception( + "unexpected kind of coder for WindowedValue: " + coder); + } + Coder>> elemCoder = + ((WindowedValueCoder>>) coder).getValueCoder(); + if (!(elemCoder instanceof KvCoder)) { + throw new Exception( + "unexpected kind of coder for elements read from " + + "a key-grouping shuffle: " + elemCoder); + } + KvCoder> kvCoder = (KvCoder>) elemCoder; + this.keyCoder = kvCoder.getKeyCoder(); + Coder> kvValueCoder = kvCoder.getValueCoder(); + if (!(kvValueCoder instanceof IterableCoder)) { + throw new Exception( + "unexpected kind of coder for values of KVs read from " + + "a key-grouping shuffle"); + } + IterableCoder iterCoder = (IterableCoder) kvValueCoder; + this.valueCoder = iterCoder.getElemCoder(); + } + + final SourceIterator>>> iterator(ShuffleEntryReader reader) + throws IOException { + return new GroupingShuffleSourceIterator(reader); + } + + /** + * A SourceIterator that reads from a ShuffleEntryReader and groups + * all the values with the same key. + * + *

A key limitation of this implementation is that all iterator accesses + * must be externally synchronized (the iterator objects are not individually + * thread-safe, and the iterators derived from a single original iterator + * access shared state which is not thread-safe). + * + *

To access the current position, the iterator must advance + * on-demand and cache the next batch of key grouped shuffle + * entries. The iterator does not advance a second time in @next() + * to avoid asking the underlying iterator to advance to the next + * key before the caller/user iterates over the values corresponding + * to the current key -- which would introduce a performance + * penalty. + */ + private final class GroupingShuffleSourceIterator + extends AbstractSourceIterator>>> { + // N.B. This class is *not* static; it uses the keyCoder, valueCoder, and + // executionContext from its enclosing GroupingShuffleSource. + + /** The iterator over shuffle entries, grouped by common key. */ + private final Iterator groups; + + /** The stop position. No records with a position at or after + * @stopPosition will be returned. Initialized + * to @AbstractShuffleSource.stopShufflePosition but can be + * dynamically updated via @updateStopPosition() (note that such + * updates can only decrease @stopPosition). + * + *

The granularity of the stop position is such that it can + * only refer to records at the boundary of a key. + */ + private ByteArrayShufflePosition stopPosition = null; + + /** The next group to be consumed, if available */ + private KeyGroupedShuffleEntries nextGroup = null; + + public GroupingShuffleSourceIterator(ShuffleEntryReader reader) { + stopPosition = ByteArrayShufflePosition.fromBase64(stopShufflePosition); + this.groups = + new GroupingShuffleEntryIterator(reader.read( + ByteArrayShufflePosition.fromBase64(startShufflePosition), + stopPosition)) { + @Override + protected void notifyElementRead(long byteSize) { + GroupingShuffleSource.this.notifyElementRead(byteSize); + } + }; + } + + private void advanceIfNecessary() { + if (nextGroup == null && groups.hasNext()) { + nextGroup = groups.next(); + } + } + + @Override + public boolean hasNext() throws IOException { + return hasNextInternal(); + } + + /** + * Returns false if the next group does not exist (i.e., no more + * records available) or the group is beyond @stopPosition. + */ + private boolean hasNextInternal() { + advanceIfNecessary(); + if (nextGroup == null) { + return false; + } + ByteArrayShufflePosition current = + ByteArrayShufflePosition.of(nextGroup.position); + return stopPosition == null || current.compareTo(stopPosition) < 0; + } + + @Override + public WindowedValue>> next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + KeyGroupedShuffleEntries group = nextGroup; + nextGroup = null; + + K key = CoderUtils.decodeFromByteArray(keyCoder, group.key); + if (executionContext != null) { + executionContext.setKey(key); + } + + return WindowedValue.valueInEmptyWindows( + KV.>of(key, new ValuesIterable(group.values))); + } + + /** + * Returns the position before the next {@code KV>} to be returned by the + * {@link GroupingShuffleSourceIterator}. Returns null if the + * {@link GroupingShuffleSourceIterator} is finished. + */ + @Override + public Progress getProgress() { + com.google.api.services.dataflow.model.Position currentPosition = + new com.google.api.services.dataflow.model.Position(); + ApproximateProgress progress = new ApproximateProgress(); + if (hasNextInternal()) { + ByteArrayShufflePosition current = + ByteArrayShufflePosition.of(nextGroup.position); + currentPosition.setShufflePosition(current.encodeBase64()); + } else { + if (stopPosition != null) { + currentPosition.setShufflePosition(stopPosition.encodeBase64()); + } else { + // The original stop position described the end of the + // shuffle-position-space (or infinity) and all records have + // been consumed. + progress.setPercentComplete((float) 1.0); + progress.setRemainingTime(toCloudDuration(Duration.ZERO)); + return cloudProgressToSourceProgress(progress); + } + } + + progress.setPosition(currentPosition); + return cloudProgressToSourceProgress(progress); + } + + /** + * Updates the stop position of the shuffle source to the position proposed. Ignores the + * proposed stop position if it is smaller than or equal to the position before the next + * {@code KV>} to be returned by the {@link GroupingShuffleSourceIterator}. 
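 * (Illustrative example, assuming positions that compare as c < e < g under the
 * byte-array ordering used by ByteArrayShufflePosition: if the next unreturned
 * group is at position c and the current stop position is g, a proposal of e is
 * accepted and becomes the new stop position; proposals at or before c, or at
 * or beyond g, are ignored and null is returned.)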
+ */ + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + checkNotNull(proposedStopPosition); + com.google.api.services.dataflow.model.Position stopCloudPosition = + sourceProgressToCloudProgress(proposedStopPosition).getPosition(); + if (stopCloudPosition == null) { + LOG.warn( + "A stop position other than a Position is not supported now."); + return null; + } + + if (stopCloudPosition.getShufflePosition() == null) { + LOG.warn( + "A stop position other than shuffle position is not supported in " + + "a grouping shuffle source: " + stopCloudPosition.toString()); + return null; + } + ByteArrayShufflePosition newStopPosition = + ByteArrayShufflePosition.fromBase64(stopCloudPosition.getShufflePosition()); + + if (!hasNextInternal()) { + LOG.warn("Cannot update stop position to " + + stopCloudPosition.getShufflePosition() + + " since all input was consumed."); + return null; + } + ByteArrayShufflePosition current = + ByteArrayShufflePosition.of(nextGroup.position); + if (newStopPosition.compareTo(current) <= 0) { + LOG.warn("Proposed stop position: " + + stopCloudPosition.getShufflePosition() + " <= current position: " + + current.encodeBase64()); + return null; + } + + if (this.stopPosition != null + && newStopPosition.compareTo(this.stopPosition) >= 0) { + LOG.warn("Proposed stop position: " + + stopCloudPosition.getShufflePosition() + + " >= current stop position: " + + this.stopPosition.encodeBase64()); + return null; + } + + this.stopPosition = newStopPosition; + LOG.info("Updated the stop position to " + + stopCloudPosition.getShufflePosition()); + + return cloudPositionToSourcePosition(stopCloudPosition); + } + + /** + * Provides the {@link Reiterable} used to iterate through the values part + * of a {@code KV>} entry produced by a + * {@link GroupingShuffleSource}. + */ + private final class ValuesIterable implements Reiterable { + // N.B. This class is *not* static; it uses the valueCoder from + // its enclosing GroupingShuffleSource. + + private final Reiterable base; + + public ValuesIterable(Reiterable base) { + this.base = checkNotNull(base); + } + + @Override + public ValuesIterator iterator() { + return new ValuesIterator(base.iterator()); + } + } + + /** + * Provides the {@link Reiterator} used to iterate through the values part + * of a {@code KV>} entry produced by a + * {@link GroupingShuffleSource}. + */ + private final class ValuesIterator implements Reiterator { + // N.B. This class is *not* static; it uses the valueCoder from + // its enclosing GroupingShuffleSource. 
+ + private final Reiterator base; + + public ValuesIterator(Reiterator base) { + this.base = checkNotNull(base); + } + + @Override + public boolean hasNext() { + return base.hasNext(); + } + + @Override + public V next() { + ShuffleEntry entry = base.next(); + try { + return CoderUtils.decodeFromByteArray(valueCoder, entry.getValue()); + } catch (IOException exn) { + throw new RuntimeException(exn); + } + } + + @Override + public void remove() { + base.remove(); + } + + @Override + public ValuesIterator copy() { + return new ValuesIterator(base.copy()); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java new file mode 100644 index 0000000000000..2229a77ddc10b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.decodeBase64; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.values.KV; + +/** + * Creates a GroupingShuffleSource from a CloudObject spec. + */ +public class GroupingShuffleSourceFactory { + // Do not instantiate. 
+ private GroupingShuffleSourceFactory() {} + + public static GroupingShuffleSource create( + PipelineOptions options, + CloudObject spec, + Coder>>> coder, + ExecutionContext executionContext) + throws Exception { + return create(options, spec, coder, + (BatchModeExecutionContext) executionContext); + } + + static GroupingShuffleSource create( + PipelineOptions options, + CloudObject spec, + Coder>>> coder, + BatchModeExecutionContext executionContext) + throws Exception { + return new GroupingShuffleSource<>( + options, + decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), + getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), + coder, + executionContext); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java new file mode 100644 index 0000000000000..a0a524ee0c9b9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkNotNull; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static java.lang.Math.min; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * A source that yields a set of precomputed elements. 
+ * + * @param the type of the elements read from the source + */ +public class InMemorySource extends Source { + private static final Logger LOG = LoggerFactory.getLogger(InMemorySource.class); + + final List encodedElements; + final int startIndex; + final int endIndex; + final Coder coder; + + public InMemorySource(List encodedElements, + @Nullable Long startIndex, + @Nullable Long endIndex, + Coder coder) { + this.encodedElements = encodedElements; + int maxIndex = encodedElements.size(); + if (startIndex == null) { + this.startIndex = 0; + } else { + if (startIndex < 0) { + throw new IllegalArgumentException("start index should be >= 0"); + } + this.startIndex = (int) min(startIndex, maxIndex); + } + if (endIndex == null) { + this.endIndex = maxIndex; + } else { + if (endIndex < this.startIndex) { + throw new IllegalArgumentException( + "end index should be >= start index"); + } + this.endIndex = (int) min(endIndex, maxIndex); + } + this.coder = coder; + } + + @Override + public SourceIterator iterator() throws IOException { + return new InMemorySourceIterator(); + } + + /** + * A SourceIterator that yields an in-memory list of elements. + */ + class InMemorySourceIterator extends AbstractSourceIterator { + int index; + int endPosition; + + public InMemorySourceIterator() { + index = startIndex; + endPosition = endIndex; + } + + @Override + public boolean hasNext() { + return index < endPosition; + } + + @Override + public T next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + String encodedElementString = encodedElements.get(index++); + // TODO: Replace with the real encoding used by the + // front end, when we know what it is. + byte[] encodedElement = + StringUtils.jsonStringToByteArray(encodedElementString); + notifyElementRead(encodedElement.length); + return CoderUtils.decodeFromByteArray(coder, encodedElement); + } + + @Override + public Progress getProgress() { + // Currently we assume that only a record index position is reported as + // current progress. Source writer can override this method to update + // other metrics, e.g. completion percentage or remaining time. + com.google.api.services.dataflow.model.Position currentPosition = + new com.google.api.services.dataflow.model.Position(); + currentPosition.setRecordIndex((long) index); + + ApproximateProgress progress = new ApproximateProgress(); + progress.setPosition(currentPosition); + + return cloudProgressToSourceProgress(progress); + } + + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + checkNotNull(proposedStopPosition); + + // Currently we only support stop position in record index of + // an API Position in InMemorySource. If stop position in other types is + // proposed, the end position in iterator will not be updated, + // and return null. + com.google.api.services.dataflow.model.Position stopPosition = + sourceProgressToCloudProgress(proposedStopPosition).getPosition(); + if (stopPosition == null) { + LOG.warn( + "A stop position other than a Dataflow API Position is not currently supported."); + return null; + } + + Long recordIndex = stopPosition.getRecordIndex(); + if (recordIndex == null) { + LOG.warn( + "A stop position other than record index is not supported in InMemorySource."); + return null; + } + if (recordIndex <= index || recordIndex >= endPosition) { + // Proposed stop position is not after the current position or proposed + // stop position is after the current stop (end) position: No stop + // position update. 
+ return null; + } + + this.endPosition = recordIndex.intValue(); + return cloudPositionToSourcePosition(stopPosition); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java new file mode 100644 index 0000000000000..3f2cd9c9a1dba --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getLong; +import static com.google.cloud.dataflow.sdk.util.Structs.getStrings; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import java.util.Collections; + +/** + * Creates an InMemorySource from a CloudObject spec. + */ +public class InMemorySourceFactory { + // Do not instantiate. + private InMemorySourceFactory() {} + + public static InMemorySource create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(spec, coder); + } + + static InMemorySource create(CloudObject spec, + Coder coder) throws Exception { + return new InMemorySource<>( + getStrings(spec, + PropertyNames.ELEMENTS, Collections.emptyList()), + getLong(spec, PropertyNames.START_INDEX, null), + getLong(spec, PropertyNames.END_INDEX, null), + coder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java new file mode 100644 index 0000000000000..3ccebd5617565 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Implements a SourceIterator over a collection of inputs. + * + * The sources are used sequentially, each consumed entirely before moving + * to the next source. + * + * The input is lazily constructed by using the abstract method {@code open} to + * create a source iterator for inputs on demand. This allows the resources to + * be produced lazily, as an open source iterator may consume process resources + * such as file descriptors. + */ +abstract class LazyMultiSourceIterator + extends Source.AbstractSourceIterator { + private final Iterator inputs; + Source.SourceIterator current; + + public LazyMultiSourceIterator(Iterator inputs) { + this.inputs = inputs; + } + + @Override + public boolean hasNext() throws IOException { + while (selectSource()) { + if (!current.hasNext()) { + current.close(); + current = null; + } else { + return true; + } + } + return false; + } + + @Override + public T next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return current.next(); + } + + @Override + public void close() throws IOException { + while (selectSource()) { + current.close(); + current = null; + } + } + + protected abstract Source.SourceIterator open(String input) + throws IOException; + + boolean selectSource() throws IOException { + if (current != null) { + return true; + } + if (inputs.hasNext()) { + current = open(inputs.next()); + return true; + } + return false; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java new file mode 100644 index 0000000000000..095aa0876ee8e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java @@ -0,0 +1,413 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.services.dataflow.model.FlattenInstruction; +import com.google.api.services.dataflow.model.InstructionInput; +import com.google.api.services.dataflow.model.InstructionOutput; +import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.ParDoInstruction; +import com.google.api.services.dataflow.model.ParallelInstruction; +import com.google.api.services.dataflow.model.PartialGroupByKeyInstruction; +import com.google.api.services.dataflow.model.ReadInstruction; +import com.google.api.services.dataflow.model.WriteInstruction; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservable; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.util.common.worker.FlattenOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.Operation; +import com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.ReceivingOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; +import com.google.cloud.dataflow.sdk.util.common.worker.WriteOperation; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.util.ArrayList; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * Creates a MapTaskExecutor from a MapTask definition. + */ +public class MapTaskExecutorFactory { + /** + * Creates a new MapTaskExecutor from the given MapTask definition. + */ + public static MapTaskExecutor create(PipelineOptions options, + MapTask mapTask, + ExecutionContext context) + throws Exception { + List operations = new ArrayList<>(); + CounterSet counters = new CounterSet(); + String counterPrefix = mapTask.getStageName() + "-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counters.getAddCounterMutator()); + // Open-ended state. + stateSampler.setState("other"); + + // Instantiate operations for each instruction in the graph. 
+ for (ParallelInstruction instruction : mapTask.getInstructions()) { + operations.add( + createOperation(options, instruction, context, operations, + counterPrefix, counters.getAddCounterMutator(), + stateSampler)); + } + + return new MapTaskExecutor(operations, counters, stateSampler); + } + + /** + * Creates an Operation from the given ParallelInstruction definition. + */ + static Operation createOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + if (instruction.getRead() != null) { + return createReadOperation( + options, instruction, executionContext, priorOperations, + counterPrefix, addCounterMutator, stateSampler); + } else if (instruction.getWrite() != null) { + return createWriteOperation( + options, instruction, executionContext, priorOperations, + counterPrefix, addCounterMutator, stateSampler); + } else if (instruction.getParDo() != null) { + return createParDoOperation( + options, instruction, executionContext, priorOperations, + counterPrefix, addCounterMutator, stateSampler); + } else if (instruction.getPartialGroupByKey() != null) { + return createPartialGroupByKeyOperation( + options, instruction, executionContext, priorOperations, + counterPrefix, addCounterMutator, stateSampler); + } else if (instruction.getFlatten() != null) { + return createFlattenOperation( + options, instruction, executionContext, priorOperations, + counterPrefix, addCounterMutator, stateSampler); + } else { + throw new Exception("Unexpected instruction: " + instruction); + } + } + + static ReadOperation createReadOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + ReadInstruction read = instruction.getRead(); + + Source source = + SourceFactory.create(options, read.getSource(), executionContext); + + OutputReceiver[] receivers = createOutputReceivers( + instruction, counterPrefix, addCounterMutator, stateSampler, 1); + + return new ReadOperation(instruction.getSystemName(), source, receivers, + counterPrefix, addCounterMutator, stateSampler); + } + + static WriteOperation createWriteOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + WriteInstruction write = instruction.getWrite(); + + Sink sink = SinkFactory.create(options, write.getSink(), executionContext); + + OutputReceiver[] receivers = createOutputReceivers( + instruction, counterPrefix, addCounterMutator, stateSampler, 0); + + WriteOperation operation = + new WriteOperation(instruction.getSystemName(), sink, receivers, + counterPrefix, addCounterMutator, stateSampler); + + attachInput(operation, write.getInput(), priorOperations); + + return operation; + } + + static ParDoOperation createParDoOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + ParDoInstruction parDo = instruction.getParDo(); + + ParDoFn fn = 
ParDoFnFactory.create( + options, + CloudObject.fromSpec(parDo.getUserFn()), + instruction.getSystemName(), + parDo.getSideInputs(), + parDo.getMultiOutputInfos(), + parDo.getNumOutputs(), + executionContext, + addCounterMutator, + stateSampler); + + OutputReceiver[] receivers = + createOutputReceivers(instruction, counterPrefix, addCounterMutator, + stateSampler, parDo.getNumOutputs()); + + ParDoOperation operation = + new ParDoOperation(instruction.getSystemName(), fn, receivers, + counterPrefix, addCounterMutator, stateSampler); + + attachInput(operation, parDo.getInput(), priorOperations); + + return operation; + } + + static PartialGroupByKeyOperation createPartialGroupByKeyOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + PartialGroupByKeyInstruction pgbk = instruction.getPartialGroupByKey(); + + Coder coder = Serializer.deserialize(pgbk.getInputElementCodec(), Coder.class); + if (!(coder instanceof WindowedValueCoder)) { + throw new Exception( + "unexpected kind of input coder for PartialGroupByKeyOperation: " + coder); + } + Coder elemCoder = ((WindowedValueCoder) coder).getValueCoder(); + if (!(elemCoder instanceof KvCoder)) { + throw new Exception( + "unexpected kind of input element coder for PartialGroupByKeyOperation: " + elemCoder); + } + KvCoder kvCoder = (KvCoder) elemCoder; + Coder keyCoder = kvCoder.getKeyCoder(); + Coder valueCoder = kvCoder.getValueCoder(); + + OutputReceiver[] receivers = createOutputReceivers( + instruction, counterPrefix, addCounterMutator, stateSampler, 1); + + PartialGroupByKeyOperation operation = + new PartialGroupByKeyOperation(instruction.getSystemName(), + new CoderGroupingKeyCreator(keyCoder), + new CoderSizeEstimator(keyCoder), + new CoderSizeEstimator(valueCoder), + 0.001 /*sizeEstimatorSampleRate*/, + PairInfo.create(), + receivers, + counterPrefix, addCounterMutator, + stateSampler); + + attachInput(operation, pgbk.getInput(), priorOperations); + + return operation; + } + + /** + * Implements PGBKOp.PairInfo via KVs. + */ + public static class PairInfo implements PartialGroupByKeyOperation.PairInfo { + private static PairInfo theInstance = new PairInfo(); + public static PairInfo create() { return theInstance; } + private PairInfo() {} + @Override + public Object getKeyFromInputPair(Object pair) { + WindowedValue> windowedKv = (WindowedValue>) pair; + return windowedKv.getValue().getKey(); + } + @Override + public Object getValueFromInputPair(Object pair) { + WindowedValue> windowedKv = (WindowedValue>) pair; + return windowedKv.getValue().getValue(); + } + @Override + public Object makeOutputPair(Object key, Object values) { + return WindowedValue.valueInEmptyWindows(KV.of(key, values)); + } + } + + /** + * Implements PGBKOp.GroupingKeyCreator via Coder. + */ + public static class CoderGroupingKeyCreator + implements PartialGroupByKeyOperation.GroupingKeyCreator { + final Coder coder; + + public CoderGroupingKeyCreator(Coder coder) { + this.coder = coder; + } + + @Override + public Object createGroupingKey(Object value) throws Exception { + return new PartialGroupByKeyOperation.StructuralByteArray( + CoderUtils.encodeToByteArray(coder, value)); + } + } + + /** + * Implements PGBKOp.SizeEstimator via Coder. 
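+   * The size is computed by actually encoding the element with the coder and
+   * measuring the length of the resulting byte array.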
+ */ + public static class CoderSizeEstimator + implements PartialGroupByKeyOperation.SizeEstimator { + final Coder coder; + + public CoderSizeEstimator(Coder coder) { + this.coder = coder; + } + + @Override + public long estimateSize(Object value) throws Exception { + return CoderUtils.encodeToByteArray(coder, value).length; + } + } + + static FlattenOperation createFlattenOperation( + PipelineOptions options, + ParallelInstruction instruction, + ExecutionContext executionContext, + List priorOperations, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + FlattenInstruction flatten = instruction.getFlatten(); + + OutputReceiver[] receivers = + createOutputReceivers(instruction, counterPrefix, addCounterMutator, + stateSampler, 1); + + FlattenOperation operation = + new FlattenOperation(instruction.getSystemName(), receivers, + counterPrefix, addCounterMutator, stateSampler); + + for (InstructionInput input : flatten.getInputs()) { + attachInput(operation, input, priorOperations); + } + + return operation; + } + + /** + * Returns an array of OutputReceivers for the given + * ParallelInstruction definition. + */ + static OutputReceiver[] createOutputReceivers( + ParallelInstruction instruction, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler, + int expectedNumOutputs) + throws Exception { + int numOutputs = 0; + if (instruction.getOutputs() != null) { + numOutputs = instruction.getOutputs().size(); + } + if (numOutputs != expectedNumOutputs) { + throw new AssertionError( + "ParallelInstruction.Outputs has an unexpected length"); + } + OutputReceiver[] receivers = new OutputReceiver[numOutputs]; + for (int i = 0; i < numOutputs; i++) { + InstructionOutput cloudOutput = instruction.getOutputs().get(i); + receivers[i] = new OutputReceiver( + cloudOutput.getName(), + new ElementByteSizeObservableCoder( + Serializer.deserialize(cloudOutput.getCodec(), Coder.class)), + counterPrefix, + addCounterMutator); + } + return receivers; + } + + /** + * Adapts a Coder to the ElementByteSizeObservable interface. + */ + public static class ElementByteSizeObservableCoder + implements ElementByteSizeObservable { + final Coder coder; + + public ElementByteSizeObservableCoder(Coder coder) { + this.coder = coder; + } + + @Override + public boolean isRegisterByteSizeObserverCheap(T value) { + return coder.isRegisterByteSizeObserverCheap(value, Coder.Context.OUTER); + } + + @Override + public void registerByteSizeObserver(T value, + ElementByteSizeObserver observer) + throws Exception { + coder.registerByteSizeObserver(value, observer, Coder.Context.OUTER); + } + } + + /** + * Adds an input to the given Operation, coming from the given + * producer instruction output. + */ + static void attachInput(ReceivingOperation operation, + @Nullable InstructionInput input, + List priorOperations) { + Integer producerInstructionIndex = 0; + Integer outputNum = 0; + if (input != null) { + if (input.getProducerInstructionIndex() != null) { + producerInstructionIndex = input.getProducerInstructionIndex(); + } + if (input.getOutputNum() != null) { + outputNum = input.getOutputNum(); + } + } + // Input id must refer to an operation that has already been seen. 
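+    // (Illustrative note: when no input is given, the defaults above attach
+    // this operation to the first output of the first prior operation.)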
+ Operation source = priorOperations.get(producerInstructionIndex); + operation.attachInput(source, outputNum); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java new file mode 100644 index 0000000000000..c6e5f9f163e35 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -0,0 +1,214 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBytes; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnRunner; +import com.google.cloud.dataflow.sdk.util.DoFnRunner.OutputManager; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; +import com.google.cloud.dataflow.sdk.util.common.worker.Receiver; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A wrapper around a decoded user DoFn. 
+ */ +public class NormalParDoFn extends ParDoFn { + public static NormalParDoFn create( + PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + @Nullable List sideInputInfos, + @Nullable List multiOutputInfos, + Integer numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler /* ignored */) + throws Exception { + Object deserializedFn = + SerializableUtils.deserializeFromByteArray( + getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), + "serialized user fn"); + if (!(deserializedFn instanceof DoFn)) { + throw new Exception("unexpected kind of DoFn: " + deserializedFn.getClass().getName()); + } + DoFn fn = (DoFn) deserializedFn; + + PTuple sideInputValues = PTuple.empty(); + if (sideInputInfos != null) { + for (SideInputInfo sideInputInfo : sideInputInfos) { + Object sideInputValue = SideInputUtils.readSideInput( + options, sideInputInfo, executionContext); + TupleTag tag = new TupleTag(sideInputInfo.getTag()); + sideInputValues = sideInputValues.and(tag, sideInputValue); + } + } + + List outputTags = new ArrayList<>(); + if (multiOutputInfos != null) { + for (MultiOutputInfo multiOutputInfo : multiOutputInfos) { + outputTags.add(multiOutputInfo.getTag()); + } + } + if (outputTags.isEmpty()) { + // Legacy support: assume there's a single output tag named "output". + // (The output tag name will be ignored, for the main output.) + outputTags.add("output"); + } + if (numOutputs != outputTags.size()) { + throw new AssertionError( + "unexpected number of outputTags for DoFn"); + } + + return new NormalParDoFn(options, fn, sideInputValues, outputTags, + stepName, executionContext, addCounterMutator); + } + + public final PipelineOptions options; + public final DoFn fn; + public final PTuple sideInputValues; + public final TupleTag mainOutputTag; + public final List> sideOutputTags; + public final String stepName; + public final ExecutionContext executionContext; + private final CounterSet.AddCounterMutator addCounterMutator; + + /** The DoFnRunner executing a batch. Null between batches. */ + DoFnRunner fnRunner; + + public NormalParDoFn(PipelineOptions options, + DoFn fn, + PTuple sideInputValues, + List outputTags, + String stepName, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator) { + this.options = options; + this.fn = fn; + this.sideInputValues = sideInputValues; + if (outputTags.size() < 1) { + throw new AssertionError("expected at least one output"); + } + this.mainOutputTag = new TupleTag(outputTags.get(0)); + this.sideOutputTags = new ArrayList<>(); + if (outputTags.size() > 1) { + for (String tag : outputTags.subList(1, outputTags.size())) { + this.sideOutputTags.add(new TupleTag(tag)); + } + } + this.stepName = stepName; + this.executionContext = executionContext; + this.addCounterMutator = addCounterMutator; + } + + @Override + public void startBundle(final Receiver... receivers) throws Exception { + if (receivers.length != sideOutputTags.size() + 1) { + throw new AssertionError( + "unexpected number of receivers for DoFn"); + } + + StepContext stepContext = null; + if (executionContext != null) { + stepContext = executionContext.getStepContext(stepName); + } + + fnRunner = DoFnRunner.create( + options, + fn, + sideInputValues, + new OutputManager() { + final Map, OutputReceiver> undeclaredOutputs = + new HashMap<>(); + + @Override + public Receiver initialize(TupleTag tag) { + // Declared outputs. 
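+            // (Illustrative note: receivers[0] carries the main output; side
+            // outputs map to receivers[1..n] in declaration order, matching
+            // the check in startBundle above.)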
+ if (tag.equals(mainOutputTag)) { + return receivers[0]; + } else if (sideOutputTags.contains(tag)) { + return receivers[sideOutputTags.indexOf(tag) + 1]; + } + + // Undeclared outputs. + OutputReceiver receiver = undeclaredOutputs.get(tag); + if (receiver == null) { + // A new undeclared output. + // TODO: plumb through the operationName, so that we can + // name implicit outputs after it. + String outputName = "implicit-" + tag.getId(); + // TODO: plumb through the counter prefix, so we can + // make it available to the OutputReceiver class in case + // it wants to use it in naming output counters. (It + // doesn't today.) + String counterPrefix = ""; + receiver = new OutputReceiver( + outputName, counterPrefix, addCounterMutator); + undeclaredOutputs.put(tag, receiver); + } + return receiver; + } + + @Override + public void output(Receiver receiver, WindowedValue output) { + try { + receiver.process(output); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }, + mainOutputTag, + sideOutputTags, + stepContext, + addCounterMutator); + + fnRunner.startBundle(); + } + + @Override + public void processElement(Object elem) throws Exception { + fnRunner.processElement((WindowedValue) elem); + } + + @Override + public void finishBundle() throws Exception { + fnRunner.finishBundle(); + fnRunner = null; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCode.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCode.java new file mode 100644 index 0000000000000..487420ce39342 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCode.java @@ -0,0 +1,678 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.common.math.LongMath; +import com.google.common.primitives.Longs; + +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.Arrays; + +/** + * This module provides routines for encoding a sequence of typed + * entities into a byte array. The resulting byte arrays can be + * lexicographically compared to yield the same comparison value that + * would have been generated if the encoded items had been compared + * one by one according to their type. + * + * More precisely, suppose: + * 1. byte array A is generated by encoding the sequence of items [A_1..A_n] + * 2. byte array B is generated by encoding the sequence of items [B_1..B_n] + * 3. The types match; i.e., for all i: A_i was encoded using + * the same routine as B_i + * Then: + * Comparing A vs. B lexicographically is the same as comparing + * the vectors [A_1..A_n] and [B_1..B_n] lexicographically. + * + *
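+ * For example (an illustrative sketch): if {@code a} and {@code b} are two
+ * OrderedCode instances, and {@code a.writeBytes(x); a.writeNumIncreasing(1);}
+ * while {@code b.writeBytes(x); b.writeNumIncreasing(2);} for the same byte
+ * array {@code x}, then {@code a.getEncodedBytes()} compares lexicographically
+ * (as unsigned bytes) before {@code b.getEncodedBytes()}.
+ *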

+ * This class is NOT thread safe. + */ +public class OrderedCode { + // We want to encode a few extra symbols in strings: + // Separator between items + // Infinite string + // + // Therefore we need an alphabet with at least 258 characters. We + // achieve this by using two-letter sequences starting with '\0' and '\xff' + // as extra symbols: + // encoded as => \0\1 + // \0 encoded as => \0\xff + // \xff encoded as => \xff\x00 + // encoded as => \xff\xff + // + // The remaining two letter sequences starting with '\0' and '\xff' + // are currently unused. + + public static final byte ESCAPE1 = 0x00; + public static final byte NULL_CHARACTER = + (byte) 0xff; // Combined with ESCAPE1 + public static final byte SEPARATOR = 0x01; // Combined with ESCAPE1 + + public static final byte ESCAPE2 = (byte) 0xff; + public static final byte INFINITY = + (byte) 0xff; // Combined with ESCAPE2 + public static final byte FF_CHARACTER = 0x00; // Combined with ESCAPE2 + + public static final byte[] ESCAPE1_SEPARATOR = { ESCAPE1, SEPARATOR }; + + public static final byte[] INFINITY_ENCODED = { ESCAPE2, INFINITY }; + + /** + * This array maps encoding length to header bits in the first two bytes for + * SignedNumIncreasing encoding. + */ + private static final byte[][] LENGTH_TO_HEADER_BITS = { + { 0, 0 }, + { (byte) 0x80, 0 }, + { (byte) 0xc0, 0 }, + { (byte) 0xe0, 0 }, + { (byte) 0xf0, 0 }, + { (byte) 0xf8, 0 }, + { (byte) 0xfc, 0 }, + { (byte) 0xfe, 0 }, + { (byte) 0xff, 0 }, + { (byte) 0xff, (byte) 0x80 }, + { (byte) 0xff, (byte) 0xc0 } + }; + + /** + * This array maps encoding lengths to the header bits that overlap with + * the payload and need fixing during readSignedNumIncreasing. + */ + private static final long[] LENGTH_TO_MASK = { + 0L, + 0x80L, + 0xc000L, + 0xe00000L, + 0xf0000000L, + 0xf800000000L, + 0xfc0000000000L, + 0xfe000000000000L, + 0xff00000000000000L, + 0x8000000000000000L, + 0L + }; + + /** + * This array maps the number of bits in a number to the encoding + * length produced by WriteSignedNumIncreasing. + * For positive numbers, the number of bits is 1 plus the most significant + * bit position (the highest bit position in a positive long is 63). + * For a negative number n, we count the bits in ~n. + * That is, length = BITS_TO_LENGTH[log2Floor(n < 0 ? ~n : n) + 1]. + */ + private static final short[] BITS_TO_LENGTH = { + 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, + 9, 9, 9, 9, 9, 9, 9, + 10 + }; + + /** + * stores the current encoded value as a list of byte arrays. Note that this + * is manipulated as we read/write items. + * Note that every item will fit on at most one array. One array may + * have more than one item (eg when used for decoding). While encoding, + * one array will have exactly one item. While returning the encoded array + * we will merge all the arrays in this list. + */ + private final ArrayList encodedArrays = new ArrayList<>(); + + /** + * This is the current position on the first array. Will be non-zero + * only if the ordered code was created using encoded byte array. + */ + private int firstArrayPosition = 0; + + /** + * Creates OrderedCode from scractch. Typically used at encoding time. + */ + public OrderedCode(){ + } + + /** + * Creates OrderedCode from a given encoded byte array. Typically used at + * decoding time. + * + *

+ * For better performance, it uses the input array provided (not a copy). + * Therefore the input array should not be modified. + */ + public OrderedCode(byte[] encodedByteArray) { + encodedArrays.add(encodedByteArray); + } + + /** + * Adds the given byte array item to the OrderedCode. It encodes the input + * byte array, followed by a separator and appends the result to its + * internal encoded byte array store. + * + *
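+ * For example (an illustrative sketch), {@code writeBytes(new byte[] {0x00})}
+ * appends the escaped form {@code 0x00 0xff} followed by the two-byte
+ * terminator {@code 0x00 0x01}.
+ *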

+ * It works with the input array, + * so the input array 'value' should not be modified till the method returns. + * + * @param value bytes to be written. + * @see #readBytes() + */ + public void writeBytes(byte[] value) { + // Determine the length of the encoded array + int encodedLength = 2; // for separator + for (byte b : value) { + if ((b == ESCAPE1) || (b == ESCAPE2)) { + encodedLength += 2; + } else { + encodedLength++; + } + } + + byte[] encodedArray = new byte[encodedLength]; + int copyStart = 0; + int outIndex = 0; + for (int i = 0; i < value.length; i++) { + byte b = value[i]; + if (b == ESCAPE1) { + System.arraycopy(value, copyStart, encodedArray, outIndex, + i - copyStart); + outIndex += i - copyStart; + encodedArray[outIndex++] = ESCAPE1; + encodedArray[outIndex++] = NULL_CHARACTER; + copyStart = i + 1; + } else if (b == ESCAPE2) { + System.arraycopy(value, copyStart, encodedArray, outIndex, + i - copyStart); + outIndex += i - copyStart; + encodedArray[outIndex++] = ESCAPE2; + encodedArray[outIndex++] = FF_CHARACTER; + copyStart = i + 1; + } + } + if (copyStart < value.length) { + System.arraycopy(value, copyStart, encodedArray, outIndex, + value.length - copyStart); + outIndex += value.length - copyStart; + } + encodedArray[outIndex++] = ESCAPE1; + encodedArray[outIndex] = SEPARATOR; + + encodedArrays.add(encodedArray); + } + + /** + * Encodes the long item, in big-endian format, and appends the result to its + * internal encoded byte array store. + * + * Note that the specified long is treated like a uint64, e.g. + * {@code new OrderedCode().writeNumIncreasing(-1L).getEncodedBytes() > + * new OrderedCode().writeNumIncreasing(Long.MAX_VALUE).getEncodedBytes()}. + * + * @see #readNumIncreasing() + */ + public void writeNumIncreasing(long value) { + // Values are encoded with a single byte length prefix, followed + // by the actual value in big-endian format with leading 0 bytes + // dropped. + byte[] bufer = new byte[9]; // 8 bytes for value plus one byte for length + int len = 0; + while (value != 0) { + len++; + bufer[9 - len] = (byte) (value & 0xff); + value >>>= 8; + } + bufer[9 - len - 1] = (byte) len; + len++; + byte[] encodedArray = new byte[len]; + System.arraycopy(bufer, 9 - len, encodedArray, 0, len); + encodedArrays.add(encodedArray); + } + + /** + * Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. + */ + int log2Floor(long n) { + if (n < 0) { + throw new IllegalArgumentException("must be non-negative"); + } + return n == 0 ? -1 : LongMath.log2(n, RoundingMode.FLOOR); + } + + /** + * Calculates the encoding length in bytes of the signed number n. + */ + int getSignedEncodingLength(long n) { + return BITS_TO_LENGTH[log2Floor(n < 0 ? ~n : n) + 1]; + } + + /** + * Encodes the long item, in big-endian format, and appends the result to its + * internal encoded byte array store. + * + * Note that the specified long is treated like an int64, i.e. + * {@code new OrderedCode().writeNumIncreasing(-1L).getEncodedBytes() < + * new OrderedCode().writeNumIncreasing(0L).getEncodedBytes()}. + * + * @see #readSignedNumIncreasing() + */ + public void writeSignedNumIncreasing(long val) { + long x = val < 0 ? ~val : val; + if (x < 64) { // Fast path for encoding length == 1. + byte[] encodedArray = + new byte[] { (byte) (LENGTH_TO_HEADER_BITS[1][0] ^ val) }; + encodedArrays.add(encodedArray); + return; + } + // buf = val in network byte order, sign extended to 10 bytes. + byte signByte = val < 0 ? 
(byte) 0xff : 0; + byte[] buf = new byte[2 + Longs.BYTES]; + buf[0] = buf[1] = signByte; + System.arraycopy(Longs.toByteArray(val), 0, buf, 2, Longs.BYTES); + int len = getSignedEncodingLength(x); + if (len < 2) { + throw new IllegalStateException( + "Invalid length (" + len + ")" + + " returned by getSignedEncodingLength(" + x + ")"); + } + int beginIndex = buf.length - len; + buf[beginIndex] ^= LENGTH_TO_HEADER_BITS[len][0]; + buf[beginIndex + 1] ^= LENGTH_TO_HEADER_BITS[len][1]; + + byte[] encodedArray = new byte[len]; + System.arraycopy(buf, beginIndex, encodedArray, 0, len); + encodedArrays.add(encodedArray); + } + + /** + * Encodes and appends INFINITY item to its internal encoded byte array + * store. + * + * @see #readInfinity() + */ + public void writeInfinity() { + writeTrailingBytes(INFINITY_ENCODED); + } + + /** + * Appends the byte array item to its internal encoded byte array + * store. This is used for the last item and is not encoded. It + * also can be used to write a fixed number of bytes which will be + * read back using {@link #readBytes(int)}. + * + *

+ * It stores the input array in the store, + * so the input array 'value' should not be modified. + * + * @param value bytes to be written. + * @see #readTrailingBytes() + * @see #readBytes(int) + */ + public void writeTrailingBytes(byte[] value) { + if ((value == null) || (value.length == 0)) { + throw new IllegalArgumentException( + "Value cannot be null or have 0 elements"); + } + + encodedArrays.add(value); + } + + /** + * Returns the next byte array item from its encoded byte array store and + * removes the item from the store. + * + * @see #writeBytes(byte[]) + */ + public byte[] readBytes() { + if ((encodedArrays == null) || (encodedArrays.size() == 0) || + ((encodedArrays.get(0)).length - firstArrayPosition <= 0)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + // Determine the length of the decoded array + // We only scan up to "length-2" since a valid string must end with + // a two character terminator: 'ESCAPE1 SEPARATOR' + byte[] store = encodedArrays.get(0); + int decodedLength = 0; + boolean valid = false; + int i = firstArrayPosition; + while (i < store.length - 1) { + byte b = store[i++]; + if (b == ESCAPE1) { + b = store[i++]; + if (b == SEPARATOR) { + valid = true; + break; + } else if (b == NULL_CHARACTER) { + decodedLength++; + } else { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + } else if (b == ESCAPE2) { + b = store[i++]; + if (b == FF_CHARACTER) { + decodedLength++; + } else { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + } else { + decodedLength++; + } + } + if (!valid) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + byte[] decodedArray = new byte[decodedLength]; + int copyStart = firstArrayPosition; + int outIndex = 0; + int j = firstArrayPosition; + while (j < store.length - 1) { + byte b = store[j++]; // note that j has been incremented + if (b == ESCAPE1) { + System.arraycopy(store, copyStart, decodedArray, outIndex, + j - copyStart - 1); + outIndex += j - copyStart - 1; + // ESCAPE1 SEPARATOR ends component + // ESCAPE1 NULL_CHARACTER represents '\0' + b = store[j++]; + if (b == SEPARATOR) { + if ((store.length - j) == 0) { + // we are done with the first array + encodedArrays.remove(0); + firstArrayPosition = 0; + } else { + firstArrayPosition = j; + } + return decodedArray; + } else if (b == NULL_CHARACTER) { + decodedArray[outIndex++] = 0x00; + } // else not required - handled during length determination + copyStart = j; + } else if (b == ESCAPE2) { + System.arraycopy(store, copyStart, decodedArray, outIndex, + j - copyStart - 1); + outIndex += j - copyStart - 1; + // ESCAPE2 FF_CHARACTER represents '\xff' + // ESCAPE2 INFINITY is an error + b = store[j++]; + if (b == FF_CHARACTER) { + decodedArray[outIndex++] = (byte) 0xff; + } // else not required - handled during length determination + copyStart = j; + } + } + // not required due to the first phase, but need to entertain the compiler + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + /** + * Returns the next long item (encoded in big-endian format via + * {@code writeNumIncreasing(long)}) from its internal encoded byte array + * store and removes the item from the store. 
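+ * For example (an illustrative sketch), a value written as
+ * {@code writeNumIncreasing(5L)} is stored as a one-byte length prefix
+ * {@code 0x01} followed by {@code 0x05}; this method decodes those two bytes
+ * and returns 5.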
+ * + * @see #writeNumIncreasing(long) + */ + public long readNumIncreasing() { + if ((encodedArrays == null) || (encodedArrays.size() == 0) || + ((encodedArrays.get(0)).length - firstArrayPosition < 1)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + byte[] store = encodedArrays.get(0); + // Decode length byte + int len = store[firstArrayPosition]; + if ((firstArrayPosition + len + 1 > store.length) || len > 8) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + long result = 0; + for (int i = 0; i < len; i++) { + result <<= 8; + result |= (store[firstArrayPosition + i + 1] & 0xff); + } + + if ((store.length - firstArrayPosition - len - 1) == 0) { + // we are done with the first array + encodedArrays.remove(0); + firstArrayPosition = 0; + } else { + firstArrayPosition = firstArrayPosition + len + 1; + } + + return result; + } + + /** + * Returns the next long item (encoded via + * {@code writeSignedNumIncreasing(long)}) from its internal encoded byte + * array store and removes the item from the store. + * + * @see #writeSignedNumIncreasing(long) + */ + public long readSignedNumIncreasing() { + if ((encodedArrays == null) || (encodedArrays.size() == 0) || + ((encodedArrays.get(0)).length - firstArrayPosition < 1)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + byte[] store = encodedArrays.get(0); + + long xorMask = ((store[firstArrayPosition] & 0x80) == 0) ? ~0L : 0L; + // Store first byte as an int rather than a (signed) byte -- to avoid + // accidental byte-to-int promotion later which would extend the byte's + // sign bit (if any). + int firstByte = + (store[firstArrayPosition] & 0xff) ^ (int) (xorMask & 0xff); + + // Now calculate and test length, and set x to raw (unmasked) result. + int len; + long x; + if (firstByte != 0xff) { + len = 7 - log2Floor(firstByte ^ 0xff); + if (store.length - firstArrayPosition < len) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + x = xorMask; // Sign extend using xorMask. + for (int i = firstArrayPosition; i < firstArrayPosition + len; i++) { + x = (x << 8) | (store[i] & 0xff); + } + } else { + len = 8; + if (store.length - firstArrayPosition < len) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + int secondByte = + (store[firstArrayPosition + 1] & 0xff) ^ (int) (xorMask & 0xff); + if (secondByte >= 0x80) { + if (secondByte < 0xc0) { + len = 9; + } else { + int thirdByte = + (store[firstArrayPosition + 2] & 0xff) ^ (int) (xorMask & 0xff); + if (secondByte == 0xc0 && thirdByte < 0x80) { + len = 10; + } else { + // Either len > 10 or len == 10 and #bits > 63. + throw new IllegalArgumentException("Invalid encoded byte array"); + } + } + if (store.length - firstArrayPosition < len) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + } + x = Longs.fromByteArray(Arrays.copyOfRange( + store, firstArrayPosition + len - 8, firstArrayPosition + len)); + } + + x ^= LENGTH_TO_MASK[len]; // Remove spurious header bits. + + if (len != getSignedEncodingLength(x)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + if ((store.length - firstArrayPosition - len) == 0) { + // We are done with the first array. + encodedArrays.remove(0); + firstArrayPosition = 0; + } else { + firstArrayPosition = firstArrayPosition + len; + } + + return x; + } + + /** + * Removes INFINITY item from its internal encoded byte array store + * if present. Returns whether INFINITY was present. 
+ * + * @see #writeInfinity() + */ + public boolean readInfinity() { + if ((encodedArrays == null) || (encodedArrays.size() == 0) || + ((encodedArrays.get(0)).length - firstArrayPosition < 1)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + byte[] store = encodedArrays.get(0); + if (store.length - firstArrayPosition < 2) { + return false; + } + if ((store[firstArrayPosition] == ESCAPE2) && + (store[firstArrayPosition + 1] == INFINITY)) { + if ((store.length - firstArrayPosition - 2) == 0) { + // we are done with the first array + encodedArrays.remove(0); + firstArrayPosition = 0; + } else { + firstArrayPosition = firstArrayPosition + 2; + } + return true; + } else { + return false; + } + } + + /** + * Returns the trailing byte array item from its internal encoded byte array + * store and removes the item from the store. + * + * @see #writeTrailingBytes(byte[]) + */ + public byte[] readTrailingBytes() { + // one item is contained within one byte array + if ((encodedArrays == null) || (encodedArrays.size() != 1)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + byte[] store = encodedArrays.get(0); + encodedArrays.remove(0); + assert encodedArrays.size() == 0; + return Arrays.copyOfRange(store, firstArrayPosition, store.length); + } + + /** + * Reads (unencoded) {@code len} bytes. + * + * @see #writeTrailingBytes(byte[]) + */ + public byte[] readBytes(int len) { + if ((encodedArrays == null) || (encodedArrays.size() == 0) || + ((encodedArrays.get(0)).length - firstArrayPosition < len)) { + throw new IllegalArgumentException("Invalid encoded byte array"); + } + + byte[] store = encodedArrays.get(0); + + byte[] result; + if (store.length - firstArrayPosition == len) { + // We are done with the first array. + result = encodedArrays.remove(0); + firstArrayPosition = 0; + } else { + result = new byte[len]; + System.arraycopy(store, firstArrayPosition, result, 0, len); + firstArrayPosition = firstArrayPosition + len; + } + return result; + } + + /** + * Returns the encoded bytes that represent the current state of the + * OrderedCode. + * + *

+ * NOTE: This method returns OrederedCode's internal array (not a + * copy) for better performance. Therefore the returned array should not be + * modified. + */ + public byte[] getEncodedBytes() { + if (encodedArrays.size() == 0) { + return new byte[0]; + } + if ((encodedArrays.size() == 1) && (firstArrayPosition == 0)) { + return encodedArrays.get(0); + } + + int totalLength = 0; + + for (int i = 0; i < encodedArrays.size(); i++) { + byte[] bytes = encodedArrays.get(i); + if (i == 0) { + totalLength += bytes.length - firstArrayPosition; + } else { + totalLength += bytes.length; + } + } + + byte[] encodedBytes = new byte[totalLength]; + int destPos = 0; + for (int i = 0; i < encodedArrays.size(); i++) { + byte[] bytes = encodedArrays.get(i); + if (i == 0) { + System.arraycopy(bytes, firstArrayPosition, encodedBytes, destPos, + bytes.length - firstArrayPosition); + destPos += bytes.length - firstArrayPosition; + } else { + System.arraycopy(bytes, 0, encodedBytes, destPos, bytes.length); + destPos += bytes.length; + } + } + + // replace the store with merged array, so that repeated calls + // don't need to merge. The reads can handle both the versions. + encodedArrays.clear(); + encodedArrays.add(encodedBytes); + firstArrayPosition = 0; + + return encodedBytes; + } + + /** + * Returns true if this has more encoded bytes that haven't been read, + * false otherwise. Return value of true doesn't imply anything about + * validity of remaining data. + * @return true if it has more encoded bytes that haven't been read, + * false otherwise. + */ + public boolean hasRemainingEncodedBytes() { + // We delete an array after fully consuming it. + return encodedArrays != null && encodedArrays.size() != 0; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java new file mode 100644 index 0000000000000..23d4040685bfd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java @@ -0,0 +1,115 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Creates a ParDoFn from a CloudObject spec. + * + * A ParDoFnFactory concrete "subclass" should define a method with + * the following signature: + *

+ * {@code
+ * static SomeParDoFnSubclass create(
+ *     PipelineOptions options,
+ *     CloudObject spec,
+ *     String stepName,
+ *     List<SideInputInfo> sideInputInfos,
+ *     List<MultiOutputInfo> multiOutputInfos,
+ *     Integer numOutputs,
+ *     ExecutionContext executionContext,
+ *     CounterSet.AddCounterMutator addCounterMutator,
+ *     StateSampler stateSampler);
+ * } 
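+ *
+ * The concrete factory class is located either through the short name
+ * registered in {@code predefinedParDoFnFactories} below, or by treating the
+ * spec's class name as a fully qualified class name.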
+ */ +public class ParDoFnFactory { + // Do not instantiate. + private ParDoFnFactory() {} + + /** + * A map from the short names of predefined ParDoFnFactories to their full + * class names. + */ + static Map predefinedParDoFnFactories = new HashMap<>(); + + static { + predefinedParDoFnFactories.put("DoFn", + NormalParDoFn.class.getName()); + predefinedParDoFnFactories.put("CombineValuesFn", + CombineValuesFn.class.getName()); + // TODO: Remove outdated bindings once the services produces the right ones + predefinedParDoFnFactories.put("MergeBucketsDoFn", + GroupAlsoByWindowsParDoFn.class.getName()); + predefinedParDoFnFactories.put("AssignBucketsDoFn", + AssignWindowsParDoFn.class.getName()); + predefinedParDoFnFactories.put("MergeWindowsDoFn", + GroupAlsoByWindowsParDoFn.class.getName()); + predefinedParDoFnFactories.put("AssignWindowsDoFn", + AssignWindowsParDoFn.class.getName()); + } + + /** + * Creates a ParDoFn from a CloudObject spec. + * + * @throws Exception if the CloudObject spec could not be + * decoded and constructed. + */ + public static ParDoFn create(PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + List sideInputInfos, + List multiOutputInfos, + int numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) + throws Exception { + String className = cloudUserFn.getClassName(); + String parDoFnFactoryClassName = predefinedParDoFnFactories.get(className); + if (parDoFnFactoryClassName == null) { + parDoFnFactoryClassName = className; + } + + try { + return InstanceBuilder.ofType(ParDoFn.class) + .fromClassName(parDoFnFactoryClassName) + .fromFactoryMethod("create") + .withArg(PipelineOptions.class, options) + .withArg(CloudObject.class, cloudUserFn) + .withArg(String.class, stepName) + .withArg(List.class, sideInputInfos) + .withArg(List.class, multiOutputInfos) + .withArg(Integer.class, numOutputs) + .withArg(ExecutionContext.class, executionContext) + .withArg(CounterSet.AddCounterMutator.class, addCounterMutator) + .withArg(StateSampler.class, stateSampler) + .build(); + + } catch (ClassNotFoundException exn) { + throw new Exception( + "unable to create a ParDoFn from " + cloudUserFn, exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java new file mode 100644 index 0000000000000..5394a26cc47fc --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.io.IOException; +import java.util.Iterator; + +/** + * A source that reads from a key-sharded dataset, and returns KVs without + * any values grouping. + * + * @param the type of the keys read from the shuffle + * @param the type of the values read from the shuffle + */ +public class PartitioningShuffleSource extends Source>> { + + final byte[] shuffleReaderConfig; + final String startShufflePosition; + final String stopShufflePosition; + Coder keyCoder; + WindowedValueCoder windowedValueCoder; + + public PartitioningShuffleSource(PipelineOptions options, + byte[] shuffleReaderConfig, + String startShufflePosition, + String stopShufflePosition, + Coder>> coder) + throws Exception { + this.shuffleReaderConfig = shuffleReaderConfig; + this.startShufflePosition = startShufflePosition; + this.stopShufflePosition = stopShufflePosition; + initCoder(coder); + } + + /** + * Given a {@code WindowedValueCoder>}, splits it into a coder for K + * and a {@code WindowedValueCoder} with the same kind of windows. + */ + private void initCoder(Coder>> coder) throws Exception { + if (!(coder instanceof WindowedValueCoder)) { + throw new Exception( + "unexpected kind of coder for WindowedValue: " + coder); + } + WindowedValueCoder> windowedElemCoder = ((WindowedValueCoder>) coder); + Coder> elemCoder = windowedElemCoder.getValueCoder(); + if (!(elemCoder instanceof KvCoder)) { + throw new Exception( + "unexpected kind of coder for elements read from " + + "a key-partitioning shuffle: " + elemCoder); + } + KvCoder kvCoder = (KvCoder) elemCoder; + this.keyCoder = kvCoder.getKeyCoder(); + windowedValueCoder = windowedElemCoder.withValueCoder(kvCoder.getValueCoder()); + } + + @Override + public com.google.cloud.dataflow.sdk.util.common.worker.Source.SourceIterator< + WindowedValue>> iterator() throws IOException { + Preconditions.checkArgument(shuffleReaderConfig != null); + return iterator(new BatchingShuffleEntryReader( + new ChunkingShuffleBatchReader(new ApplianceShuffleReader( + shuffleReaderConfig)))); + } + + SourceIterator>> iterator(ShuffleEntryReader reader) throws IOException { + return new PartitioningShuffleSourceIterator(reader); + } + + /** + * A SourceIterator that reads from a ShuffleEntryReader, + * extracts K and {@code WindowedValue}, and returns a constructed + * {@code WindowedValue}. 
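+ *
+ * (Illustrative note: the key bytes are decoded with {@code keyCoder}, the
+ * value bytes with {@code windowedValueCoder}, and the result is rewrapped
+ * as a WindowedValue of {@code KV.of(key, value)} carrying the original
+ * timestamp and windows, as {@code next()} below shows.)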
+ */ + class PartitioningShuffleSourceIterator + extends AbstractSourceIterator>> { + Iterator iterator; + + PartitioningShuffleSourceIterator(ShuffleEntryReader reader) { + this.iterator = reader.read( + ByteArrayShufflePosition.fromBase64(startShufflePosition), + ByteArrayShufflePosition.fromBase64(stopShufflePosition)); + } + + @Override + public boolean hasNext() throws IOException { + return iterator.hasNext(); + } + + @Override + public WindowedValue> next() throws IOException { + ShuffleEntry record = iterator.next(); + K key = CoderUtils.decodeFromByteArray(keyCoder, record.getKey()); + WindowedValue windowedValue = + CoderUtils.decodeFromByteArray(windowedValueCoder, record.getValue()); + notifyElementRead(record.length()); + return WindowedValue.of(KV.of(key, windowedValue.getValue()), + windowedValue.getTimestamp(), + windowedValue.getWindows()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java new file mode 100644 index 0000000000000..f97d1d5b82988 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.decodeBase64; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.values.KV; + +/** + * Creates a PartitioningShuffleSource from a CloudObject spec. + */ +public class PartitioningShuffleSourceFactory { + // Do not instantiate. 
+ private PartitioningShuffleSourceFactory() {} + + public static PartitioningShuffleSource create( + PipelineOptions options, + CloudObject spec, + Coder>> coder, + ExecutionContext executionContext) + throws Exception { + return new PartitioningShuffleSource( + options, + decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), + getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), + coder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleEntryWriter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleEntryWriter.java new file mode 100644 index 0000000000000..4fd44230421d5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleEntryWriter.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; + +import java.io.IOException; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * ShuffleEntryWriter provides an interface for writing key/value + * entries to a shuffle dataset. + */ +@NotThreadSafe +interface ShuffleEntryWriter extends AutoCloseable { + /** + * Writes an entry to a shuffle dataset. Returns the size + * in bytes of the data written. + */ + public long put(ShuffleEntry entry) throws IOException; + + @Override + public void close() throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleLibrary.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleLibrary.java new file mode 100644 index 0000000000000..8863436d2e1d6 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleLibrary.java @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; + +/** + * Native library used to read from and write to a shuffle dataset. + */ +class ShuffleLibrary { + /** + * Loads the native shuffle library. + */ + static void load() { + try { + File tempfile = File.createTempFile("libshuffle_client_jni", ".so"); + InputStream input = ClassLoader.getSystemResourceAsStream( + "libshuffle_client_jni.so.stripped"); + Files.copy(input, tempfile.toPath(), StandardCopyOption.REPLACE_EXISTING); + System.load(tempfile.getAbsolutePath()); + } catch (IOException e) { + throw new RuntimeException("Loading shuffle_client failed:", e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReader.java new file mode 100644 index 0000000000000..8a1018b237ee7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReader.java @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import java.io.IOException; + +/** + * ShuffleReader reads chunks of data from a shuffle dataset for + * a given position range. + */ +interface ShuffleReader { + /** Represents a chunk of data read from a shuffle dataset. */ + public static class ReadChunkResult { + public final byte[] chunk; + public final byte[] nextStartPosition; + public ReadChunkResult(byte[] chunk, byte[] nextStartPosition) { + this.chunk = chunk; + this.nextStartPosition = nextStartPosition; + } + } + + /** + * Reads a chunk of data for keys in the given position range. + * The chunk is a sequence of pairs encoded as: + * {@code + } + * where the sizes are 4-byte big-endian integers. + * + * @param startPosition the start of the requested range (inclusive) + * @param endPosition the end of the requested range (exclusive) + */ + public ReadChunkResult readIncludingPosition( + byte[] startPosition, byte[] endPosition) throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java new file mode 100644 index 0000000000000..72ea16fc99b43 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.io.IOException; + +/** + * A sink that writes to a shuffle dataset. + * + * @param the type of the elements written to the sink + */ +public class ShuffleSink extends Sink> { + + enum ShuffleKind { UNGROUPED, PARTITION_KEYS, GROUP_KEYS, GROUP_KEYS_AND_SORT_VALUES } + + static final long SHUFFLE_WRITER_BUFFER_SIZE = 128 << 20; + + final byte[] shuffleWriterConfig; + + final ShuffleKind shuffleKind; + + boolean shardByKey; + boolean groupValues; + boolean sortValues; + + WindowedValueCoder windowedElemCoder; + WindowedValueCoder windowedValueCoder; + Coder elemCoder; + Coder keyCoder; + Coder valueCoder; + Coder sortKeyCoder; + Coder sortValueCoder; + + public static ShuffleKind parseShuffleKind(String shuffleKind) + throws Exception { + try { + return Enum.valueOf(ShuffleKind.class, shuffleKind.trim().toUpperCase()); + } catch (IllegalArgumentException e) { + throw new Exception("unexpected shuffle_kind", e); + } + } + + public ShuffleSink(PipelineOptions options, + byte[] shuffleWriterConfig, + ShuffleKind shuffleKind, + Coder> coder) + throws Exception { + this.shuffleWriterConfig = shuffleWriterConfig; + this.shuffleKind = shuffleKind; + initCoder(coder); + } + + private void initCoder(Coder> coder) throws Exception { + switch (shuffleKind) { + case UNGROUPED: + this.shardByKey = false; + this.groupValues = false; + this.sortValues = false; + break; + case PARTITION_KEYS: + this.shardByKey = true; + this.groupValues = false; + this.sortValues = false; + break; + case GROUP_KEYS: + this.shardByKey = true; + this.groupValues = true; + this.sortValues = false; + break; + case GROUP_KEYS_AND_SORT_VALUES: + this.shardByKey = true; + this.groupValues = true; + this.sortValues = true; + break; + default: + throw new AssertionError("unexpected shuffle kind"); + } + + this.windowedElemCoder = (WindowedValueCoder) coder; + this.elemCoder = windowedElemCoder.getValueCoder(); + if (shardByKey) { + if (!(elemCoder instanceof KvCoder)) { + throw new Exception( + "unexpected kind of coder for elements written to " + + "a key-grouping shuffle"); + } + KvCoder kvCoder = (KvCoder) elemCoder; + this.keyCoder = kvCoder.getKeyCoder(); + this.valueCoder = kvCoder.getValueCoder(); + if (sortValues) { + // TODO: Decide the representation of sort-keyed values. + // For now, we'll just use KVs. 
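+          // In this mode the element coder is expected to look roughly like
+          // KvCoder<K, KV<SortKeyT, SortValueT>>: the checks below peel the
+          // inner KvCoder off the value coder and use its key/value coders as
+          // the sort-key and sort-value coders.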
+ if (!(valueCoder instanceof KvCoder)) { + throw new Exception( + "unexpected kind of coder for values written to " + + "a value-sorting shuffle"); + } + KvCoder kvValueCoder = (KvCoder) valueCoder; + this.sortKeyCoder = kvValueCoder.getKeyCoder(); + this.sortValueCoder = kvValueCoder.getValueCoder(); + } else { + this.sortKeyCoder = null; + this.sortValueCoder = null; + } + if (groupValues) { + this.windowedValueCoder = null; + } else { + this.windowedValueCoder = this.windowedElemCoder.withValueCoder(this.valueCoder); + } + } else { + this.keyCoder = null; + this.valueCoder = null; + this.sortKeyCoder = null; + this.sortValueCoder = null; + this.windowedValueCoder = null; + } + } + + /** + * Returns a SinkWriter that allows writing to this ShuffleSink, + * using the given ShuffleEntryWriter. + */ + public SinkWriter> writer(ShuffleEntryWriter writer) throws IOException { + return new ShuffleSinkWriter(writer); + } + + /** The SinkWriter for a ShuffleSink. */ + class ShuffleSinkWriter implements SinkWriter> { + ShuffleEntryWriter writer; + long seqNum = 0; + + ShuffleSinkWriter(ShuffleEntryWriter writer) throws IOException { + this.writer = writer; + } + + @Override + public long add(WindowedValue windowedElem) throws IOException { + byte[] keyBytes; + byte[] secondaryKeyBytes; + byte[] valueBytes; + T elem = windowedElem.getValue(); + if (shardByKey) { + if (!(elem instanceof KV)) { + throw new AssertionError( + "expecting the values written to a key-grouping shuffle " + + "to be KVs"); + } + KV kv = (KV) elem; + Object key = kv.getKey(); + Object value = kv.getValue(); + + keyBytes = CoderUtils.encodeToByteArray(keyCoder, key); + + if (sortValues) { + if (!(value instanceof KV)) { + throw new AssertionError( + "expecting the value parts of the KVs written to " + + "a value-sorting shuffle to also be KVs"); + } + KV kvValue = (KV) value; + Object sortKey = kvValue.getKey(); + Object sortValue = kvValue.getValue(); + + // TODO: Need to coordinate with the + // GroupingShuffleSource, to make sure it knows how to + // reconstruct the value from the sortKeyBytes and + // sortValueBytes. Right now, it doesn't know between + // sorting and non-sorting GBKs. + secondaryKeyBytes = + CoderUtils.encodeToByteArray(sortKeyCoder, sortKey); + valueBytes = CoderUtils.encodeToByteArray(sortValueCoder, sortValue); + + } else if (groupValues) { + // Sort values by timestamp so that GroupAlsoByWindows can run efficiently. + if (windowedElem.getTimestamp().getMillis() == Long.MIN_VALUE) { + // Empty secondary keys sort before all other secondary keys, so we + // can omit this common value here for efficiency. + secondaryKeyBytes = null; + } else { + secondaryKeyBytes = + CoderUtils.encodeToByteArray(InstantCoder.of(), windowedElem.getTimestamp()); + } + valueBytes = CoderUtils.encodeToByteArray(valueCoder, value); + } else { + secondaryKeyBytes = null; + valueBytes = CoderUtils.encodeToByteArray( + windowedValueCoder, + WindowedValue.of(value, windowedElem.getTimestamp(), windowedElem.getWindows())); + } + + } else { + // Not partitioning or grouping by key, just resharding values. + // is ignored, except by the shuffle splitter. Use a seq# + // as the key, so we can split records anywhere. This also works + // for writing a single-sharded ordered PCollection through a + // shuffle, since the order of elements in the input will be + // preserved in the output. 
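+        // (The shuffle key itself is what "is ignored" above: readers of an
+        // UNGROUPED shuffle see only the values, so the sequence number key
+        // exists purely to keep records splittable and in their original
+        // order.)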
+ keyBytes = + CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum++); + + secondaryKeyBytes = null; + valueBytes = CoderUtils.encodeToByteArray(windowedElemCoder, windowedElem); + } + + return writer.put(new ShuffleEntry( + keyBytes, secondaryKeyBytes, valueBytes)); + } + + @Override + public void close() throws IOException { + writer.close(); + } + } + + @Override + public SinkWriter> writer() throws IOException { + Preconditions.checkArgument(shuffleWriterConfig != null); + return writer(new ChunkingShuffleEntryWriter(new ApplianceShuffleWriter( + shuffleWriterConfig, SHUFFLE_WRITER_BUFFER_SIZE))); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactory.java new file mode 100644 index 0000000000000..6db9945eb6135 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactory.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.decodeBase64; +import static com.google.cloud.dataflow.sdk.runners.worker.ShuffleSink.parseShuffleKind; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; + +/** + * Creates a ShuffleSink from a CloudObject spec. + */ +public class ShuffleSinkFactory { + // Do not instantiate. + private ShuffleSinkFactory() {} + + public static ShuffleSink create(PipelineOptions options, + CloudObject spec, + Coder> coder, + ExecutionContext executionContext) + throws Exception { + return create(options, spec, coder); + } + + static ShuffleSink create(PipelineOptions options, + CloudObject spec, + Coder> coder) + throws Exception { + return new ShuffleSink<>( + options, + decodeBase64(getString(spec, PropertyNames.SHUFFLE_WRITER_CONFIG, null)), + parseShuffleKind(getString(spec, PropertyNames.SHUFFLE_KIND)), + coder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleWriter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleWriter.java new file mode 100644 index 0000000000000..ff880fd13c4c1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleWriter.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import java.io.IOException; + +/** + * ShuffleWriter writes chunks of records to a shuffle dataset. + */ +interface ShuffleWriter extends AutoCloseable { + /** + * Writes a chunk of records. The chunk is a sequence of pairs encoded as: + * + * where the sizes are 4-byte big-endian integers. + */ + public void write(byte[] chunk) throws IOException; + + /** + * Flushes written records and closes this writer. + */ + @Override + public void close() throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java new file mode 100644 index 0000000000000..f3fc1cf3f3ef3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java @@ -0,0 +1,211 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; + +/** + * Utilities for working with side inputs. + */ +public class SideInputUtils { + static final String SINGLETON_KIND = "singleton"; + static final String COLLECTION_KIND = "collection"; + + /** + * Reads the given side input, producing the contents associated + * with a a {@link PCollectionView}. 
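+ * <p>For illustration: a side input of kind {@code "singleton"} yields the
+ * single element itself, while kind {@code "collection"} yields an
+ * {@code Iterable} of all elements read from the given sources; see
+ * {@link #readSideInputValue}.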
+ */ + public static Object readSideInput(PipelineOptions options, + SideInputInfo sideInputInfo, + ExecutionContext executionContext) + throws Exception { + Iterable elements = + readSideInputSources(options, sideInputInfo.getSources(), executionContext); + return readSideInputValue(sideInputInfo.getKind(), elements); + } + + static Iterable readSideInputSources( + PipelineOptions options, + List sideInputSources, + ExecutionContext executionContext) + throws Exception { + int numSideInputSources = sideInputSources.size(); + if (numSideInputSources == 0) { + throw new Exception("expecting at least one side input Source"); + } else if (numSideInputSources == 1) { + return readSideInputSource(options, sideInputSources.get(0), executionContext); + } else { + List> shards = new ArrayList<>(); + for (com.google.api.services.dataflow.model.Source sideInputSource + : sideInputSources) { + shards.add(readSideInputSource(options, sideInputSource, executionContext)); + } + return new ShardedIterable<>(shards); + } + } + + static Iterable readSideInputSource( + PipelineOptions options, + com.google.api.services.dataflow.model.Source sideInputSource, + ExecutionContext executionContext) + throws Exception { + return new SourceIterable<>( + SourceFactory.create(options, sideInputSource, executionContext)); + } + + static Object readSideInputValue(Map sideInputKind, + Iterable elements) + throws Exception { + String className = getString(sideInputKind, PropertyNames.OBJECT_TYPE_NAME); + if (SINGLETON_KIND.equals(className)) { + Iterator iter = elements.iterator(); + if (iter.hasNext()) { + Object elem = iter.next(); + if (!iter.hasNext()) { + return elem; + } + } + throw new Exception( + "expecting a singleton side input to have a single value"); + + } else if (COLLECTION_KIND.equals(className)) { + return elements; + + } else { + throw new Exception("unexpected kind of side input: " + className); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + + static class SourceIterable implements Iterable { + final Source source; + + public SourceIterable(Source source) { + this.source = source; + } + + @Override + public Iterator iterator() { + try { + return new SourceIterator<>(source.iterator()); + } catch (Exception exn) { + throw new RuntimeException(exn); + } + } + } + + static class SourceIterator implements Iterator { + final Source.SourceIterator iterator; + + public SourceIterator(Source.SourceIterator iterator) { + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + try { + return iterator.hasNext(); + } catch (Exception exn) { + throw new RuntimeException(exn); + } + } + + @Override + public T next() { + try { + return iterator.next(); + } catch (Exception exn) { + throw new RuntimeException(exn); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + static class ShardedIterable implements Iterable { + final List> shards; + + public ShardedIterable(List> shards) { + this.shards = shards; + } + + @Override + public Iterator iterator() { + return new ShardedIterator<>(shards.iterator()); + } + } + + static class ShardedIterator implements Iterator { + final Iterator> shards; + Iterator shard; + + public ShardedIterator(Iterator> shards) { + this.shards = shards; + this.shard = null; + } + + @Override + public boolean hasNext() { + boolean shardHasNext; + for (;;) { + shardHasNext = (shard != null && 
shard.hasNext()); + if (shardHasNext) { + break; + } + if (!shards.hasNext()) { + break; + } + shard = shards.next().iterator(); + } + return shardHasNext; + } + + @Override + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return shard.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java new file mode 100644 index 0000000000000..df2d5ac754281 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java @@ -0,0 +1,94 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.common.reflect.TypeToken; + +import java.util.HashMap; +import java.util.Map; + +/** + * Constructs a Sink from a Dataflow service protocol Sink definition. + * + * A SinkFactory concrete "subclass" should define a method with the following + * signature: + *
<pre> {@code
+ * static SomeSinkSubclass<T> create(PipelineOptions, CloudObject,
+ *                                   Coder<T>, ExecutionContext);
+ * } </pre>
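+ *
+ * <p>A minimal usage sketch (assuming {@code options}, a {@code cloudSink}
+ * proto, and an {@code executionContext} are already in hand):
+ * <pre> {@code
+ * Sink<?> sink = SinkFactory.create(options, cloudSink, executionContext);
+ * } </pre>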
+ */ +public final class SinkFactory { + // Do not instantiate. + private SinkFactory() {} + + /** + * A map from the short names of predefined sinks to their full + * factory class names. + */ + static Map predefinedSinkFactories = new HashMap<>(); + + static { + predefinedSinkFactories.put("TextSink", + TextSinkFactory.class.getName()); + predefinedSinkFactories.put("AvroSink", + AvroSinkFactory.class.getName()); + predefinedSinkFactories.put("ShuffleSink", + ShuffleSinkFactory.class.getName()); + } + + /** + * Creates a {@link Sink} from a Dataflow API Sink definition. + * + * @throws Exception if the sink could not be decoded and + * constructed + */ + public static Sink create( + PipelineOptions options, + com.google.api.services.dataflow.model.Sink cloudSink, + ExecutionContext executionContext) + throws Exception { + Coder coder = Serializer.deserialize(cloudSink.getCodec(), Coder.class); + CloudObject object = CloudObject.fromSpec(cloudSink.getSpec()); + + String className = predefinedSinkFactories.get(object.getClassName()); + if (className == null) { + className = object.getClassName(); + } + + try { + return InstanceBuilder.ofType(new TypeToken>() {}) + .fromClassName(className) + .fromFactoryMethod("create") + .withArg(PipelineOptions.class, options) + .withArg(CloudObject.class, object) + .withArg(Coder.class, coder) + .withArg(ExecutionContext.class, executionContext) + .build(); + + } catch (ClassNotFoundException exn) { + throw new Exception( + "unable to create a sink from " + cloudSink, exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java new file mode 100644 index 0000000000000..d4726094a3ea6 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CloudSourceUtils; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.common.reflect.TypeToken; + +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * Constructs a Source from a Dataflow API Source definition. + * + * A SourceFactory concrete "subclass" should define a method with the following + * signature: + *
<pre> {@code
+ * static SomeSourceSubclass<T> create(PipelineOptions, CloudObject,
+ *                                     Coder<T>, ExecutionContext);
+ * } </pre>
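+ *
+ * <p>A minimal usage sketch (assuming {@code options}, a {@code cloudSource}
+ * proto, and an {@code executionContext} are already in hand):
+ * <pre> {@code
+ * Source<?> source = SourceFactory.create(options, cloudSource, executionContext);
+ * } </pre>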
+ */ +public final class SourceFactory { + // Do not instantiate. + private SourceFactory() {} + + /** + * A map from the short names of predefined sources to + * their full factory class names. + */ + static Map predefinedSourceFactories = new HashMap<>(); + + static { + predefinedSourceFactories.put( + "TextSource", + TextSourceFactory.class.getName()); + predefinedSourceFactories.put( + "AvroSource", + AvroSourceFactory.class.getName()); + predefinedSourceFactories.put( + "UngroupedShuffleSource", + UngroupedShuffleSourceFactory.class.getName()); + predefinedSourceFactories.put( + "PartitioningShuffleSource", + PartitioningShuffleSourceFactory.class.getName()); + predefinedSourceFactories.put( + "GroupingShuffleSource", + GroupingShuffleSourceFactory.class.getName()); + predefinedSourceFactories.put( + "InMemorySource", + InMemorySourceFactory.class.getName()); + predefinedSourceFactories.put( + "BigQuerySource", + BigQuerySourceFactory.class.getName()); + } + + /** + * Creates a Source from a Dataflow API Source definition. + * + * @throws Exception if the source could not be decoded and + * constructed + */ + public static Source create( + @Nullable PipelineOptions options, + com.google.api.services.dataflow.model.Source cloudSource, + @Nullable ExecutionContext executionContext) + throws Exception { + cloudSource = CloudSourceUtils.flattenBaseSpecs(cloudSource); + Coder coder = Serializer.deserialize(cloudSource.getCodec(), Coder.class); + CloudObject object = CloudObject.fromSpec(cloudSource.getSpec()); + + String sourceFactoryClassName = predefinedSourceFactories.get(object.getClassName()); + if (sourceFactoryClassName == null) { + sourceFactoryClassName = object.getClassName(); + } + + try { + return InstanceBuilder.ofType(new TypeToken>() {}) + .fromClassName(sourceFactoryClassName) + .fromFactoryMethod("create") + .withArg(PipelineOptions.class, options) + .withArg(CloudObject.class, object) + .withArg(Coder.class, coder) + .withArg(ExecutionContext.class, executionContext) + .build(); + + } catch (ClassNotFoundException exn) { + throw new Exception( + "unable to create a source from " + cloudSource, exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java new file mode 100644 index 0000000000000..2db18b2724740 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java @@ -0,0 +1,72 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceOperationRequestToSourceOperationRequest; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceOperationResponseToCloudSourceOperationResponse; + +import com.google.api.services.dataflow.model.Source; +import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.api.services.dataflow.model.SourceOperationResponse; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An executor for a source operation, defined by a {@code SourceOperationRequest}. + */ +public class SourceOperationExecutor extends WorkExecutor { + private static final Logger LOG = LoggerFactory.getLogger(MapTaskExecutor.class); + + private final SourceOperationRequest request; + private SourceOperationResponse response; + + public SourceOperationExecutor(SourceOperationRequest request, + CounterSet counters) { + super(counters); + this.request = request; + } + + @Override + public void execute() throws Exception { + LOG.debug("Executing source operation"); + + Source sourceSpec; + if (request.getGetMetadata() != null) { + sourceSpec = request.getGetMetadata().getSource(); + } else if (request.getSplit() != null) { + sourceSpec = request.getSplit().getSource(); + } else { + throw new UnsupportedOperationException("Unknown source operation"); + } + + this.response = + sourceOperationResponseToCloudSourceOperationResponse( + CustomSourceFormatFactory.create(sourceSpec) + .performSourceOperation( + cloudSourceOperationRequestToSourceOperationRequest(request))); + + LOG.debug("Source operation execution complete"); + } + + public SourceOperationResponse getResponse() { + return response; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java new file mode 100644 index 0000000000000..10c862e464875 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java @@ -0,0 +1,31 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * Creates a SourceOperationExecutor from a SourceOperation. 
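+ * <p>The resulting executor handles requests that carry either a
+ * {@code getMetadata} or a {@code split} operation, mirroring
+ * {@code SourceOperationExecutor#execute}.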
+ */ +public class SourceOperationExecutorFactory { + public static SourceOperationExecutor create(SourceOperationRequest request) + throws Exception { + CounterSet counters = new CounterSet(); + return new SourceOperationExecutor(request, counters); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java new file mode 100644 index 0000000000000..1e0c8aa234918 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java @@ -0,0 +1,189 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.api.services.dataflow.model.SourceMetadata; +import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.api.services.dataflow.model.SourceOperationResponse; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * Utilities for representing Source-specific objects + * using Dataflow model protos. + */ +public class SourceTranslationUtils { + public static Source.Progress cloudProgressToSourceProgress( + @Nullable ApproximateProgress cloudProgress) { + return cloudProgress == null ? null + : new DataflowSourceProgress(cloudProgress); + } + + public static Source.Position cloudPositionToSourcePosition( + @Nullable Position cloudPosition) { + return cloudPosition == null ? null + : new DataflowSourcePosition(cloudPosition); + } + + public static CustomSourceFormat.SourceOperationRequest + cloudSourceOperationRequestToSourceOperationRequest( + @Nullable SourceOperationRequest request) { + return request == null ? null + : new DataflowSourceOperationRequest(request); + } + + public static CustomSourceFormat.SourceOperationResponse + cloudSourceOperationResponseToSourceOperationResponse( + @Nullable SourceOperationResponse response) { + return response == null ? 
null + : new DataflowSourceOperationResponse(response); + } + + public static CustomSourceFormat.SourceSpec cloudSourceToSourceSpec( + @Nullable com.google.api.services.dataflow.model.Source cloudSource) { + return cloudSource == null ? null + : new DataflowSourceSpec(cloudSource); + } + + public static ApproximateProgress sourceProgressToCloudProgress( + @Nullable Source.Progress sourceProgress) { + return sourceProgress == null ? null + : ((DataflowSourceProgress) sourceProgress).cloudProgress; + } + + public static Position sourcePositionToCloudPosition( + @Nullable Source.Position sourcePosition) { + return sourcePosition == null ? null + : ((DataflowSourcePosition) sourcePosition).cloudPosition; + } + + public static SourceOperationRequest + sourceOperationRequestToCloudSourceOperationRequest( + @Nullable CustomSourceFormat.SourceOperationRequest request) { + return (request == null) ? null + : ((DataflowSourceOperationRequest) request).cloudRequest; + } + + public static SourceOperationResponse + sourceOperationResponseToCloudSourceOperationResponse( + @Nullable CustomSourceFormat.SourceOperationResponse response) { + return (response == null) ? null + : ((DataflowSourceOperationResponse) response).cloudResponse; + } + + public static com.google.api.services.dataflow.model.Source sourceSpecToCloudSource( + @Nullable CustomSourceFormat.SourceSpec spec) { + return (spec == null) ? null + : ((DataflowSourceSpec) spec).cloudSource; + } + + static class DataflowSourceProgress implements Source.Progress { + public final ApproximateProgress cloudProgress; + public DataflowSourceProgress(ApproximateProgress cloudProgress) { + this.cloudProgress = cloudProgress; + } + } + + static class DataflowSourcePosition implements Source.Position { + public final Position cloudPosition; + public DataflowSourcePosition(Position cloudPosition) { + this.cloudPosition = cloudPosition; + } + } + + static class DataflowSourceOperationRequest implements CustomSourceFormat.SourceOperationRequest { + public final SourceOperationRequest cloudRequest; + public DataflowSourceOperationRequest(SourceOperationRequest cloudRequest) { + this.cloudRequest = cloudRequest; + } + } + + static class DataflowSourceOperationResponse + implements CustomSourceFormat.SourceOperationResponse { + public final SourceOperationResponse cloudResponse; + public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { + this.cloudResponse = cloudResponse; + } + } + + static class DataflowSourceSpec implements CustomSourceFormat.SourceSpec { + public final com.google.api.services.dataflow.model.Source cloudSource; + public DataflowSourceSpec(com.google.api.services.dataflow.model.Source cloudSource) { + this.cloudSource = cloudSource; + } + } + + // Represents a cloud Source as a dictionary for encoding inside the CUSTOM_SOURCE + // property of CloudWorkflowStep.input. + public static Map cloudSourceToDictionary( + com.google.api.services.dataflow.model.Source source) { + // Do not translate encoding - the source's encoding is translated elsewhere + // to the step's output info. 
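+    // The dictionary built below carries the source spec plus, when present,
+    // the source metadata and the does-not-need-splitting flag.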
+ Map res = new HashMap<>(); + addDictionary(res, PropertyNames.CUSTOM_SOURCE_SPEC, source.getSpec()); + if (source.getMetadata() != null) { + addDictionary(res, PropertyNames.CUSTOM_SOURCE_METADATA, + cloudSourceMetadataToDictionary(source.getMetadata())); + } + if (source.getDoesNotNeedSplitting() != null) { + addBoolean(res, PropertyNames.CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING, + source.getDoesNotNeedSplitting()); + } + return res; + } + + private static Map cloudSourceMetadataToDictionary( + SourceMetadata metadata) { + Map res = new HashMap<>(); + if (metadata.getProducesSortedKeys() != null) { + addBoolean(res, PropertyNames.CUSTOM_SOURCE_PRODUCES_SORTED_KEYS, + metadata.getProducesSortedKeys()); + } + if (metadata.getEstimatedSizeBytes() != null) { + addLong(res, PropertyNames.CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES, + metadata.getEstimatedSizeBytes()); + } + if (metadata.getInfinite() != null) { + addBoolean(res, PropertyNames.CUSTOM_SOURCE_IS_INFINITE, + metadata.getInfinite()); + } + return res; + } + + public static com.google.api.services.dataflow.model.Source dictionaryToCloudSource( + Map params) throws Exception { + com.google.api.services.dataflow.model.Source res = + new com.google.api.services.dataflow.model.Source(); + res.setSpec(getDictionary(params, PropertyNames.CUSTOM_SOURCE_SPEC)); + // CUSTOM_SOURCE_METADATA and CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING do not have to be + // translated, because they only make sense in cloud Source objects produced by the user. + return res; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSink.java new file mode 100644 index 0000000000000..5fef80f725131 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSink.java @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MimeTypes; +import com.google.cloud.dataflow.sdk.util.ShardingWritableByteChannel; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.Random; + +import javax.annotation.Nullable; + +/** + * A sink that writes text files. 
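+ * <p>Each element is encoded with the sink's coder and written as one line,
+ * optionally followed by a trailing newline; optional header and footer lines
+ * may be written per file, and output may be unsharded or spread across
+ * several shards.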
+ * + * @param the type of the elements written to the sink + */ +public class TextSink extends Sink { + + static final byte[] NEWLINE = getNewline(); + + private static byte[] getNewline() { + String newline = "\n"; + try { + return newline.getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("UTF-8 not supported", e); + } + } + + final String namePrefix; + final String shardFormat; + final String nameSuffix; + final int shardCount; + final boolean appendTrailingNewlines; + final String header; + final String footer; + final Coder coder; + + /** + * For testing only. + * + *

Used by simple tests which write to a single unsharded file. + */ + public static TextSink> createForTest( + String filename, + boolean appendTrailingNewlines, + @Nullable String header, + @Nullable String footer, + Coder coder) { + return create(filename, + "", + "", + 1, + appendTrailingNewlines, + header, + footer, + WindowedValue.getValueOnlyCoder(coder)); + } + + /** + * For DirectPipelineRunner only. + * It wraps the coder with {@code WindowedValue.ValueOnlyCoder}. + */ + public static TextSink> createForDirectPipelineRunner( + String filenamePrefix, + String shardFormat, + String filenameSuffix, + int shardCount, + boolean appendTrailingNewlines, + @Nullable String header, + @Nullable String footer, + Coder coder) { + return create(filenamePrefix, + shardFormat, + filenameSuffix, + shardCount, + appendTrailingNewlines, + header, + footer, + WindowedValue.getValueOnlyCoder(coder)); + } + + /** + * Constructs a new TextSink. + * + * @param filenamePrefix the prefix of output filenames. + * @param shardFormat the shard name template to use for output filenames. + * @param filenameSuffix the suffix of output filenames. + * @param shardCount the number of outupt shards to produce. + * @param appendTrailingNewlines true to append newlines to each output line. + * @param header text to place at the beginning of each output file. + * @param footer text to place at the end of each output file. + * @param coder the code used to encode elements for output. + */ + public static TextSink create(String filenamePrefix, + String shardFormat, + String filenameSuffix, + int shardCount, + boolean appendTrailingNewlines, + @Nullable String header, + @Nullable String footer, + Coder coder) { + return new TextSink<>(filenamePrefix, + shardFormat, + filenameSuffix, + shardCount, + appendTrailingNewlines, + header, + footer, + coder); + } + + private TextSink(String filenamePrefix, + String shardFormat, + String filenameSuffix, + int shardCount, + boolean appendTrailingNewlines, + @Nullable String header, + @Nullable String footer, + Coder coder) { + this.namePrefix = filenamePrefix; + this.shardFormat = shardFormat; + this.nameSuffix = filenameSuffix; + this.shardCount = shardCount; + this.appendTrailingNewlines = appendTrailingNewlines; + this.header = header; + this.footer = footer; + this.coder = coder; + } + + @Override + public SinkWriter writer() throws IOException { + String mimeType; + + if (!(coder instanceof WindowedValueCoder)) { + throw new IOException( + "Expected WindowedValueCoder for inputCoder, got: " + + coder.getClass().getName()); + } + Coder valueCoder = ((WindowedValueCoder) coder).getValueCoder(); + if (valueCoder.equals(StringUtf8Coder.of())) { + mimeType = MimeTypes.TEXT; + } else { + mimeType = MimeTypes.BINARY; + } + + WritableByteChannel writer = IOChannelUtils.create(namePrefix, shardFormat, + nameSuffix, shardCount, mimeType); + + if (writer instanceof ShardingWritableByteChannel) { + return new ShardingTextFileWriter((ShardingWritableByteChannel) writer); + } else { + return new TextFileWriter(writer); + } + } + + /** + * Abstract SinkWriter base class shared by sharded and unsharded Text + * writer implementations. + */ + abstract class AbstractTextFileWriter implements SinkWriter { + protected void init() throws IOException { + if (header != null) { + printLine(ShardingWritableByteChannel.ALL_SHARDS, + CoderUtils.encodeToByteArray(StringUtf8Coder.of(), header)); + } + } + + /** + * Adds a value to the sink. Returns the size in bytes of the data written. 
+ * The return value does -not- include header/footer size. + */ + @Override + public long add(T value) throws IOException { + return printLine(getShardNum(value), + CoderUtils.encodeToByteArray(coder, value)); + } + + @Override + public void close() throws IOException { + if (footer != null) { + printLine(ShardingWritableByteChannel.ALL_SHARDS, + CoderUtils.encodeToByteArray(StringUtf8Coder.of(), footer)); + } + } + + protected long printLine(int shardNum, byte[] line) throws IOException { + long length = line.length; + write(shardNum, ByteBuffer.wrap(line)); + + if (appendTrailingNewlines) { + write(shardNum, ByteBuffer.wrap(NEWLINE)); + length += NEWLINE.length; + } + + return length; + } + + protected abstract void write(int shardNum, ByteBuffer buf) + throws IOException; + protected abstract int getShardNum(T value); + } + + /** An unsharded SinkWriter for a TextSink. */ + class TextFileWriter extends AbstractTextFileWriter { + private final WritableByteChannel outputChannel; + + TextFileWriter(WritableByteChannel outputChannel) throws IOException { + this.outputChannel = outputChannel; + init(); + } + + @Override + public void close() throws IOException { + super.close(); + outputChannel.close(); + } + + @Override + protected void write(int shardNum, ByteBuffer buf) throws IOException { + outputChannel.write(buf); + } + + @Override + protected int getShardNum(T value) { + return 0; + } + } + + /** A sharding SinkWriter for a TextSink. */ + class ShardingTextFileWriter extends AbstractTextFileWriter { + private final Random rng = new Random(); + private final int numShards; + private final ShardingWritableByteChannel outputChannel; + + // TODO: add support for user-defined sharding function. + ShardingTextFileWriter(ShardingWritableByteChannel outputChannel) + throws IOException { + this.outputChannel = outputChannel; + numShards = outputChannel.getNumShards(); + init(); + } + + @Override + public void close() throws IOException { + super.close(); + outputChannel.close(); + } + + @Override + protected void write(int shardNum, ByteBuffer buf) throws IOException { + outputChannel.writeToShard(shardNum, buf); + } + + @Override + protected int getShardNum(T value) { + return rng.nextInt(numShards); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactory.java new file mode 100644 index 0000000000000..bac663dea2da5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactory.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +/** + * Creates a TextSink from a CloudObject spec. + */ +public final class TextSinkFactory { + // Do not instantiate. + private TextSinkFactory() {} + + public static TextSink create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(spec, coder); + } + + static TextSink create(CloudObject spec, Coder coder) + throws Exception { + return TextSink.create( + getString(spec, PropertyNames.FILENAME), + "", // No shard template + "", // No suffix + 1, // Exactly one output file + getBoolean(spec, PropertyNames.APPEND_TRAILING_NEWLINES, true), + getString(spec, PropertyNames.HEADER, null), + getString(spec, PropertyNames.FOOTER, null), + coder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java new file mode 100644 index 0000000000000..5bbcba0e6b91d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java @@ -0,0 +1,383 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.IOChannelFactory; +import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTracker; +import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTrackerGroup; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PushbackInputStream; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.util.Collection; +import java.util.Iterator; + +import javax.annotation.Nullable; + +/** + * A source that reads text files. 
+ * + * @param the type of the elements read from the source + */ +public class TextSource extends FileBasedSource { + final boolean stripTrailingNewlines; + + public TextSource(String filename, + boolean stripTrailingNewlines, + @Nullable Long startPosition, + @Nullable Long endPosition, + Coder coder) { + this(filename, stripTrailingNewlines, + startPosition, endPosition, coder, true); + } + + protected TextSource(String filename, + boolean stripTrailingNewlines, + @Nullable Long startPosition, + @Nullable Long endPosition, + Coder coder, + boolean useDefaultBufferSize) { + super(filename, startPosition, endPosition, coder, useDefaultBufferSize); + this.stripTrailingNewlines = stripTrailingNewlines; + } + + @Override + protected SourceIterator newSourceIteratorForRangeInFile( + IOChannelFactory factory, String oneFile, long startPosition, + @Nullable Long endPosition) + throws IOException { + // Position before the first record, so we can find the record beginning. + final long start = startPosition > 0 ? startPosition - 1 : 0; + + TextFileIterator iterator = newSourceIteratorForRangeWithStrictStart( + factory, oneFile, stripTrailingNewlines, start, endPosition); + + // Skip the initial record if start position was set. + if (startPosition > 0 && iterator.hasNext()) { + iterator.advance(); + } + + return iterator; + } + + @Override + protected SourceIterator newSourceIteratorForFiles( + IOChannelFactory factory, Collection files) throws IOException { + if (files.size() == 1) { + return newSourceIteratorForFile( + factory, files.iterator().next(), stripTrailingNewlines); + } + + return new TextFileMultiIterator( + factory, files.iterator(), stripTrailingNewlines); + } + + private TextFileIterator newSourceIteratorForFile( + IOChannelFactory factory, String input, boolean stripTrailingNewlines) + throws IOException { + return newSourceIteratorForRangeWithStrictStart( + factory, input, stripTrailingNewlines, 0, null); + } + + /** + * Returns a new iterator for lines in the given range in the given + * file. Does NOT skip the first line if the range starts in the + * middle of a line (instead, the latter half that starts at + * startOffset will be returned as the first element). 
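+ * <p>{@code newSourceIteratorForRangeInFile} builds on this by starting one
+ * byte before the requested range and then discarding the first (possibly
+ * partial) record, which restores whole-record semantics.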
+ */ + private TextFileIterator newSourceIteratorForRangeWithStrictStart( + IOChannelFactory factory, String input, boolean stripTrailingNewlines, + long startOffset, @Nullable Long endOffset) throws IOException { + ReadableByteChannel reader = factory.open(input); + if (!(reader instanceof SeekableByteChannel)) { + throw new UnsupportedOperationException( + "Unable to seek in stream for " + input); + } + + SeekableByteChannel seeker = (SeekableByteChannel) reader; + + return new TextFileIterator( + new CopyableSeekableByteChannel(seeker), + stripTrailingNewlines, startOffset, endOffset); + } + + class TextFileMultiIterator extends LazyMultiSourceIterator { + private final IOChannelFactory factory; + private final boolean stripTrailingNewlines; + + public TextFileMultiIterator(IOChannelFactory factory, + Iterator inputs, boolean stripTrailingNewlines) { + super(inputs); + this.factory = factory; + this.stripTrailingNewlines = stripTrailingNewlines; + } + + @Override + protected SourceIterator open(String input) throws IOException { + return newSourceIteratorForFile(factory, input, stripTrailingNewlines); + } + } + + class TextFileIterator extends FileBasedIterator { + private final boolean stripTrailingNewlines; + private ScanState state; + + TextFileIterator(CopyableSeekableByteChannel seeker, + boolean stripTrailingNewlines, + long startOffset, + @Nullable Long endOffset) throws IOException { + this(seeker, stripTrailingNewlines, startOffset, startOffset, endOffset, + new ProgressTrackerGroup() { + @Override + protected void report(Integer lineLength) { + notifyElementRead(lineLength.longValue()); + } + }.start(), new ScanState(BUF_SIZE, !stripTrailingNewlines)); + } + + private TextFileIterator(CopyableSeekableByteChannel seeker, + boolean stripTrailingNewlines, + long startOffset, + long offset, + @Nullable Long endOffset, + ProgressTracker tracker, + ScanState state) throws IOException { + super(seeker, startOffset, offset, endOffset, tracker); + + this.stripTrailingNewlines = stripTrailingNewlines; + this.state = state; + } + + private TextFileIterator(TextFileIterator it) throws IOException { + this(it.seeker.copy(), it.stripTrailingNewlines, + /* Correctly adjust the start position of the seeker given + * that it may hold bytes that have been read and now reside + * in the read buffer (that is copied during cloning) */ + it.startOffset + it.state.totalBytesRead, + it.offset, + it.endOffset, it.tracker.copy(), it.state.copy()); + } + + @Override + public SourceIterator copy() throws IOException { + return new TextFileIterator(this); + } + + /** + * Reads a line of text. A line is considered to be terminated by any + * one of a line feed ({@code '\n'}), a carriage return + * ({@code '\r'}), or a carriage return followed immediately by a linefeed + * ({@code "\r\n"}). + * + * @return a {@code ByteArrayOutputStream} containing the contents of the + * line, with any line-termination characters stripped if + * keepNewlines==false, or {@code null} if the end of the stream has + * been reached. + * @throws IOException if an I/O error occurs + */ + @Override + protected ByteArrayOutputStream readElement() + throws IOException { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(BUF_SIZE); + + int charsConsumed = 0; + while (true) { + // Attempt to read blocks of data at a time + // until a separator is found. 
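+        // readBytes() returns false only at end-of-stream, so this loop ends
+        // either once a separator has been consumed or once the input is
+        // exhausted.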
+ if (!state.readBytes(stream)) { + break; + } + + int consumed = state.consumeUntilSeparator(buffer); + charsConsumed += consumed; + if (consumed > 0 && state.separatorFound()) { + if (state.lastByteRead() == '\r') { + charsConsumed += state.copyCharIfLinefeed(buffer, stream); + } + break; + } + } + + if (charsConsumed == 0) { + // Note that charsConsumed includes the size of any separators that may + // have been stripped off -- so if we didn't get anything, we're at the + // end of the file. + return null; + } + + offset += charsConsumed; + tracker.saw(charsConsumed); + return buffer; + } + } + + /** + * ScanState encapsulates the state for the current buffer of text + * being scanned. + */ + private static class ScanState { + private int start; // Valid bytes in buf start at this index + private int pos; // Where the separator is in the buf (if one was found) + private int end; // the index of the end of bytes in buf + private byte[] buf; + private boolean keepNewlines; + private byte lastByteRead; + private long totalBytesRead; + + public ScanState(int size, boolean keepNewlines) { + this.start = 0; + this.pos = 0; + this.end = 0; + this.buf = new byte[size]; + this.keepNewlines = keepNewlines; + totalBytesRead = 0; + } + + public ScanState copy() { + byte[] bufCopy = new byte[buf.length]; // copy :( + System.arraycopy(buf, start, bufCopy, start, end - start); + return new ScanState( + this.keepNewlines, this.start, this.pos, this.end, + bufCopy, this.lastByteRead, 0); + } + + private ScanState( + boolean keepNewlines, int start, int pos, int end, + byte[] buf, byte lastByteRead, long totalBytesRead) { + this.start = start; + this.pos = pos; + this.end = end; + this.buf = buf; + this.keepNewlines = keepNewlines; + this.lastByteRead = lastByteRead; + this.totalBytesRead = totalBytesRead; + } + + public boolean readBytes(PushbackInputStream stream) throws IOException { + if (start < end) { + return true; + } + assert end <= buf.length : end + " > " + buf.length; + int bytesRead = stream.read(buf, end, buf.length - end); + if (bytesRead == -1) { + return false; + } + totalBytesRead += bytesRead; + end += bytesRead; + return true; + } + + /** + * Consumes characters until a separator character is found or the + * end of buffer is reached. + * + * Updates the state to indicate the position of the separator + * character. If pos==len, no separator was found. + * + * @return the number of characters consumed. + */ + public int consumeUntilSeparator(ByteArrayOutputStream out) { + for (pos = start; pos < end; ++pos) { + lastByteRead = buf[pos]; + if (separatorFound()) { + int charsConsumed = (pos - start + 1); // The separator is consumed + copyToOutputBuffer(out); + start = pos + 1; // skip the separator + return charsConsumed; + } + } + // No separator found + assert pos == end; + int charsConsumed = (pos - start); + out.write(buf, start, charsConsumed); + start = 0; + end = 0; + pos = 0; + return charsConsumed; + } + + public boolean separatorFound() { + return lastByteRead == '\n' || lastByteRead == '\r'; + } + + public byte lastByteRead() { + return buf[pos]; + } + + public int bytesBuffered() { + assert end >= start : end + " must be >= " + start; + return end - start; + } + + /** + * Copies data from the input buffer to the output buffer. + * + * If keepNewlines==true, line-termination characters are included in the copy. 
+ */ + private void copyToOutputBuffer(ByteArrayOutputStream out) { + int charsCopied = pos - start; + if (keepNewlines && separatorFound()) { + charsCopied++; + } + out.write(buf, start, charsCopied); + } + + /** + * Scans the input buffer to determine if a matched carriage return + * has an accompanying linefeed and process the input buffer accordingly. + * + * If keepNewlines==true and a linefeed character is detected, + * it is included in the copy. + * + * @return the number of characters consumed + */ + private int copyCharIfLinefeed(ByteArrayOutputStream out, PushbackInputStream stream) + throws IOException { + int charsConsumed = 0; + // Check to make sure we don't go off the end of the buffer + if ((pos + 1) < end) { + if (buf[pos + 1] == '\n') { + charsConsumed++; + pos++; + start++; + if (keepNewlines) { + out.write('\n'); + } + } + } else { + // We are at the end of the buffer and need one more + // byte. Get it the slow but safe way. + int b = stream.read(); + if (b == '\n') { + charsConsumed++; + totalBytesRead++; + if (keepNewlines) { + out.write(b); + } + } else if (b != -1) { + // Consider replacing unread() since it may be slow if + // iterators are cloned frequently. + stream.unread(b); + } + } + return charsConsumed; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java new file mode 100644 index 0000000000000..a15c2d505c47f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java @@ -0,0 +1,74 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.getLong; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.Serializer; + +/** + * Creates a TextSource from a CloudObject spec. + */ +public class TextSourceFactory { + // Do not instantiate. 
+ private TextSourceFactory() {} + + public static TextSource create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(spec, coder); + } + + static TextSource create(CloudObject spec, + Coder coder) + throws Exception { + return create(spec, coder, true); + } + + public static TextSource create(Source spec) + throws Exception { + return create( + CloudObject.fromSpec(spec.getSpec()), + Serializer.deserialize(spec.getCodec(), Coder.class)); + } + + static TextSource create(CloudObject spec, + Coder coder, + boolean useDefaultBufferSize) throws Exception { + String filenameOrPattern = getString(spec, PropertyNames.FILENAME, null); + if (filenameOrPattern == null) { + filenameOrPattern = getString(spec, PropertyNames.FILEPATTERN, null); + } + return new TextSource<>( + filenameOrPattern, + getBoolean(spec, PropertyNames.STRIP_TRAILING_NEWLINES, true), + getLong(spec, PropertyNames.START_OFFSET, null), + getLong(spec, PropertyNames.END_OFFSET, null), + coder, + useDefaultBufferSize); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java new file mode 100644 index 0000000000000..d7d0cf7cf841e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.io.IOException; +import java.util.Iterator; + +import javax.annotation.Nullable; + +/** + * A source that reads from a shuffled dataset, without any key grouping. + * Returns just the values. (This reader is for an UNGROUPED shuffle session.) 
+ * + * @param the type of the elements read from the source + */ +public class UngroupedShuffleSource extends Source { + final byte[] shuffleReaderConfig; + final String startShufflePosition; + final String stopShufflePosition; + final Coder coder; + + public UngroupedShuffleSource(PipelineOptions options, + byte[] shuffleReaderConfig, + @Nullable String startShufflePosition, + @Nullable String stopShufflePosition, + Coder coder) { + this.shuffleReaderConfig = shuffleReaderConfig; + this.startShufflePosition = startShufflePosition; + this.stopShufflePosition = stopShufflePosition; + this.coder = coder; + } + + @Override + public SourceIterator iterator() throws IOException { + Preconditions.checkArgument(shuffleReaderConfig != null); + return iterator(new BatchingShuffleEntryReader( + new ChunkingShuffleBatchReader(new ApplianceShuffleReader( + shuffleReaderConfig)))); + } + + SourceIterator iterator(ShuffleEntryReader reader) throws IOException { + return new UngroupedShuffleSourceIterator(reader); + } + + /** + * A SourceIterator that reads from a ShuffleEntryReader and extracts + * just the values. + */ + class UngroupedShuffleSourceIterator extends AbstractSourceIterator { + Iterator iterator; + + UngroupedShuffleSourceIterator(ShuffleEntryReader reader) + throws IOException { + this.iterator = reader.read( + ByteArrayShufflePosition.fromBase64(startShufflePosition), + ByteArrayShufflePosition.fromBase64(stopShufflePosition)); + } + + @Override + public boolean hasNext() throws IOException { + return iterator.hasNext(); + } + + @Override + public T next() throws IOException { + ShuffleEntry record = iterator.next(); + // Throw away the primary and the secondary keys. + byte[] value = record.getValue(); + notifyElementRead(record.length()); + return CoderUtils.decodeFromByteArray(coder, value); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java new file mode 100644 index 0000000000000..adff71226d6b8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.decodeBase64; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +/** + * Creates an UngroupedShuffleSource from a CloudObject spec. + */ +public class UngroupedShuffleSourceFactory { + // Do not instantiate. 
+ private UngroupedShuffleSourceFactory() {} + + public static UngroupedShuffleSource create( + PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext executionContext) + throws Exception { + return create(options, spec, coder); + } + + static UngroupedShuffleSource create( + PipelineOptions options, + CloudObject spec, + Coder coder) + throws Exception { + return new UngroupedShuffleSource<>( + options, + decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), + getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), + coder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java new file mode 100644 index 0000000000000..85805773c7060 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker.logging; + +import static com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingInitializer.LEVELS; + +import com.google.common.base.MoreObjects; + +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; +import org.slf4j.MDC; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.logging.Formatter; +import java.util.logging.LogRecord; + +/** + * Formats {@link LogRecord} into the following format: + * ISO8601Date LogLevel JobId WorkerId WorkId ThreadId LoggerName LogMessage + * with one or more additional lines for any {@link Throwable} associated with + * the {@link LogRecord}. The exception is output using + * {@link Throwable#printStackTrace()}. + */ +public class DataflowWorkerLoggingFormatter extends Formatter { + private static final DateTimeFormatter DATE_FORMATTER = + ISODateTimeFormat.dateTime().withZoneUTC(); + public static final String MDC_DATAFLOW_JOB_ID = "dataflow.jobId"; + public static final String MDC_DATAFLOW_WORKER_ID = "dataflow.workerId"; + public static final String MDC_DATAFLOW_WORK_ID = "dataflow.workId"; + + @Override + public String format(LogRecord record) { + String exception = formatException(record.getThrown()); + return DATE_FORMATTER.print(record.getMillis()) + + " " + MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), + record.getLevel().getName()) + + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_JOB_ID), "unknown") + + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORKER_ID), "unknown") + + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORK_ID), "unknown") + + " " + record.getThreadID() + + " " + record.getLoggerName() + + " " + record.getMessage() + "\n" + + (exception != null ? 
exception : ""); + } + + /** + * Formats the throwable as per {@link Throwable#printStackTrace()}. + * + * @param thrown The throwable to format. + * @return A string containing the contents of {@link Throwable#printStackTrace()}. + */ + private String formatException(Throwable thrown) { + if (thrown == null) { + return null; + } + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + thrown.printStackTrace(pw); + pw.close(); + return sw.toString(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java new file mode 100644 index 0000000000000..80ccf7084bcbf --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker.logging; + +import com.google.common.collect.ImmutableBiMap; + +import java.io.IOException; +import java.util.logging.ConsoleHandler; +import java.util.logging.FileHandler; +import java.util.logging.Formatter; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.LogManager; +import java.util.logging.Logger; + +/** + * Sets up java.util.Logging configuration on the Dataflow Worker Harness with a + * console and file logger. The console and file loggers use the + * {@link DataflowWorkerLoggingFormatter} format. A user can override + * the logging level and location by specifying the Java system properties + * "dataflow.worker.logging.level" and "dataflow.worker.logging.location" respectively. + * The default log level is INFO and the default location is a file named dataflow-worker.log + * within the systems temporary directory. 
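For reference, a record logged before any MDC values are set would be formatted along these lines (timestamp, thread id, logger name, and message are illustrative):

  2014-12-13T18:56:14.123Z INFO unknown unknown unknown 20 com.google.cloud.dataflow.sdk.Pipeline Job started

The handlers configured below can be tuned on the worker JVM command line via the two system properties named above, for example (the file path is illustrative):

  -Ddataflow.worker.logging.level=DEBUG -Ddataflow.worker.logging.location=/var/log/dataflow-worker.log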
+ */ +public class DataflowWorkerLoggingInitializer { + private static final String DEFAULT_LOGGING_LOCATION = "/tmp/dataflow-worker.log"; + private static final String ROOT_LOGGER_NAME = ""; + public static final String DATAFLOW_WORKER_LOGGING_LEVEL = "dataflow.worker.logging.level"; + public static final String DATAFLOW_WORKER_LOGGING_LOCATION = "dataflow.worker.logging.location"; + public static final ImmutableBiMap LEVELS = + ImmutableBiMap.builder() + .put(Level.SEVERE, "ERROR") + .put(Level.WARNING, "WARNING") + .put(Level.INFO, "INFO") + .put(Level.FINE, "DEBUG") + .put(Level.FINEST, "TRACE") + .build(); + private static final String DEFAULT_LOG_LEVEL = LEVELS.get(Level.INFO); + + public void initialize() { + initialize(LogManager.getLogManager()); + } + + void initialize(LogManager logManager) { + try { + Level logLevel = LEVELS.inverse().get( + System.getProperty(DATAFLOW_WORKER_LOGGING_LEVEL, DEFAULT_LOG_LEVEL)); + Formatter formatter = new DataflowWorkerLoggingFormatter(); + + FileHandler fileHandler = new FileHandler( + System.getProperty(DATAFLOW_WORKER_LOGGING_LOCATION, DEFAULT_LOGGING_LOCATION), + true /* Append so that we don't squash existing logs */); + fileHandler.setFormatter(formatter); + fileHandler.setLevel(logLevel); + + ConsoleHandler consoleHandler = new ConsoleHandler(); + consoleHandler.setFormatter(formatter); + consoleHandler.setLevel(logLevel); + + // Reset the global log manager, get the root logger and remove the default log handlers. + logManager.reset(); + Logger rootLogger = logManager.getLogger(ROOT_LOGGER_NAME); + for (Handler handler : rootLogger.getHandlers()) { + rootLogger.removeHandler(handler); + } + + rootLogger.setLevel(logLevel); + rootLogger.addHandler(consoleHandler); + rootLogger.addHandler(fileHandler); + } catch (SecurityException | IOException e) { + throw new ExceptionInInitializerError(e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java new file mode 100644 index 0000000000000..615ed64743922 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Implementation of the harness that runs on each Google Compute Engine instance to coordinate + * execution of Pipeline code. + */ +@ParametersAreNonnullByDefault +package com.google.cloud.dataflow.sdk.runners.worker; + +import javax.annotation.ParametersAreNonnullByDefault; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java new file mode 100644 index 0000000000000..d4fe32ffd86f3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java @@ -0,0 +1,374 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Collection; + +/** + * An assertion on the contents of a {@link PCollection} + * incorporated into the pipeline. Such an assertion + * can be checked no matter what kind of + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} is + * used, so it's good for testing using the + * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner}, + * the + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}, + * etc. + * + *

Note that the {@code DataflowAssert} call must precede the call + * to {@link com.google.cloud.dataflow.sdk.Pipeline#run}. + * + *

Examples of use: + *

{@code
+ * Pipeline p = TestPipeline.create();
+ * ...
+ * PCollection<String> output =
+ *      input
+ *      .apply(ParDo.of(new TestDoFn()));
+ * DataflowAssert.that(output)
+ *     .containsInAnyOrder("out1", "out2", "out3");
+ * ...
+ * PCollection<Integer> ints = ...
+ * PCollection<Integer> sum =
+ *     ints
+ *     .apply(Combine.globally(new SumInts()));
+ * DataflowAssert.that(sum)
+ *     .is(42);
+ * ...
+ * p.run();
+ * }
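Arbitrary checks that the built-in matchers do not cover can be written with satisfies(), defined further down; a minimal sketch, assuming the asserted PCollection holds Strings and that JUnit is on the test classpath:

  DataflowAssert.that(output)
      .satisfies(new SerializableFunction<Iterable<String>, Void>() {
        @Override
        public Void apply(Iterable<String> actual) {
          // Every element produced by TestDoFn above should start with "out".
          for (String s : actual) {
            org.junit.Assert.assertTrue(s.startsWith("out"));
          }
          return null;
        }
      });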
+ * + *

JUnit and Hamcrest must be linked in by any code that uses DataflowAssert. + * + * @param The type of elements in the input collection. + */ +public class DataflowAssert { + /** + * Constructs an IterableAssert for the elements of the provided + * {@code PCollection}. + */ + public static IterableAssert that(PCollection futureResult) { + return new IterableAssert<>(futureResult.apply(View.asIterable())); + } + + /** + * Constructs an IterableAssert for the value of the provided + * {@code PCollection>}, which must be a singleton. + */ + public static IterableAssert thatSingletonIterable( + PCollection> futureResult) { + return new IterableAssert<>(futureResult.apply(View.>asSingleton())); + } + + /** + * Constructs an IterableAssert for the value of the provided + * {@code PCollectionView, ?>}. + */ + public static IterableAssert thatIterable( + PCollectionView, ?> futureResult) { + return new IterableAssert<>(futureResult); + } + + /** + * An assertion about the contents of a {@link PCollectionView<, ?>} + */ + public static class IterableAssert implements Serializable { + private final PCollectionView, ?> actualResults; + + private IterableAssert(PCollectionView, ?> futureResult) { + actualResults = futureResult; + } + + /** + * Applies a SerializableFunction to check the elements of the Iterable. + * + *

Returns this IterableAssert. + */ + public IterableAssert satisfies( + final SerializableFunction, Void> checkerFn) { + + actualResults.getPipeline() + .apply(Create.of((Void) null)) + .setCoder(VoidCoder.of()) + .apply(ParDo + .withSideInputs(actualResults) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + Iterable actualContents = c.sideInput(actualResults); + checkerFn.apply(actualContents); + } + })); + + return this; + } + + /** + * Checks that the Iterable contains the expected elements, in any + * order. + * + *

Returns this IterableAssert. + */ + public IterableAssert containsInAnyOrder(T... expectedElements) { + return this.satisfies(new AssertContainsInAnyOrder(expectedElements)); + } + + /** + * Checks that the Iterable contains the expected elements, in any + * order. + * + *

Returns this IterableAssert. + */ + public IterableAssert containsInAnyOrder( + Collection expectedElements) { + return this.satisfies(new AssertContainsInAnyOrder(expectedElements)); + } + + /** + * Checks that the Iterable contains the expected elements, in the + * specified order. + * + *

Returns this IterableAssert. + */ + public IterableAssert containsInOrder(T... expectedElements) { + return this.satisfies(new AssertContainsInOrder(expectedElements)); + } + + /** + * Checks that the Iterable contains the expected elements, in the + * specified order. + * + *

Returns this IterableAssert. + */ + public IterableAssert containsInOrder(Collection expectedElements) { + return this.satisfies(new AssertContainsInOrder(expectedElements)); + } + + /** + * SerializableFunction that performs an {@code Assert.assertThat()} + * operation using a {@code Matcher} operation that takes an array + * of elements. + */ + static class AssertThatIterable extends AssertThat, T[]> { + AssertThatIterable(T[] expected, + String matcherClassName, + String matcherFactoryMethodName) { + super(expected, Object[].class, + matcherClassName, matcherFactoryMethodName); + } + } + + /** + * SerializableFunction that verifies that an Iterable contains + * expected items in any order. + */ + static class AssertContainsInAnyOrder extends AssertThatIterable { + AssertContainsInAnyOrder(T... expected) { + super(expected, + "org.hamcrest.collection.IsIterableContainingInAnyOrder", + "containsInAnyOrder"); + } + @SuppressWarnings("unchecked") + AssertContainsInAnyOrder(Collection expected) { + this((T[]) expected.toArray()); + } + } + + /** + * SerializableFunction that verifies that an Iterable contains + * expected items in the provided order. + */ + static class AssertContainsInOrder extends AssertThatIterable { + AssertContainsInOrder(T... expected) { + super(expected, + "org.hamcrest.collection.IsIterableContainingInOrder", + "contains"); + } + @SuppressWarnings("unchecked") + AssertContainsInOrder(Collection expected) { + this((T[]) expected.toArray()); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Constructs a SingletonAssert for the value of the provided + * {@code PCollection}, which must be a singleton. + */ + public static SingletonAssert thatSingleton(PCollection futureResult) { + return new SingletonAssert<>(futureResult.apply(View.asSingleton())); + } + + /** + * An assertion about a single value. + */ + public static class SingletonAssert implements Serializable { + private final PCollectionView actualResult; + + private SingletonAssert(PCollectionView futureResult) { + actualResult = futureResult; + } + + /** + * Applies a SerializableFunction to check the value of this + * SingletonAssert's view. + * + *

Returns this SingletonAssert. + */ + public SingletonAssert satisfies(final SerializableFunction checkerFn) { + actualResult.getPipeline() + .apply(Create.of((Void) null)) + .setCoder(VoidCoder.of()) + .apply(ParDo + .withSideInputs(actualResult) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + T actualContents = c.sideInput(actualResult); + checkerFn.apply(actualContents); + } + })); + + return this; + } + + /** + * Checks that the value of this SingletonAssert's view is equal + * to the expected value. + * + *

Returns this SingletonAssert. + */ + public SingletonAssert is(T expectedValue) { + return this.satisfies(new AssertIs(expectedValue)); + } + + /** + * SerializableFunction that performs an {@code Assert.assertThat()} + * operation using a {@code Matcher} operation that takes a single element. + */ + static class AssertThatValue extends AssertThat { + AssertThatValue(T expected, + String matcherClassName, + String matcherFactoryMethodName) { + super(expected, Object.class, + matcherClassName, matcherFactoryMethodName); + } + } + + /** + * SerializableFunction that verifies that a value is equal to an + * expected value. + */ + public static class AssertIs extends AssertThatValue { + AssertIs(T expected) { + super(expected, "org.hamcrest.core.IsEqual", "equalTo"); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + + // Do not instantiate. + private DataflowAssert() {} + + /** + * SerializableFunction that performs an {@code Assert.assertThat()} + * operation using a {@code Matcher} operation. + * + *

The MatcherFactory should take an {@code Expected} and + * produce a Matcher to be used to check an {@code Actual} value + * against. + */ + public static class AssertThat + implements SerializableFunction { + final Expected expected; + final Class expectedClass; + final String matcherClassName; + final String matcherFactoryMethodName; + + AssertThat(Expected expected, + Class expectedClass, + String matcherClassName, + String matcherFactoryMethodName) { + this.expected = expected; + this.expectedClass = expectedClass; + this.matcherClassName = matcherClassName; + this.matcherFactoryMethodName = matcherFactoryMethodName; + } + + @Override + public Void apply(Actual in) { + try { + Method matcherFactoryMethod = Class.forName(this.matcherClassName) + .getMethod(this.matcherFactoryMethodName, expectedClass); + Object matcher = matcherFactoryMethod.invoke(null, (Object) expected); + Method assertThatMethod = Class.forName("org.junit.Assert") + .getMethod("assertThat", + Object.class, + Class.forName("org.hamcrest.Matcher")); + assertThatMethod.invoke(null, in, matcher); + } catch (InvocationTargetException e) { + // An error in the assertThat or matcher itself. + throw new RuntimeException(e); + } catch (ReflectiveOperationException e) { + // An error looking up the classes and methods. + throw new RuntimeException( + "DataflowAssert requires that JUnit and Hamcrest be linked in.", + e); + } + return null; + } + } + + /** + * SerializableFunction that performs an {@code Assert.assertThat()} + * operation using a {@code Matcher} operation that takes a single element. + */ + static class AssertThatValue extends AssertThat { + AssertThatValue(T expected, + String matcherClassName, + String matcherFactoryMethodName) { + super(expected, Object.class, + matcherClassName, matcherFactoryMethodName); + } + } + + /** + * SerializableFunction that verifies that a value is equal to an + * expected value. + */ + public static class AssertIs extends AssertThatValue { + public AssertIs(T expected) { + super(expected, "org.hamcrest.core.IsEqual", "equalTo"); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java new file mode 100644 index 0000000000000..048ea36a25338 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +/** + * Category tag for tests that can be run on the DataflowPipelineRunner if the + * runIntegrationTestOnService System property is set to true. + * Example usage: + *


+ *     {@literal @}Test
+ *     {@literal @}Category(RunnableOnService.class)
+ *     public void testParDo() {...
+ * 
+ */ +public interface RunnableOnService {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java new file mode 100644 index 0000000000000..e9f8f828120fd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions; + +/** + * A set of options used to configure the {@link TestPipeline}. + */ +public interface TestDataflowPipelineOptions extends BlockingDataflowPipelineOptions { + +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java new file mode 100644 index 0000000000000..96da50189a905 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; + +/** + * TestDataflowPipelineRunner is a pipeline runner that wraps a + * DataflowPipelineRunner when running tests against the {@link TestPipeline}. 
+ * + * @see TestPipeline + */ +public class TestDataflowPipelineRunner extends BlockingDataflowPipelineRunner { + TestDataflowPipelineRunner( + DataflowPipelineRunner internalRunner, + MonitoringUtil.JobMessagesHandler jobMessagesHandler) { + super(internalRunner, jobMessagesHandler); + } + + @Override + public PipelineJobState run(Pipeline pipeline) { + PipelineJobState state = super.run(pipeline); + if (state.getJobState() != MonitoringUtil.JobState.DONE) { + throw new AssertionError("The dataflow failed."); + } + return state; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java new file mode 100644 index 0000000000000..6044365a664d9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; +import com.google.common.base.Optional; +import com.google.common.collect.Iterators; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; + +/** + * A creator of test pipelines which can be used inside of tests that can be + * configured to run locally or against the live service. + * + *

It is recommended to tag hand-selected tests for this purpose using the + * RunnableOnService Category annotation, as each test run against the service + * will spin up and tear down a single VM. + * + *

In order to run tests on the dataflow pipeline service, the following + * conditions must be met: + *

    + *
  • runIntegrationTestOnService System property must be set to true. + *
  • System property "projectName" must be set to your Cloud project. + *
  • System property "temp_gcs_directory" must be set to a valid GCS bucket. + *
  • Jars containing the SDK and test classes must be added to the test classpath. + *
+ * + *

Use {@link DataflowAssert} for tests, as it integrates with this test + * harness in both direct and remote execution modes. For example: + * + *

{@code
+ * Pipeline p = TestPipeline.create();
+ * PCollection<Integer> output = ...
+ *
+ * DataflowAssert.that(output)
+ *     .containsInAnyOrder(1, 2, 3, 4);
+ * p.run();
+ * }
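When the service path is taken, getPipelineOptions() below deserializes the dataflowOptions System property from JSON into TestDataflowPipelineOptions; an illustrative invocation (the option names inside the JSON are assumptions, not a complete list):

  -DrunIntegrationTestOnService=true
  -DdataflowOptions='{"project": "my-project", "stagingLocation": "gs://my-bucket/staging"}'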
+ * + */ +public class TestPipeline extends Pipeline { + private static final String PROPERTY_DATAFLOW_OPTIONS = "dataflowOptions"; + private static final Logger LOG = LoggerFactory.getLogger(TestPipeline.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Creates and returns a new test pipeline. + * + *

Use {@link DataflowAssert} to add tests, then call + * {@link Pipeline#run} to execute the pipeline and check the tests. + */ + public static TestPipeline create() { + if (Boolean.parseBoolean(System.getProperty("runIntegrationTestOnService"))) { + TestDataflowPipelineOptions options = getPipelineOptions(); + LOG.info("Using passed in options: " + options); + return new TestPipeline(createRunner(options), options); + } else { + DirectPipelineRunner directRunner = DirectPipelineRunner.createForTest(); + return new TestPipeline(directRunner, directRunner.getPipelineOptions()); + } + } + + private TestPipeline(PipelineRunner runner, PipelineOptions options) { + super(runner, options); + } + + /** + * Creates and returns a TestDataflowPipelineRunner based on + * configuration via system properties. + */ + private static TestDataflowPipelineRunner createRunner( + TestDataflowPipelineOptions options) { + + DataflowPipelineRunner dataflowRunner = DataflowPipelineRunner + .fromOptions(options); + return new TestDataflowPipelineRunner(dataflowRunner, + new MonitoringUtil.PrintHandler(options.getJobMessageOutput())); + } + + /** + * Creates PipelineOptions for testing with a DataflowPipelineRunner. + */ + static TestDataflowPipelineOptions getPipelineOptions() { + try { + TestDataflowPipelineOptions options = MAPPER.readValue( + System.getProperty(PROPERTY_DATAFLOW_OPTIONS), PipelineOptions.class) + .as(TestDataflowPipelineOptions.class); + options.setAppName(getAppName()); + options.setJobName(getJobName()); + return options; + } catch (IOException e) { + throw new RuntimeException("Unable to instantiate test options from system property " + + PROPERTY_DATAFLOW_OPTIONS + ":" + System.getProperty(PROPERTY_DATAFLOW_OPTIONS), e); + } + } + + /** Returns the class name of the test, or a default name. */ + private static String getAppName() { + Optional stackTraceElement = findCallersStackTrace(); + if (stackTraceElement.isPresent()) { + String className = stackTraceElement.get().getClassName(); + return className.contains(".") + ? className.substring(className.lastIndexOf(".") + 1) + : className; + } + return "UnitTest"; + } + + /** Returns the method name of the test, or a default name. */ + private static String getJobName() { + Optional stackTraceElement = findCallersStackTrace(); + if (stackTraceElement.isPresent()) { + return stackTraceElement.get().getMethodName(); + } + return "unittestjob"; + } + + /** Returns the {@link StackTraceElement} of the calling class. */ + private static Optional findCallersStackTrace() { + Iterator elements = + Iterators.forArray(Thread.currentThread().getStackTrace()); + // First find the TestPipeline class in the stack trace. + while (elements.hasNext()) { + StackTraceElement next = elements.next(); + if (TestPipeline.class.getName().equals(next.getClassName())) { + break; + } + } + // Then find the first instance after which is not the TestPipeline + while (elements.hasNext()) { + StackTraceElement next = elements.next(); + if (!TestPipeline.class.getName().equals(next.getClassName())) { + return Optional.of(next); + } + } + return Optional.absent(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java new file mode 100644 index 0000000000000..687cb64530efa --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; + +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * A utility class for testing {@link WindowingFn}s. + */ +public class WindowingFnTestUtils { + + /** + * Creates a Set of elements to be used as expected output in + * {@link #runWindowingFn}. + */ + public static Set set(long... timestamps) { + Set result = new HashSet<>(); + for (long timestamp : timestamps) { + result.add(timestampValue(timestamp)); + } + return result; + } + + + /** + * Runs the {@link WindowingFn} over the provided input, returning a map + * of windows to the timestamps in those windows. + */ + public static Map> runWindowingFn( + WindowingFn windowingFn, + List timestamps) throws Exception { + + final TestWindowSet windowSet = new TestWindowSet(); + for (final Long timestamp : timestamps) { + for (W window : windowingFn.assignWindows( + new TestAssignContext(new Instant(timestamp), windowingFn))) { + windowSet.put(window, timestampValue(timestamp)); + } + windowingFn.mergeWindows(new TestMergeContext(windowSet, windowingFn)); + } + Map> actual = new HashMap<>(); + for (W window : windowSet.windows()) { + actual.put(window, windowSet.get(window)); + } + return actual; + } + + private static String timestampValue(long timestamp) { + return "T" + new Instant(timestamp); + } + + /** + * Test implementation of AssignContext. + */ + private static class TestAssignContext + extends WindowingFn.AssignContext { + private Instant timestamp; + + public TestAssignContext(Instant timestamp, WindowingFn windowingFn) { + windowingFn.super(); + this.timestamp = timestamp; + } + + @Override + public T element() { + return null; + } + + @Override + public Instant timestamp() { + return timestamp; + } + + @Override + public Collection windows() { + return null; + } + } + + /** + * Test implementation of MergeContext. + */ + private static class TestMergeContext + extends WindowingFn.MergeContext { + private TestWindowSet windowSet; + + public TestMergeContext( + TestWindowSet windowSet, WindowingFn windowingFn) { + windowingFn.super(); + this.windowSet = windowSet; + } + + @Override + public Collection windows() { + return windowSet.windows(); + } + + @Override + public void merge(Collection toBeMerged, W mergeResult) { + windowSet.merge(toBeMerged, mergeResult); + } + } + + /** + * A WindowSet useful for testing WindowingFns which simply + * collects the placed elements into multisets. 
+ */ + private static class TestWindowSet { + + private Map> elements = new HashMap<>(); + private List> emitted = new ArrayList<>(); + + public void put(W window, V value) { + Set all = elements.get(window); + if (all == null) { + all = new HashSet<>(); + elements.put(window, all); + } + all.add(value); + } + + public void remove(W window) { + elements.remove(window); + } + + public void merge(Collection otherWindows, W window) { + if (otherWindows.isEmpty()) { + return; + } + Set merged = new HashSet<>(); + if (elements.containsKey(window) && !otherWindows.contains(window)) { + merged.addAll(elements.get(window)); + } + for (W w : otherWindows) { + if (!elements.containsKey(w)) { + throw new IllegalArgumentException("Tried to merge a non-existent window:" + w); + } + merged.addAll(elements.get(w)); + elements.remove(w); + } + elements.put(window, merged); + } + + public void markCompleted(W window) {} + + public Collection windows() { + return elements.keySet(); + } + + public boolean contains(W window) { + return elements.containsKey(window); + } + + // For testing. + + public Set get(W window) { + return elements.get(window); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java new file mode 100644 index 0000000000000..799c1ac98bc8c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines utilities for unit testing Dataflow pipelines. The tests for the {@code PTransform}s and + * examples included the Dataflow SDK provide examples of using these utilities. + */ +package com.google.cloud.dataflow.sdk.testing; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java new file mode 100644 index 0000000000000..13ad17efa7027 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java @@ -0,0 +1,64 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.transforms; + +/** + * An {@code Aggregator} enables arbitrary monitoring in user code. + * + *

Aggregators are created by calling {@link DoFn.Context#createAggregator}, + * typically from {@link DoFn#startBundle}. Elements can be added to the + * {@code Aggregator} by calling {@link Aggregator#addValue}. + * + *

Aggregators are visible in the monitoring UI, along with their current value, + * when the pipeline is run using the DataflowPipelineRunner or + * BlockingDataflowPipelineRunner. Aggregators may not become visible until the + * system begins executing the ParDo transform that created them or until their + * initial value is changed. + * + *

Example: + *

 {@code
+ * class MyDoFn extends DoFn {
+ *   private Aggregator<Integer> myAggregator;
+ *
+ *   {@literal @}Override
+ *   public void startBundle(Context c) {
+ *     myAggregator = c.createAggregator("myCounter", new Sum.SumIntegerFn());
+ *   }
+ *
+ *   {@literal @}Override
+ *   public void processElement(ProcessContext c) {
+ *     myAggregator.addValue(1);
+ *   }
+ * }
+ * } 
+ * + * @param the type of input values + */ +public interface Aggregator { + + /** + * Adds a new value into the Aggregator. + */ + public void addValue(VI value); + + // TODO: Consider the following additional API conveniences: + // - In addition to createAggregator(), consider adding getAggregator() to + // avoid the need to store the aggregator locally in a DoFn, i.e., create + // if not already present. + // - Add a shortcut for the most common aggregator: + // c.createAggregator("name", new Sum.SumIntegerFn()). +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java new file mode 100644 index 0000000000000..ff5687fe30fb3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -0,0 +1,723 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.CustomCoder; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.UnmodifiableIterator; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; + +/** + * {@code PTransform}s for getting an idea of a {@code PCollection}'s + * data distribution using approximate {@code N}-tiles, either + * globally or per-key. + */ +public class ApproximateQuantiles { + + /** + * Returns a {@code PTransform} that takes a {@code PCollection} + * and returns a {@code PCollection>} whose sinlge value is a + * {@code List} of the approximate {@code N}-tiles of the elements + * of the input {@code PCollection}. This gives an idea of the + * distribution of the input elements. + * + *

The computed {@code List} is of size {@code numQuantiles}, + * and contains the input elements' minimum value, + * {@code numQuantiles-2} intermediate values, and maximum value, in + * sorted order, using the given {@code Comparator} to order values. + * To compute traditional {@code N}-tiles, one should use + * {@code ApproximateQuantiles.globally(compareFn, N+1)}. + * + *

If there are fewer input elements than {@code numQuantiles}, + * then the result {@code List} will contain all the input elements, + * in sorted order. + * + *

The argument {@code Comparator} must be {@code Serializable}. + * + *

Example of use: + *

 {@code
+   * PCollection<String> pc = ...;
+   * PCollection<List<String>> quantiles =
+   *     pc.apply(ApproximateQuantiles.globally(11, stringCompareFn));
+   * } 
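As a sketch of the natural-ordering overload declared below, with illustrative input (a pipeline p and a collection ints holding the integers 0 through 100 are assumed):

  PCollection<Integer> input = p.apply(Create.of(ints));
  PCollection<List<Integer>> quartiles =
      input.apply(ApproximateQuantiles.<Integer>globally(5));
  // Expected output: a single List close to [0, 25, 50, 75, 100], i.e. the
  // minimum, three intermediate quantiles, and the maximum.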
+ * + * @param the type of the elements in the input {@code PCollection} + * @param numQuantiles the number of elements in the resulting + * quantile values {@code List} + * @param compareFn the function to use to order the elements + */ + public static & Serializable> + PTransform, PCollection>> globally( + int numQuantiles, C compareFn) { + return Combine.globally( + ApproximateQuantilesCombineFn.create(numQuantiles, compareFn)); + } + + /** + * Like {@link #globally(int, Comparator)}, but sorts using the + * elements' natural ordering. + * + * @param the type of the elements in the input {@code PCollection} + * @param numQuantiles the number of elements in the resulting + * quantile values {@code List} + */ + public static > + PTransform, PCollection>> globally(int numQuantiles) { + return Combine.globally( + ApproximateQuantilesCombineFn.create(numQuantiles)); + } + + /** + * Returns a {@code PTransform} that takes a + * {@code PCollection>} and returns a + * {@code PCollection>>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to a {@code List} of the approximate + * {@code N}-tiles of the values associated with that key in the + * input {@code PCollection}. This gives an idea of the + * distribution of the input values for each key. + * + *

Each of the computed {@code List}s is of size {@code numQuantiles}, + * and contains the input values' minimum value, + * {@code numQuantiles-2} intermediate values, and maximum value, in + * sorted order, using the given {@code Comparator} to order values. + * To compute traditional {@code N}-tiles, one should use + * {@code ApproximateQuantiles.perKey(compareFn, N+1)}. + * + *

If a key has fewer than {@code numQuantiles} values + * associated with it, then that key's output {@code List} will + * contain all the key's input values, in sorted order. + * + *

The argument {@code Comparator} must be {@code Serializable}. + * + *

Example of use: + *

 {@code
+   * PCollection<KV<Integer, String>> pc = ...;
+   * PCollection<KV<Integer, List<String>>> quantilesPerKey =
+   *     pc.apply(ApproximateQuantiles.perKey(11, stringCompareFn));
+   * } 
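The corresponding per-key overload using natural ordering, declared below, is applied the same way; the types and names here are illustrative:

  PCollection<KV<String, Integer>> scoresByUser = ...;
  PCollection<KV<String, List<Integer>>> quartilesByUser =
      scoresByUser.apply(ApproximateQuantiles.<String, Integer>perKey(5));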
+ * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + * + * @param the type of the keys in the input and output + * {@code PCollection}s + * @param the type of the values in the input {@code PCollection} + * @param numQuantiles the number of elements in the resulting + * quantile values {@code List} + * @param compareFn the function to use to order the elements + */ + public static & Serializable> + PTransform>, PCollection>>> + perKey(int numQuantiles, C compareFn) { + return Combine.perKey( + ApproximateQuantilesCombineFn.create(numQuantiles, compareFn) + .asKeyedFn()); + } + + /** + * Like {@link #perKey(int, Comparator)}, but sorts + * values using the their natural ordering. + * + * @param the type of the keys in the input and output + * {@code PCollection}s + * @param the type of the values in the input {@code PCollection} + * @param numQuantiles the number of elements in the resulting + * quantile values {@code List} + */ + public static > + PTransform>, PCollection>>> + perKey(int numQuantiles) { + return Combine.perKey( + ApproximateQuantilesCombineFn.create(numQuantiles) + .asKeyedFn()); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * The {@code ApproximateQuantilesCombineFn} combiner gives an idea + * of the distribution of a collection of values using approximate + * {@code N}-tiles. The output of this combiner is a {@code List} + * of size {@code numQuantiles}, containing the input values' + * minimum value, {@code numQuantiles-2} intermediate values, and + * maximum value, in sorted order, so for traditional + * {@code N}-tiles, one should use + * {@code ApproximateQuantilesCombineFn#create(N+1)}. + * + *

If there are fewer values to combine than + * {@code numQuantiles}, then the result {@code List} will contain all the + * values being combined, in sorted order. + * + *

Values are ordered using either a specified + * {@code Comparator} or the values' natural ordering. + * + *

To evaluate the quantiles we use the "New Algorithm" described here: + *

+   *   [MRL98] Manku, Rajagopalan & Lindsay, "Approximate Medians and other
+   *   Quantiles in One Pass and with Limited Memory", Proc. 1998 ACM
+   *   SIGMOD, Vol 27, No 2, p 426-435, June 1998.
+   *   http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.6513&rep=rep1&type=pdf
+   * 
+ * + *

The default error bound is {@code 1 / N}, though in practice + * the accuracy tends to be much better.
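+   *
+   * As a rough worked example (illustrative numbers only): with
+   * {@code create(5)} the default error bound is {@code 1/5 = 0.2}, so over
+   * one million inputs each reported quantile is an element whose rank is
+   * within about {@code 0.2 * 1,000,000 = 200,000} of the exact quantile's
+   * rank, assuming the input stays below {@link #DEFAULT_MAX_NUM_ELEMENTS}.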

See + * {@link #create(int, Comparator, long, double)} for + * more information about the meaning of {@code epsilon}, and + * {@link #withEpsilon} for a convenient way to adjust it. + * + * @param the type of the values being combined + */ + public static class ApproximateQuantilesCombineFn + & Serializable> + extends AccumulatingCombineFn + .QuantileState, List> { + + /** + * The cost (in time and space) to compute quantiles to a given + * accuracy is a function of the total number of elements in the + * data set. If an estimate is not known or specified, we use + * this as an upper bound. If this is too low, errors may exceed + * the requested tolerance; if too high, efficiency may be + * non-optimal. The impact is logarithmic with respect to this + * value, so this default should be fine for most uses. + */ + public static final long DEFAULT_MAX_NUM_ELEMENTS = (long) 1e9; + + /** The comparison function to use. */ + private final C compareFn; + + /** + * Number of quantiles to produce. The size of the final output + * list, including the minimum and maximum, is numQuantiles. + */ + private final int numQuantiles; + + /** The size of the buffers, corresponding to k in the referenced paper. */ + private final int bufferSize; + + /** The number of buffers, corresponding to b in the referenced paper. */ + private final int numBuffers; + + private final double epsilon; + private final long maxNumElements; + + /** + * Used to alternate between biasing up and down in the even weight collapse + * operation. + */ + private int offsetJitter = 0; + + /** + * Returns an approximate quantiles combiner with the given + * {@code compareFn} and desired number of quantiles. A total of + * {@code numQuantiles} elements will appear in the output list, + * including the minimum and maximum. + * + *

The {@code Comparator} must be {@code Serializable}. + * + *

The default error bound is {@code 1 / numQuantiles} which + * holds as long as the number of elements is less than + * {@link #DEFAULT_MAX_NUM_ELEMENTS}. + */ + public static & Serializable> + ApproximateQuantilesCombineFn create( + int numQuantiles, C compareFn) { + return create(numQuantiles, compareFn, + DEFAULT_MAX_NUM_ELEMENTS, 1.0 / numQuantiles); + } + + /** + * Like {@link #create(int, Comparator)}, but sorts + * values using their natural ordering. + */ + public static > + ApproximateQuantilesCombineFn> create(int numQuantiles) { + return create(numQuantiles, new Top.Largest()); + } + + /** + * Returns an {@code ApproximateQuantilesCombineFn} that's like + * this one except that it uses the specified {@code epsilon} + * value. Does not modify this combiner. + * + *

See {@link #create(int, Comparator, long, + * double)} for more information about the meaning of + * {@code epsilon}. + */ + public ApproximateQuantilesCombineFn withEpsilon(double epsilon) { + return create(numQuantiles, compareFn, maxNumElements, epsilon); + } + + /** + * Returns an {@code ApproximateQuantilesCombineFn} that's like + * this one except that it uses the specified {@code maxNumElements} + * value. Does not modify this combiner. + * + *

See {@link #create(int, Comparator, long, double)} for more + * information about the meaning of {@code maxNumElements}. + */ + public ApproximateQuantilesCombineFn withMaxInputSize( + long maxNumElements) { + return create(numQuantiles, compareFn, maxNumElements, epsilon); + } + + /** + * Creates an approximate quantiles combiner with the given + * {@code compareFn} and desired number of quantiles. A total of + * {@code numQuantiles} elements will appear in the output list, + * including the minimum and maximum. + * + *

The {@code Comparator} must be {@code Serializable}. + * + *

The default error bound is {@code epsilon} which is holds as long + * as the number of elements is less than {@code maxNumElements}. + * Specifically, if one considers the input as a sorted list x_1, ..., x_N, + * then the distance between the each exact quantile x_c and its + * approximation x_c' is bounded by {@code |c - c'| < epsilon * N}. + * Note that these errors are worst-case scenarios; in practice the accuracy + * tends to be much better. + */ + public static & Serializable> + ApproximateQuantilesCombineFn create( + int numQuantiles, + C compareFn, + long maxNumElements, + double epsilon) { + // Compute optimal b and k. + int b = 2; + while ((b - 2) * (1 << (b - 2)) < epsilon * maxNumElements) { + b++; + } + b--; + int k = Math.max(2, (int) Math.ceil(maxNumElements / (1 << (b - 1)))); + return new ApproximateQuantilesCombineFn<>( + numQuantiles, compareFn, k, b, epsilon, maxNumElements); + } + + private ApproximateQuantilesCombineFn(int numQuantiles, + C compareFn, + int bufferSize, + int numBuffers, + double epsilon, + long maxNumElements) { + Preconditions.checkArgument(numQuantiles >= 2); + Preconditions.checkArgument(bufferSize >= 2); + Preconditions.checkArgument(numBuffers >= 2); + Preconditions.checkArgument(compareFn instanceof Serializable); + this.numQuantiles = numQuantiles; + this.compareFn = compareFn; + this.bufferSize = bufferSize; + this.numBuffers = numBuffers; + this.epsilon = epsilon; + this.maxNumElements = maxNumElements; + } + + @Override + public QuantileState createAccumulator() { + return new QuantileState(); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder elementCoder) { + return new QuantileStateCoder(elementCoder); + } + + /** + * Compact summarization of a collection on which quantiles can be + * estimated. + */ + class QuantileState + extends AccumulatingCombineFn + .QuantileState, List> + .Accumulator { + + private T min; + private T max; + + /** + * The set of buffers, ordered by level from smallest to largest. + */ + private PriorityQueue buffers = + new PriorityQueue<>(numBuffers + 1); + + /** + * The algorithm requires that the manipulated buffers always be filled + * to capacity to perform the collapse operation. This operation can + * be extended to buffers of varying sizes by introducing the notion of + * fractional weights, but it's easier to simply combine the remainders + * from all shards into new, full buffers and then take them into account + * when computing the final output. + */ + private List unbufferedElements = Lists.newArrayList(); + + public QuantileState() { } + + public QuantileState(T elem) { + min = elem; + max = elem; + unbufferedElements.add(elem); + } + + public QuantileState(T min, T max, Collection unbufferedElements, + Collection buffers) { + this.min = min; + this.max = max; + this.unbufferedElements.addAll(unbufferedElements); + this.buffers.addAll(buffers); + } + + /** + * Add a new element to the collection being summarized by this state. + */ + @Override + public void addInput(T elem) { + if (isEmpty()) { + min = max = elem; + } else if (compareFn.compare(elem, min) < 0) { + min = elem; + } else if (compareFn.compare(elem, max) > 0) { + max = elem; + } + addUnbuffered(elem); + } + + /** + * Add a new buffer to the unbuffered list, creating a new buffer and + * collapsing if needed. 
+ */ + private void addUnbuffered(T elem) { + unbufferedElements.add(elem); + if (unbufferedElements.size() == bufferSize) { + Collections.sort(unbufferedElements, compareFn); + buffers.add(new QuantileBuffer(unbufferedElements)); + unbufferedElements = Lists.newArrayListWithCapacity(bufferSize); + collapseIfNeeded(); + } + } + + /** + * Updates this as if adding all elements seen by other. + */ + @Override + public void mergeAccumulator(QuantileState other) { + if (other.isEmpty()) { + return; + } + if (min == null || compareFn.compare(other.min, min) < 0) { + min = other.min; + } + if (max == null || compareFn.compare(other.max, max) > 0) { + max = other.max; + } + for (T elem : other.unbufferedElements) { + addUnbuffered(elem); + } + buffers.addAll(other.buffers); + collapseIfNeeded(); + } + + public boolean isEmpty() { + return unbufferedElements.size() == 0 && buffers.size() == 0; + } + + private void collapseIfNeeded() { + while (buffers.size() > numBuffers) { + List toCollapse = Lists.newArrayList(); + toCollapse.add(buffers.poll()); + toCollapse.add(buffers.poll()); + int minLevel = toCollapse.get(1).level; + while (!buffers.isEmpty() && buffers.peek().level == minLevel) { + toCollapse.add(buffers.poll()); + } + buffers.add(collapse(toCollapse)); + } + } + + private QuantileBuffer collapse(Iterable buffers) { + int newLevel = 0; + long newWeight = 0; + for (QuantileBuffer buffer : buffers) { + // As presented in the paper, there should always be at least two + // buffers of the same (minimal) level to collapse, but it is possible + // to violate this condition when combining buffers from independently + // computed shards. If they differ we take the max. + newLevel = Math.max(newLevel, buffer.level + 1); + newWeight += buffer.weight; + } + List newElements = + interpolate(buffers, bufferSize, newWeight, offset(newWeight)); + return new QuantileBuffer(newLevel, newWeight, newElements); + } + + /** + * Outputs numQuantiles elements consisting of the minimum, maximum, and + * numQuantiles - 2 evenly spaced intermediate elements. + * + * Returns the empty list if no elements have been added. + */ + @Override + public List extractOutput() { + if (isEmpty()) { + return Lists.newArrayList(); + } + long totalCount = unbufferedElements.size(); + for (QuantileBuffer buffer : buffers) { + totalCount += bufferSize * buffer.weight; + } + List all = Lists.newArrayList(buffers); + if (!unbufferedElements.isEmpty()) { + Collections.sort(unbufferedElements, compareFn); + all.add(new QuantileBuffer(unbufferedElements)); + } + double step = 1.0 * totalCount / (numQuantiles - 1); + double offset = (1.0 * totalCount - 1) / (numQuantiles - 1); + List quantiles = interpolate(all, numQuantiles - 2, step, offset); + quantiles.add(0, min); + quantiles.add(max); + return quantiles; + } + } + + /** + * A single buffer in the sense of the referenced algorithm. 
+ */ + private class QuantileBuffer implements Comparable { + private int level; + private long weight; + private List elements; + + public QuantileBuffer(List elements) { + this(0, 1, elements); + } + + public QuantileBuffer(int level, long weight, List elements) { + this.level = level; + this.weight = weight; + this.elements = elements; + } + + @Override + public int compareTo(QuantileBuffer other) { + return this.level - other.level; + } + + @Override + public String toString() { + return "QuantileBuffer[" + + "level=" + level + + ", weight=" + + weight + ", elements=" + elements + "]"; + } + + public Iterator> weightedIterator() { + return new UnmodifiableIterator>() { + Iterator iter = elements.iterator(); + @Override public boolean hasNext() { return iter.hasNext(); } + @Override public WeightedElement next() { + return WeightedElement.of(weight, iter.next()); + } + }; + } + } + + /** + * Coder for QuantileState. + */ + private class QuantileStateCoder extends CustomCoder { + + private final Coder elementCoder; + private final Coder> elementListCoder; + + public QuantileStateCoder(Coder elementCoder) { + this.elementCoder = elementCoder; + this.elementListCoder = ListCoder.of(elementCoder); + } + + @Override + public void encode( + QuantileState state, OutputStream outStream, Coder.Context context) + throws CoderException, IOException { + Coder.Context nestedContext = context.nested(); + elementCoder.encode(state.min, outStream, nestedContext); + elementCoder.encode(state.max, outStream, nestedContext); + elementListCoder.encode( + state.unbufferedElements, outStream, nestedContext); + BigEndianIntegerCoder.of().encode( + state.buffers.size(), outStream, nestedContext); + for (QuantileBuffer buffer : state.buffers) { + encodeBuffer(buffer, outStream, nestedContext); + } + } + + @Override + public QuantileState decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + Coder.Context nestedContext = context.nested(); + T min = elementCoder.decode(inStream, nestedContext); + T max = elementCoder.decode(inStream, nestedContext); + List unbufferedElements = + elementListCoder.decode(inStream, nestedContext); + int numBuffers = + BigEndianIntegerCoder.of().decode(inStream, nestedContext); + List buffers = new ArrayList<>(numBuffers); + for (int i = 0; i < numBuffers; i++) { + buffers.add(decodeBuffer(inStream, nestedContext)); + } + return new QuantileState(min, max, unbufferedElements, buffers); + } + + private void encodeBuffer( + QuantileBuffer buffer, OutputStream outStream, Coder.Context context) + throws CoderException, IOException { + DataOutputStream outData = new DataOutputStream(outStream); + outData.writeInt(buffer.level); + outData.writeLong(buffer.weight); + elementListCoder.encode(buffer.elements, outStream, context); + } + + private QuantileBuffer decodeBuffer( + InputStream inStream, Coder.Context context) + throws IOException, CoderException { + DataInputStream inData = new DataInputStream(inStream); + return new QuantileBuffer( + inData.readInt(), + inData.readLong(), + elementListCoder.decode(inStream, context)); + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the + * encoded value using this coder. 
+ */ + @Override + public void registerByteSizeObserver( + QuantileState state, + ElementByteSizeObserver observer, + Coder.Context context) + throws Exception { + Coder.Context nestedContext = context.nested(); + elementCoder.registerByteSizeObserver( + state.min, observer, nestedContext); + elementCoder.registerByteSizeObserver( + state.max, observer, nestedContext); + elementListCoder.registerByteSizeObserver( + state.unbufferedElements, observer, nestedContext); + + BigEndianIntegerCoder.of().registerByteSizeObserver( + state.buffers.size(), observer, nestedContext); + for (QuantileBuffer buffer : state.buffers) { + observer.update(4L + 8); + + elementListCoder.registerByteSizeObserver( + buffer.elements, observer, nestedContext); + } + } + + @Override + public boolean isDeterministic() { + return elementListCoder.isDeterministic(); + } + } + + /** + * If the weight is even, we must round up our down. Alternate between + * these two options to avoid a bias. + */ + private long offset(long newWeight) { + if (newWeight % 2 == 1) { + return (newWeight + 1) / 2; + } else { + offsetJitter = 2 - offsetJitter; + return (newWeight + offsetJitter) / 2; + } + } + + /** + * Emulates taking the ordered union of all elements in buffers, repeated + * according to their weight, and picking out the (k * step + offset)-th + * elements of this list for {@code 0 <= k < count}. + */ + private List interpolate(Iterable buffers, + int count, double step, double offset) { + List>> iterators = Lists.newArrayList(); + for (QuantileBuffer buffer : buffers) { + iterators.add(buffer.weightedIterator()); + } + // Each of the buffers is already sorted by element. + Iterator> sorted = Iterators.mergeSorted( + iterators, + new Comparator>() { + @Override + public int compare(WeightedElement a, WeightedElement b) { + return compareFn.compare(a.value, b.value); + } + }); + + List newElements = Lists.newArrayListWithCapacity(count); + WeightedElement weightedElement = sorted.next(); + double current = weightedElement.weight; + for (int j = 0; j < count; j++) { + double target = j * step + offset; + while (current <= target && sorted.hasNext()) { + weightedElement = sorted.next(); + current += weightedElement.weight; + } + newElements.add(weightedElement.value); + } + return newElements; + } + + /** An element and its weight. */ + private static class WeightedElement { + public long weight; + public T value; + private WeightedElement(long weight, T value) { + this.weight = weight; + this.value = value; + } + public static WeightedElement of(long weight, T value) { + return new WeightedElement<>(weight, value); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java new file mode 100644 index 0000000000000..9308a010a2a9e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java @@ -0,0 +1,426 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.SerializableCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.hash.Hashing; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; + +/** + * {@code PTransform}s for estimating the number of distinct elements + * in a {@code PCollection}, or the number of distinct values + * associated with each key in a {@code PCollection} of {@code KV}s. + */ +public class ApproximateUnique { + + /** + * Returns a {@code PTransform} that takes a {@code PCollection} + * and returns a {@code PCollection} containing a single value + * that is an estimate of the number of distinct elements in the + * input {@code PCollection}. + * + *

The {@code sampleSize} parameter controls the estimation + * error. The error is about {@code 2 / sqrt(sampleSize)}, so for + * {@code ApproximateUnique.globally(10000)} the estimation error is + * about 2%. Similarly, for {@code ApproximateUnique.of(16)} the + * estimation error is about 50%. If there are fewer than + * {@code sampleSize} distinct elements then the returned result + * will be exact with extremely high probability (the chance of a + * hash collision is about {@code sampleSize^2 / 2^65}). + * + *
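+   *
+   * Equivalently, hitting a target error of roughly {@code e} requires on
+   * the order of {@code 4 / e^2} sample entries (the relationship used by
+   * {@link #globally(double)}): about 400 entries for a 10% error and about
+   * 40,000 entries for a 1% error.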

This transform approximates the number of elements in a set + * by computing the top {@code sampleSize} hash values, and using + * that to extrapolate the size of the entire set of hash values by + * assuming the rest of the hash values are as densely distributed + * as the top {@code sampleSize}. + * + *
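+   *
+   * Roughly (ignoring the hash-collision correction applied in
+   * {@code ApproximateUniqueCombineFn}): if the smallest of the retained
+   * {@code sampleSize} largest hashes is {@code h}, the sample covers a
+   * fraction {@code (Long.MAX_VALUE - h) / 2^64} of the hash space, so the
+   * estimated number of distinct elements is about
+   * {@code sampleSize * 2^64 / (Long.MAX_VALUE - h)}.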

See also {@link #globally(double)}. + * + *

Example of use: + *

 {@code
+   * PCollection pc = ...;
+   * PCollection approxNumDistinct =
+   *     pc.apply(ApproximateUnique.globally(1000));
+   * } 
+ * + * @param the type of the elements in the input {@code PCollection} + * @param sampleSize the number of entries in the statistical + * sample; the higher this number, the more accurate the + * estimate will be; should be {@code >= 16} + * @throws IllegalArgumentException if the {@code sampleSize} + * argument is too small + */ + public static Globally globally(int sampleSize) { + return new Globally<>(sampleSize); + } + + /** + * Like {@link #globally(int)}, but specifies the desired maximum + * estimation error instead of the sample size. + * + * @param the type of the elements in the input {@code PCollection} + * @param maximumEstimationError the maximum estimation error, which + * should be in the range {@code [0.01, 0.5]} + * @throws IllegalArgumentException if the + * {@code maximumEstimationError} argument is out of range + */ + public static Globally globally(double maximumEstimationError) { + return new Globally<>(maximumEstimationError); + } + + /** + * Returns a {@code PTransform} that takes a + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output element + * mapping each distinct key in the input {@code PCollection} to an + * estimate of the number of distinct values associated with that + * key in the input {@code PCollection}. + * + *

See {@link #globally(int)} for an explanation of the + * {@code sampleSize} parameter. A separate sampling is computed + * for each distinct key of the input. + * + *

See also {@link #perKey(double)}. + * + *

Example of use: + *

 {@code
+   * PCollection> pc = ...;
+   * PCollection> approxNumDistinctPerKey =
+   *     pc.apply(ApproximateUnique.perKey(1000));
+   * } 
+ * + * @param the type of the keys in the input and output + * {@code PCollection}s + * @param the type of the values in the input {@code PCollection} + * @param sampleSize the number of entries in the statistical + * sample; the higher this number, the more accurate the + * estimate will be; should be {@code >= 16} + * @throws IllegalArgumentException if the {@code sampleSize} + * argument is too small + */ + public static PerKey perKey(int sampleSize) { + return new PerKey<>(sampleSize); + } + + /** + * Like {@link #perKey(int)}, but specifies the desired maximum + * estimation error instead of the sample size. + * + * @param the type of the keys in the input and output + * {@code PCollection}s + * @param the type of the values in the input {@code PCollection} + * @param maximumEstimationError the maximum estimation error, which + * should be in the range {@code [0.01, 0.5]} + * @throws IllegalArgumentException if the + * {@code maximumEstimationError} argument is out of range + */ + public static PerKey perKey(double maximumEstimationError) { + return new PerKey<>(maximumEstimationError); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * {@code PTransform} for estimating the number of distinct elements + * in a {@code PCollection}. + * + * @param the type of the elements in the input {@code PCollection} + */ + static class Globally extends PTransform, PCollection> { + + /** + * The number of entries in the statistical sample; the higher this number, + * the more accurate the estimate will be. + */ + private final long sampleSize; + + /** + * @see ApproximateUnique#globally(int) + */ + public Globally(int sampleSize) { + if (sampleSize < 16) { + throw new IllegalArgumentException( + "ApproximateUnique needs a sampleSize " + + ">= 16 for an estimation error <= 50%. " + + "In general, the estimation " + + "error is about 2 / sqrt(sampleSize)."); + } + this.sampleSize = sampleSize; + } + + /** + * @see ApproximateUnique#globally(double) + */ + public Globally(double maximumEstimationError) { + if (maximumEstimationError < 0.01 || maximumEstimationError > 0.5) { + throw new IllegalArgumentException( + "ApproximateUnique needs an " + + "estimation error between 1% (0.01) and 50% (0.5)."); + } + this.sampleSize = sampleSizeFromEstimationError(maximumEstimationError); + } + + @Override + public PCollection apply(PCollection input) { + Coder coder = input.getCoder(); + return input.apply( + Combine.globally( + new ApproximateUniqueCombineFn<>(sampleSize, coder))); + } + + @Override + protected String getKindString() { + return "ApproximateUnique.Globally"; + } + } + + /** + * {@code PTransform} for estimating the number of distinct values + * associated with each key in a {@code PCollection} of {@code KV}s. + * + * @param the type of the keys in the input and output + * {@code PCollection}s + * @param the type of the values in the input {@code PCollection} + */ + static class PerKey + extends PTransform>, PCollection>> { + + private final long sampleSize; + + /** + * @see ApproximateUnique#perKey(int) + */ + public PerKey(int sampleSize) { + if (sampleSize < 16) { + throw new IllegalArgumentException( + "ApproximateUnique needs a " + + "sampleSize >= 16 for an estimation error <= 50%. 
In general, " + + "the estimation error is about 2 / sqrt(sampleSize)."); + } + this.sampleSize = sampleSize; + } + + /** + * @see ApproximateUnique#perKey(double) + */ + public PerKey(double estimationError) { + if (estimationError < 0.01 || estimationError > 0.5) { + throw new IllegalArgumentException( + "ApproximateUnique.PerKey needs an " + + "estimation error between 1% (0.01) and 50% (0.5)."); + } + this.sampleSize = sampleSizeFromEstimationError(estimationError); + } + + @Override + public PCollection> apply(PCollection> input) { + Coder> inputCoder = input.getCoder(); + if (!(inputCoder instanceof KvCoder)) { + throw new IllegalStateException( + "ApproximateUnique.PerKey requires its input to use KvCoder"); + } + @SuppressWarnings("unchecked") + final Coder coder = ((KvCoder) inputCoder).getValueCoder(); + + return input.apply( + Combine.perKey(new ApproximateUniqueCombineFn<>( + sampleSize, coder).asKeyedFn())); + } + + @Override + protected String getKindString() { + return "ApproximateUnique.PerKey"; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * {@code CombineFn} that computes an estimate of the number of + * distinct values that were combined. + * + *

Hashes input elements, computes the top {@code sampleSize} + * hash values, and uses those to extrapolate the size of the entire + * set of hash values by assuming the rest of the hash values are as + * densely distributed as the top {@code sampleSize}. + * + *

Used to implement + * {@link #globally(int) ApproximatUnique.globally(...)} and + * {@link #perKey(int) ApproximatUnique.perKey(...)}. + * + * @param the type of the values being combined + */ + public static class ApproximateUniqueCombineFn extends + CombineFn { + + /** + * The size of the space of hashes returned by the hash function. + */ + static final double HASH_SPACE_SIZE = + Long.MAX_VALUE - (double) Long.MIN_VALUE; + + /** + * A heap utility class to efficiently track the largest added elements. + */ + public static class LargestUnique implements Serializable { + private PriorityQueue heap = new PriorityQueue<>(); + private final long sampleSize; + + /** + * Creates a heap to track the largest {@code sampleSize} elements. + * + * @param sampleSize the size of the heap + */ + public LargestUnique(long sampleSize) { + this.sampleSize = sampleSize; + } + + /** + * Adds a value to the heap, returning whether the value is (large enough + * to be) in the heap. + */ + public boolean add(Long value) { + if (heap.contains(value)) { + return true; + } else if (heap.size() < sampleSize) { + heap.add(value); + return true; + } else if (value > heap.element()) { + heap.remove(); + heap.add(value); + return true; + } else { + return false; + } + } + + /** + * Returns the values in the heap, ordered largest to smallest. + */ + public List extractOrderedList() { + // The only way to extract the order from the heap is element-by-element + // from smallest to largest. + Long[] array = new Long[heap.size()]; + for (int i = heap.size() - 1; i >= 0; i--) { + array[i] = heap.remove(); + } + return Arrays.asList(array); + } + } + + private final long sampleSize; + private final Coder coder; + + public ApproximateUniqueCombineFn(long sampleSize, Coder coder) { + this.sampleSize = sampleSize; + this.coder = coder; + } + + @Override + public LargestUnique createAccumulator() { + return new LargestUnique(sampleSize); + } + + @Override + public void addInput(LargestUnique heap, T input) { + try { + heap.add(hash(input, coder)); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public LargestUnique mergeAccumulators(Iterable heaps) { + Iterator iterator = heaps.iterator(); + LargestUnique heap = iterator.next(); + while (iterator.hasNext()) { + List largestHashes = iterator.next().extractOrderedList(); + for (long hash : largestHashes) { + if (!heap.add(hash)) { + break; // The remainder of this list is all smaller. + } + } + } + return heap; + } + + @Override + public Long extractOutput(LargestUnique heap) { + List largestHashes = heap.extractOrderedList(); + if (largestHashes.size() < sampleSize) { + return (long) largestHashes.size(); + } else { + long smallestSampleHash = largestHashes.get(largestHashes.size() - 1); + double sampleSpaceSize = Long.MAX_VALUE - (double) smallestSampleHash; + // This formula takes into account the possibility of hash collisions, + // which become more likely than not for 2^32 distinct elements. + // Note that log(1+x) ~ x for small x, so for sampleSize << maxHash + // log(1 - sampleSize/sampleSpace) / log(1 - 1/sampleSpace) ~ sampleSize + // and hence estimate ~ sampleSize * HASH_SPACE_SIZE / sampleSpace + // as one would expect. 
+ double estimate = Math.log1p(-sampleSize / sampleSpaceSize) + / Math.log1p(-1 / sampleSpaceSize) + * HASH_SPACE_SIZE / sampleSpaceSize; + return Math.round(estimate); + } + } + + @Override + public Coder getAccumulatorCoder(CoderRegistry registry, + Coder inputCoder) { + return SerializableCoder.of(LargestUnique.class); + } + + /** + * Encodes the given element using the given coder and hashes the encoding. + */ + static long hash(T element, Coder coder) + throws CoderException, IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + coder.encode(element, baos, Context.OUTER); + return Hashing.murmur3_128().hashBytes(baos.toByteArray()).asLong(); + } + } + + /** + * Computes the sampleSize based on the desired estimation error. + * + * @param estimationError should be bounded by [0.01, 0.5] + * @return the sample size needed for the desired estimation error + */ + static long sampleSizeFromEstimationError(double estimationError) { + return Math.round(Math.ceil(4.0 / Math.pow(estimationError, 2.0))); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java new file mode 100644 index 0000000000000..9b374665451ed --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -0,0 +1,1045 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Iterables; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * {@code PTransform}s for combining {@code PCollection} elements + * globally and per-key. + */ +public class Combine { + + /** + * Returns a {@link Globally Combine.Globally} {@code PTransform} + * that uses the given {@code SerializableFunction} to combine all + * the elements of the input {@code PCollection} into a singleton + * {@code PCollection} value. The types of the input elements and the + * output value must be the same. + * + *

If the input {@code PCollection} is empty, the output will contain the + * default value of the combining function if the input is windowed into + * the {@link GlobalWindow}; otherwise, the output will be empty. Note: this + * behavior is subject to change. + * + *
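+   *
+   * For instance (an illustrative sketch), combining an empty, globally
+   * windowed {@code PCollection<Integer>} with
+   * {@code Combine.globally(new Sum.SumIntegerFn())} yields a singleton
+   * {@code PCollection} holding the combining function's value for an empty
+   * input (for a sum, {@code 0}).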

See {@link Globally Combine.Globally} for more information. + */ + public static Globally globally( + SerializableFunction, V> combiner) { + return globally(SimpleCombineFn.of(combiner)); + } + + /** + * Returns a {@link Globally Combine.Globally} {@code PTransform} + * that uses the given {@code CombineFn} to combine all the elements + * of the input {@code PCollection} into a singleton {@code PCollection} + * value. The types of the input elements and the output value can + * differ. + * + * If the input {@code PCollection} is empty, the output will contain the + * default value of the combining function if the input is windowed into + * the {@link GlobalWindow}; otherwise, the output will be empty. Note: this + * behavior is subject to change. + * + *

See {@link Globally Combine.Globally} for more information. + */ + public static Globally globally( + CombineFn fn) { + return new Globally<>(fn); + } + + /** + * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that + * first groups its input {@code PCollection} of {@code KV}s by keys and + * windows, then invokes the given function on each of the values lists to + * produce a combined value, and then returns a {@code PCollection} + * of {@code KV}s mapping each distinct key to its combined value for each + * window. + * + *

Each output element is in the window by which its corresponding input + * was grouped, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + *
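+   *
+   * Example of use (an illustrative sketch; the variable names are
+   * hypothetical):
+   *  {@code
+   * PCollection<KV<String, Integer>> salesByRegion = ...;
+   * PCollection<KV<String, Integer>> totalPerRegion = salesByRegion.apply(
+   *     Combine.perKey(new Sum.SumIntegerFn()));
+   * } 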

See {@link PerKey Combine.PerKey} for more information. + */ + public static PerKey perKey( + SerializableFunction, V> fn) { + return perKey(Combine.SimpleCombineFn.of(fn)); + } + + /** + * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that + * first groups its input {@code PCollection} of {@code KV}s by keys and + * windows, then invokes the given function on each of the values lists to + * produce a combined value, and then returns a {@code PCollection} + * of {@code KV}s mapping each distinct key to its combined value for each + * window. + * + *

Each output element is in the window by which its corresponding input + * was grouped, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + *

See {@link PerKey Combine.PerKey} for more information. + */ + public static PerKey perKey( + CombineFn fn) { + return perKey(fn.asKeyedFn()); + } + + /** + * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that + * first groups its input {@code PCollection} of {@code KV}s by keys and + * windows, then invokes the given function on each of the key/values-lists + * pairs to produce a combined value, and then returns a + * {@code PCollection} of {@code KV}s mapping each distinct key to + * its combined value for each window. + * + *

Each output element is in the window by which its corresponding input + * was grouped, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + *

See {@link PerKey Combine.PerKey} for more information. + */ + public static PerKey perKey( + KeyedCombineFn fn) { + return new PerKey<>(fn); + } + + /** + * Returns a {@link GroupedValues Combine.GroupedValues} + * {@code PTransform} that takes a {@code PCollection} of + * {@code KV}s where a key maps to an {@code Iterable} of values, e.g., + * the result of a {@code GroupByKey}, then uses the given + * {@code SerializableFunction} to combine all the values associated + * with a key, ignoring the key. The type of the input and + * output values must be the same. + * + *

Each output element has the same timestamp and is in the same window + * as its corresponding input element, and the output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See {@link GroupedValues Combine.GroupedValues} for more information. + * + *
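+   *
+   * Example of use (an illustrative sketch; the variable names are
+   * hypothetical):
+   *  {@code
+   * PCollection<KV<String, Iterable<Integer>>> grouped = ...;
+   * PCollection<KV<String, Integer>> sumsPerKey = grouped.apply(
+   *     Combine.groupedValues(new Sum.SumIntegerFn()));
+   * } 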

Note that {@link #perKey(SerializableFunction)} is typically + * more convenient to use than {@link GroupByKey} followed by + * {@code groupedValues(...)}. + */ + public static GroupedValues groupedValues( + SerializableFunction, V> fn) { + return groupedValues(SimpleCombineFn.of(fn)); + } + + /** + * Returns a {@link GroupedValues Combine.GroupedValues} + * {@code PTransform} that takes a {@code PCollection} of + * {@code KV}s where a key maps to an {@code Iterable} of values, e.g., + * the result of a {@code GroupByKey}, then uses the given + * {@code CombineFn} to combine all the values associated with a + * key, ignoring the key. The types of the input and output values + * can differ. + * + *

Each output element has the same timestamp and is in the same window + * as its corresponding input element, and the output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See {@link GroupedValues Combine.GroupedValues} for more information. + * + *

Note that {@link #perKey(CombineFn)} is typically + * more convenient to use than {@link GroupByKey} followed by + * {@code groupedValues(...)}. + */ + public static GroupedValues groupedValues( + CombineFn fn) { + return groupedValues(fn.asKeyedFn()); + } + + /** + * Returns a {@link GroupedValues Combine.GroupedValues} + * {@code PTransform} that takes a {@code PCollection} of + * {@code KV}s where a key maps to an {@code Iterable} of values, e.g., + * the result of a {@code GroupByKey}, then uses the given + * {@code KeyedCombineFn} to combine all the values associated with + * each key. The combining function is provided the key. The types + * of the input and output values can differ. + * + *

Each output element has the same timestamp and is in the same window + * as its corresponding input element, and the output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See {@link GroupedValues Combine.GroupedValues} for more information. + * + *

Note that {@link #perKey(KeyedCombineFn)} is typically + * more convenient to use than {@link GroupByKey} followed by + * {@code groupedValues(...)}. + */ + public static GroupedValues groupedValues( + KeyedCombineFn fn) { + return new GroupedValues<>(fn); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code CombineFn} specifies how to combine a + * collection of input values of type {@code VI} into a single + * output value of type {@code VO}. It does this via one or more + * intermediate mutable accumulator values of type {@code VA}. + * + *

The overall process to combine a collection of input + * {@code VI} values into a single output {@code VO} value is as + * follows: + * + *

    + * + *
  1. The input {@code VI} values are partitioned into one or more + * batches. + * + *
  2. For each batch, the {@link #createAccumulator} operation is + * invoked to create a fresh mutable accumulator value of type + * {@code VA}, initialized to represent the combination of zero + * values. + * + *
  3. For each input {@code VI} value in a batch, the + * {@link #addInput} operation is invoked to add the value to that + * batch's accumulator {@code VA} value. The accumulator may just + * record the new value (e.g., if {@code VA == List}), or may do + * work to represent the combination more compactly. + *
  4. The {@link #mergeAccumulators} operation is invoked to + * combine a collection of accumulator {@code VA} values into a + * single combined output accumulator {@code VA} value, once the + * merging accumulators have had all the input values in their + * batches added to them. This operation is invoked repeatedly, + * until there is only one accumulator value left. + *
  5. The {@link #extractOutput} operation is invoked on the final + * accumulator {@code VA} value to get the output {@code VO} value. + * + *
+ * + *

For example: + *

 {@code
+   * public class AverageFn extends CombineFn {
+   *   public static class Accum {
+   *     int sum = 0;
+   *     int count = 0;
+   *   }
+   *   public Accum createAccumulator() { return new Accum(); }
+   *   public void addInput(Accum accum, Integer input) {
+   *       accum.sum += input;
+   *       accum.count++;
+   *   }
+   *   public Accum mergeAccumulators(Iterable accums) {
+   *     Accum merged = createAccumulator();
+   *     for (Accum accum : accums) {
+   *       merged.sum += accum.sum;
+   *       merged.count += accum.count;
+   *     }
+   *     return merged;
+   *   }
+   *   public Double extractOutput(Accum accum) {
+   *     return ((double) accum.sum) / accum.count;
+   *   }
+   * }
+   * PCollection pc = ...;
+   * PCollection average = pc.apply(Combine.globally(new AverageFn()));
+   * } 
+ * + *

Combining functions used by {@link Combine.Globally}, + * {@link Combine.PerKey}, {@link Combine.GroupedValues}, and + * {@code PTransforms} derived from them should be + * associative and commutative. Associativity is + * required because input values are first broken up into subgroups + * before being combined, and their intermediate results further + * combined, in an arbitrary tree structure. Commutativity is + * required because any order of the input values is ignored when + * breaking up input values into groups. + * + * @param type of input values + * @param type of mutable accumulator values + * @param type of output values + */ + public abstract static class CombineFn implements Serializable { + /** + * Returns a new, mutable accumulator value, representing the + * accumulation of zero input values. + */ + public abstract VA createAccumulator(); + + /** + * Adds the given input value to the given accumulator, + * modifying the accumulator. + */ + public abstract void addInput(VA accumulator, VI input); + + /** + * Returns an accumulator representing the accumulation of all the + * input values accumulated in the merging accumulators. + * + *

May modify any of the argument accumulators. May return a + * fresh accumulator, or may return one of the (modified) argument + * accumulators. + */ + public abstract VA mergeAccumulators(Iterable accumulators); + + /** + * Returns the output value that is the result of combining all + * the input values represented by the given accumulator. + */ + public abstract VO extractOutput(VA accumulator); + + /** + * Applies this {@code CombineFn} to a collection of input values + * to produce a combined output value. + * + *
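+   *
+   * For example, with the {@code AverageFn} defined above,
+   * {@code new AverageFn().apply(Arrays.asList(1, 2, 3, 4))} returns
+   * {@code 2.5} without constructing a pipeline (an illustrative check).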

Useful when testing the behavior of a {@code CombineFn} + * separately from a {@code Combine} transform. + */ + public VO apply(Iterable inputs) { + VA accum = createAccumulator(); + for (VI input : inputs) { + addInput(accum, input); + } + return extractOutput(accum); + } + + /** + * Returns the {@code Coder} to use for accumulator {@code VA} + * values, or null if it is not able to be inferred. + * + *

By default, uses the knowledge of the {@code Coder} being used + * for {@code VI} values and the enclosing {@code Pipeline}'s + * {@code CoderRegistry} to try to infer the Coder for {@code VA} + * values. + */ + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return registry.getDefaultCoder( + getClass(), + CombineFn.class, + ImmutableMap.of("VI", inputCoder), + "VA"); + } + + /** + * Returns the {@code Coder} to use by default for output + * {@code VO} values, or null if it is not able to be inferred. + * + *

By default, uses the knowledge of the {@code Coder} being + * used for input {@code VI} values and the enclosing + * {@code Pipeline}'s {@code CoderRegistry} to try to infer the + * Coder for {@code VO} values. + */ + public Coder getDefaultOutputCoder( + CoderRegistry registry, Coder inputCoder) { + return registry.getDefaultCoder( + getClass(), + CombineFn.class, + ImmutableMap.of("VI", inputCoder, + "VA", getAccumulatorCoder(registry, inputCoder)), + "VO"); + } + + /** + * Converts this {@code CombineFn} into an equivalent + * {@link KeyedCombineFn}, which ignores the keys passed to it and + * combines the values according to this {@code CombineFn}. + * + * @param the type of the (ignored) keys + */ + @SuppressWarnings({"unchecked", "rawtypes"}) + public KeyedCombineFn asKeyedFn() { + // The key, an object, is never even looked at. + return new KeyedCombineFn() { + @Override + public VA createAccumulator(K key) { + return CombineFn.this.createAccumulator(); + } + + @Override + public void addInput(K key, VA accumulator, VI input) { + CombineFn.this.addInput(accumulator, input); + } + + @Override + public VA mergeAccumulators(K key, Iterable accumulators) { + return CombineFn.this.mergeAccumulators(accumulators); + } + + @Override + public VO extractOutput(K key, VA accumulator) { + return CombineFn.this.extractOutput(accumulator); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder keyCoder, Coder inputCoder) { + return CombineFn.this.getAccumulatorCoder(registry, inputCoder); + } + + @Override + public Coder getDefaultOutputCoder( + CoderRegistry registry, Coder keyCoder, Coder inputCoder) { + return CombineFn.this.getDefaultOutputCoder(registry, inputCoder); + } + }; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code CombineFn} that uses a subclass of + * {@link AccumulatingCombineFn.Accumulator} as its accumulator + * type. By defining the operations of the {@code Accumulator} + * helper class, the operations of the enclosing {@code CombineFn} + * are automatically provided. This can reduce the code required to + * implement a {@code CombineFn}. + * + *

For example, the example from {@link CombineFn} above can be + * expressed using {@code AccumulatingCombineFn} more concisely as + * follows: + * + *

 {@code
+   * public class AverageFn
+   *     extends AccumulatingCombineFn {
+   *   public Accum createAccumulator() { return new Accum(); }
+   *   public class Accum
+   *       extends AccumulatingCombineFn
+   *               .Accumulator {
+   *     private int sum = 0;
+   *     private int count = 0;
+   *     public void addInput(Integer input) {
+   *       sum += input;
+   *       count++;
+   *     }
+   *     public void mergeAccumulator(Accum other) {
+   *       sum += other.sum;
+   *       count += other.count;
+   *     }
+   *     public Double extractOutput() {
+   *       return ((double) sum) / count;
+   *     }
+   *   }
+   * }
+   * PCollection pc = ...;
+   * PCollection average = pc.apply(Combine.globally(new AverageFn()));
+   * } 
+ * + * @param type of input values + * @param type of mutable accumulator values + * @param type of output values + */ + public abstract static class AccumulatingCombineFn + .Accumulator, VO> + extends CombineFn { + + /** + * The type of mutable accumulator values used by this + * {@code AccumulatingCombineFn}. + */ + public abstract class Accumulator implements Serializable { + /** + * Adds the given input value to this accumulator, modifying + * this accumulator. + */ + public abstract void addInput(VI input); + + /** + * Adds the input values represented by the given accumulator + * into this accumulator. + */ + public abstract void mergeAccumulator(VA other); + + /** + * Returns the output value that is the result of combining all + * the input values represented by this accumulator. + */ + public abstract VO extractOutput(); + } + + @Override + public final void addInput(VA accumulator, VI input) { + accumulator.addInput(input); + } + + @Override + public final VA mergeAccumulators(Iterable accumulators) { + VA accumulator = createAccumulator(); + for (VA partial : accumulators) { + accumulator.mergeAccumulator(partial); + } + return accumulator; + } + + @Override + public final VO extractOutput(VA accumulator) { + return accumulator.extractOutput(); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + + /** + * A {@code KeyedCombineFn} specifies how to combine + * a collection of input values of type {@code VI}, associated with + * a key of type {@code K}, into a single output value of type + * {@code VO}. It does this via one or more intermediate mutable + * accumulator values of type {@code VA}. + * + *

The overall process to combine a collection of input + * {@code VI} values associated with an input {@code K} key into a + * single output {@code VO} value is as follows: + * + *

    + * + *
  1. The input {@code VI} values are partitioned into one or more + * batches. + * + *
  2. For each batch, the {@link #createAccumulator} operation is + * invoked to create a fresh mutable accumulator value of type + * {@code VA}, initialized to represent the combination of zero + * values. + * + *
  3. For each input {@code VI} value in a batch, the + * {@link #addInput} operation is invoked to add the value to that + * batch's accumulator {@code VA} value. The accumulator may just + * record the new value (e.g., if {@code VA == List}), or may do + * work to represent the combination more compactly. + *
  4. The {@link #mergeAccumulators} operation is invoked to + * combine a collection of accumulator {@code VA} values into a + * single combined output accumulator {@code VA} value, once the + * merging accumulators have had all the input values in their + * batches added to them. This operation is invoked repeatedly, + * until there is only one accumulator value left. + *
  5. The {@link #extractOutput} operation is invoked on the final + * accumulator {@code VA} value to get the output {@code VO} value. + * + *
+ * + * All of these operations are passed the {@code K} key that the + * values being combined are associated with. + * + *

For example: + *

 {@code
+   * public class ConcatFn
+   *     extends KeyedCombineFn {
+   *   public static class Accum {
+   *     String s = "";
+   *   }
+   *   public Accum createAccumulator(String key) { return new Accum(); }
+   *   public void addInput(String key, Accum accum, Integer input) {
+   *       accum.s += "+" + input;
+   *   }
+   *   public Accum mergeAccumulators(String key, Iterable accums) {
+   *     Accum merged = new Accum();
+   *     for (Accum accum : accums) {
+   *       merged.s += accum.s;
+   *     }
+   *     return merged;
+   *   }
+   *   public String extractOutput(String key, Accum accum) {
+   *     return key + accum.s;
+   *   }
+   * }
+   * PCollection> pc = ...;
+   * PCollection> pc2 = pc.apply(
+   *     Combine.perKey(new ConcatFn()));
+   * } 
+ * + *

Keyed combining functions used by {@link Combine.PerKey}, + * {@link Combine.GroupedValues}, and {@code PTransforms} derived + * from them should be associative and commutative. + * Associativity is required because input values are first broken + * up into subgroups before being combined, and their intermediate + * results further combined, in an arbitrary tree structure. + * Commutativity is required because any order of the input values + * is ignored when breaking up input values into groups. + * + * @param type of keys + * @param type of input values + * @param type of mutable accumulator values + * @param type of output values + */ + public abstract static class KeyedCombineFn + implements Serializable { + /** + * Returns a new, mutable accumulator value representing the + * accumulation of zero input values. + * + * @param key the key that all the accumulated values using the + * accumulator are associated with + */ + public abstract VA createAccumulator(K key); + + /** + * Adds the given input value to the given accumulator, + * modifying the accumulator. + * + * @param key the key that all the accumulated values using the + * accumulator are associated with + */ + public abstract void addInput(K key, VA accumulator, VI value); + + /** + * Returns an accumulator representing the accumulation of all the + * input values accumulated in the merging accumulators. + * + *

May modify any of the argument accumulators. May return a + * fresh accumulator, or may return one of the (modified) argument + * accumulators. + * + * @param key the key that all the accumulators are associated + * with + */ + public abstract VA mergeAccumulators(K key, Iterable accumulators); + + /** + * Returns the output value that is the result of combining all + * the input values represented by the given accumulator. + * + * @param key the key that all the accumulated values using the + * accumulator are associated with + */ + public abstract VO extractOutput(K key, VA accumulator); + + /** + * Applies this {@code KeyedCombineFn} to a key and a collection + * of input values to produce a combined output value. + * + *
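+   *
+   * For example, with the {@code ConcatFn} defined above,
+   * {@code new ConcatFn().apply("a", Arrays.asList(1, 2, 3))} returns
+   * {@code "a+1+2+3"} (an illustrative check).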

Useful when testing the behavior of a {@code KeyedCombineFn} + * separately from a {@code Combine} transform. + */ + public VO apply(K key, Iterable inputs) { + VA accum = createAccumulator(key); + for (VI input : inputs) { + addInput(key, accum, input); + } + return extractOutput(key, accum); + } + + /** + * Returns the {@code Coder} to use for accumulator {@code VA} + * values, or null if it is not able to be inferred. + * + *

By default, uses the knowledge of the {@code Coder} being + * used for {@code K} keys and input {@code VI} values and the + * enclosing {@code Pipeline}'s {@code CoderRegistry} to try to + * infer the Coder for {@code VA} values. + */ + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder keyCoder, Coder inputCoder) { + return registry.getDefaultCoder( + getClass(), + KeyedCombineFn.class, + ImmutableMap.of("K", keyCoder, "VI", inputCoder), + "VA"); + } + + /** + * Returns the {@code Coder} to use by default for output + * {@code VO} values, or null if it is not able to be inferred. + * + *

By default, uses the knowledge of the {@code Coder} being + * used for {@code K} keys and input {@code VI} values and the + * enclosing {@code Pipeline}'s {@code CoderRegistry} to try to + * infer the Coder for {@code VO} values. + */ + public Coder getDefaultOutputCoder( + CoderRegistry registry, Coder keyCoder, Coder inputCoder) { + return registry.getDefaultCoder( + getClass(), + KeyedCombineFn.class, + ImmutableMap.of( + "K", keyCoder, + "VI", inputCoder, + "VA", getAccumulatorCoder(registry, keyCoder, inputCoder)), + "VO"); + } + } + + + //////////////////////////////////////////////////////////////////////////// + + /** + * {@code Combine.Globally} takes a {@code PCollection} + * and returns a {@code PCollection} whose single element is the result of + * combining all the elements of the input {@code PCollection}, + * using a specified + * {@link CombineFn CombineFn}. It is common + * for {@code VI == VO}, but not required. Common combining + * functions include sums, mins, maxes, and averages of numbers, + * conjunctions and disjunctions of booleans, statistical + * aggregations, etc. + * + *

Example of use: + *

 {@code
+   * PCollection pc = ...;
+   * PCollection sum = pc.apply(
+   *     Combine.globally(new Sum.SumIntegerFn()));
+   * } 
+ * + *

Combining can happen in parallel, with different subsets of the + * input {@code PCollection} being combined separately, and their + * intermediate results combined further, in an arbitrary tree + * reduction pattern, until a single result value is produced. + * + *

By default, the {@code Coder} of the output {@code PValue} + * is inferred from the concrete type of the + * {@code CombineFn}'s output type {@code VO}. + * + *

See also {@link #perKey}/{@link PerKey Combine.PerKey} and + * {@link #groupedValues}/{@link GroupedValues Combine.GroupedValues}, + * which are useful for combining values associated with each key in + * a {@code PCollection} of {@code KV}s. + * + * @param type of input values + * @param type of output values + */ + public static class Globally + extends PTransform, PCollection> { + + private final CombineFn fn; + + private Globally(CombineFn fn) { + this.fn = fn; + } + + @Override + public PCollection apply(PCollection input) { + PCollection output = input + .apply(WithKeys.of((Void) null)) + .setCoder(KvCoder.of(VoidCoder.of(), input.getCoder())) + .apply(Combine.perKey(fn.asKeyedFn())) + .apply(Values.create()); + + if (input.getWindowingFn().isCompatible(new GlobalWindow())) { + return insertDefaultValueIfEmpty(output); + } else { + return output; + } + } + + private PCollection insertDefaultValueIfEmpty(PCollection maybeEmpty) { + final PCollectionView, ?> maybeEmptyView = maybeEmpty.apply( + View.asIterable()); + return maybeEmpty.getPipeline() + .apply(Create.of((Void) null)).setCoder(VoidCoder.of()) + .apply(ParDo.of( + new DoFn() { + @Override + public void processElement(DoFn.ProcessContext c) { + Iterator combined = c.sideInput(maybeEmptyView).iterator(); + if (combined.hasNext()) { + c.output(combined.next()); + } else { + c.output(fn.apply(Collections.emptyList())); + } + } + }).withSideInputs(maybeEmptyView)) + .setCoder(maybeEmpty.getCoder()); + } + + @Override + protected String getKindString() { + return "Combine.Globally"; + } + } + + /** + * Converts a {@link SerializableFunction} from {@code Iterable}s + * to {@code V}s into a simple {@link CombineFn} over {@code V}s. + * + *

Used in the implementation of convenience methods like + * {@link #globally(SerializableFunction)}, + * {@link #perKey(SerializableFunction)}, and + * {@link #groupedValues(SerializableFunction)}. + */ + static class SimpleCombineFn extends CombineFn, V> { + /** + * Returns a {@code CombineFn} that uses the given + * {@code SerializableFunction} to combine values. + */ + public static SimpleCombineFn of( + SerializableFunction, V> combiner) { + return new SimpleCombineFn<>(combiner); + } + + /** + * The number of values to accumulate before invoking the combiner + * function to combine them. + */ + private static final int BUFFER_SIZE = 20; + + /** The combiner function. */ + private final SerializableFunction, V> combiner; + + private SimpleCombineFn(SerializableFunction, V> combiner) { + this.combiner = combiner; + } + + @Override + public List createAccumulator() { + return new ArrayList<>(); + } + + @Override + public void addInput(List accumulator, V input) { + accumulator.add(input); + if (accumulator.size() > BUFFER_SIZE) { + V combined = combiner.apply(accumulator); + accumulator.clear(); + accumulator.add(combined); + } + } + + @Override + public List mergeAccumulators(Iterable> accumulators) { + List singleton = new ArrayList<>(); + singleton.add(combiner.apply(Iterables.concat(accumulators))); + return singleton; + } + + @Override + public V extractOutput(List accumulator) { + return combiner.apply(accumulator); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * {@code PerKey} takes a + * {@code PCollection>}, groups it by key, applies a + * combining function to the {@code VI} values associated with each + * key to produce a combined {@code VO} value, and returns a + * {@code PCollection>} representing a map from each + * distinct key of the input {@code PCollection} to the corresponding + * combined value. {@code VI} and {@code VO} are often the same. + * + *

This is a concise shorthand for an application of + * {@link GroupByKey} followed by an application of + * {@link GroupedValues Combine.GroupedValues}. See those + * operations for more details on how keys are compared for equality + * and on the default {@code Coder} for the output. + * + *

Example of use: + *

 {@code
+   * PCollection> salesRecords = ...;
+   * PCollection> totalSalesPerPerson =
+   *     salesRecords.apply(Combine.perKey(
+   *         new Sum.SumDoubleFn()));
+   * } 
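As a further hedged illustration, the Combine.perKey(SerializableFunction) convenience overload referred to in the SimpleCombineFn documentation above might be used to take a per-key maximum; the variable names are invented for this sketch, and an explicit key-type witness may be needed depending on how the overload's type parameters are declared:

// Hypothetical input.
PCollection<KV<String, Integer>> scores = ...;

PCollection<KV<String, Integer>> maxPerKey = scores.apply(
    Combine.perKey(
        new SerializableFunction<Iterable<Integer>, Integer>() {
          @Override
          public Integer apply(Iterable<Integer> values) {
            int max = Integer.MIN_VALUE;
            for (int v : values) {
              max = Math.max(max, v);
            }
            return max;
          }
        }));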
+ * + *

Each output element is in the window by which its corresponding input + * was grouped, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + * @param the type of the keys of the input and output + * {@code PCollection}s + * @param the type of the values of the input {@code PCollection} + * @param the type of the values of the output {@code PCollection} + */ + public static class PerKey + extends PTransform>, PCollection>> { + + private final transient KeyedCombineFn fn; + + private PerKey( + KeyedCombineFn fn) { + this.fn = fn; + } + + @Override + public PCollection> apply(PCollection> input) { + return input + .apply(GroupByKey.create()) + .apply(Combine.groupedValues(fn)); + } + + @Override + protected String getKindString() { + return "Combine.PerKey"; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * {@code GroupedValues} takes a + * {@code PCollection>>}, such as the result of + * {@link GroupByKey}, applies a specified + * {@link KeyedCombineFn KeyedCombineFn} + * to each of the input {@code KV>} elements to + * produce a combined output {@code KV} element, and returns a + * {@code PCollection>} containing all the combined output + * elements. It is common for {@code VI == VO}, but not required. + * Common combining functions include sums, mins, maxes, and averages + * of numbers, conjunctions and disjunctions of booleans, statistical + * aggregations, etc. + * + *

Example of use: + *

 {@code
+   * PCollection> pc = ...;
+   * PCollection>> groupedByKey = pc.apply(
+   *     new GroupByKey());
+   * PCollection> sumByKey = groupedByKey.apply(
+   *     Combine.groupedValues(
+   *         new Sum.SumIntegerFn()));
+   * } 
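To illustrate why every KeyedCombineFn method receives the key, here is a hedged sketch (names invented, not part of the original patch) of a keyed combining function whose output mentions its key, usable with Combine.groupedValues; as with the earlier sketch, the accumulator type may need a Coder in practice:

public static class KeyedSumToStringFn
    extends Combine.KeyedCombineFn<String, Integer, long[], String> {
  @Override
  public long[] createAccumulator(String key) {
    return new long[] {0L};
  }

  @Override
  public void addInput(String key, long[] accum, Integer value) {
    accum[0] += value;
  }

  @Override
  public long[] mergeAccumulators(String key, Iterable<long[]> accums) {
    long[] merged = new long[] {0L};
    for (long[] accum : accums) {
      merged[0] += accum[0];
    }
    return merged;
  }

  @Override
  public String extractOutput(String key, long[] accum) {
    // The key is available to every method, so it can shape the output.
    return key + "=" + accum[0];
  }
}

// Hypothetical usage:
// PCollection<KV<String, Iterable<Integer>>> groupedByKey = ...;
// PCollection<KV<String, String>> summaries =
//     groupedByKey.apply(Combine.groupedValues(new KeyedSumToStringFn()));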
+ * + *

See also {@link #perKey}/{@link PerKey Combine.PerKey} + * which captures the common pattern of "combining by key" in a + * single easy-to-use {@code PTransform}. + * + *

Combining for different keys can happen in parallel. Moreover, + * combining of the {@code Iterable} values associated with a single + * key can happen in parallel, with different subsets of the values + * being combined separately, and their intermediate results combined + * further, in an arbitrary tree reduction pattern, until a single + * result value is produced for each key. + * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection>} is that of the keys of the input + * {@code PCollection>}, and the {@code Coder} of the values + * of the output {@code PCollection>} is inferred from the + * concrete type of the {@code KeyedCombineFn}'s output + * type {@code VO}. + * + *

Each output element has the same timestamp and is in the same window + * as its corresponding input element, and the output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See also {@link #globally}/{@link Globally Combine.Globally}, + * which combines all the values in a {@code PCollection} into a + * single value in a {@code PCollection}. + * + * @param type of input and output keys + * @param type of input values + * @param type of output values + */ + public static class GroupedValues + extends PTransform + >>, + PCollection>> { + + private final KeyedCombineFn fn; + + private GroupedValues(KeyedCombineFn fn) { + this.fn = fn; + } + + /** + * Returns the KeyedCombineFn used by this Combine operation. + */ + public KeyedCombineFn getFn() { + return fn; + } + + @Override + public PCollection> apply( + PCollection>> input) { + Coder> outputCoder = getDefaultOutputCoder(); + return input.apply(ParDo.of( + new DoFn>, KV>() { + @Override + public void processElement(ProcessContext c) { + K key = c.element().getKey(); + c.output(KV.of(key, fn.apply(key, c.element().getValue()))); + } + })).setCoder(outputCoder); + } + + private KvCoder getKvCoder() { + Coder>> inputCoder = + getInput().getCoder(); + if (!(inputCoder instanceof KvCoder)) { + throw new IllegalStateException( + "Combine.GroupedValues requires its input to use KvCoder"); + } + @SuppressWarnings({"unchecked", "rawtypes"}) + KvCoder> kvCoder = (KvCoder) inputCoder; + Coder keyCoder = kvCoder.getKeyCoder(); + Coder> kvValueCoder = kvCoder.getValueCoder(); + if (!(kvValueCoder instanceof IterableCoder)) { + throw new IllegalStateException( + "Combine.GroupedValues requires its input values to use " + + "IterableCoder"); + } + IterableCoder inputValuesCoder = (IterableCoder) kvValueCoder; + Coder inputValueCoder = inputValuesCoder.getElemCoder(); + return KvCoder.of(keyCoder, inputValueCoder); + } + + @SuppressWarnings("unchecked") + public Coder getAccumulatorCoder() { + KvCoder kvCoder = getKvCoder(); + return ((KeyedCombineFn) fn).getAccumulatorCoder( + getCoderRegistry(), kvCoder.getKeyCoder(), kvCoder.getValueCoder()); + } + + @Override + public Coder> getDefaultOutputCoder() { + KvCoder kvCoder = getKvCoder(); + @SuppressWarnings("unchecked") + Coder outputValueCoder = ((KeyedCombineFn) fn) + .getDefaultOutputCoder( + getCoderRegistry(), kvCoder.getKeyCoder(), kvCoder.getValueCoder()); + return KvCoder.of(kvCoder.getKeyCoder(), outputValueCoder); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java new file mode 100644 index 0000000000000..1303b0a98634a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code Count} takes a {@code PCollection} and returns a + * {@code PCollection>} representing a map from each + * distinct element of the input {@code PCollection} to the number of times + * that element occurs in the input. Each of the keys in the output + * {@code PCollection} is unique. + * + *

Two values of type {@code T} are compared for equality not by + * regular Java {@link Object#equals}, but instead by first encoding + * each of the elements using the {@code PCollection}'s {@code Coder}, and then + * comparing the encoded bytes. This admits efficient parallel + * evaluation. + * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as the {@code Coder} of the + * elements of the input {@code PCollection}. + * + *

Each output element is in the window by which its corresponding input + * was grouped, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + *

Example of use: + *

 {@code
+ * PCollection words = ...;
+ * PCollection> wordCounts =
+ *     words.apply(Count.create());
+ * } 
+ */ +public class Count { + + /** + * Returns a {@link Globally Count.Globally} {@link PTransform} + * that counts the number of elements in its input {@link PCollection}. + * + *

See {@link Globally Count.Globally} for more details. + */ + public static Globally globally() { + return new Globally<>(); + } + + /** + * Returns a {@link PerElement Count.PerElement} {@link PTransform} + * that counts the number of occurrences of each element in its + * input {@link PCollection}. + * + *

See {@link PerElement Count.PerElement} for more details. + */ + public static PerElement perElement() { + return new PerElement<>(); + } + + /////////////////////////////////////// + + /** + * {@code Count.Globally} takes a {@code PCollection} and returns a + * {@code PCollection} containing a single element which is the total + * number of elements in the {@code PCollection}. + * + *

Example of use: + *

 {@code
+   * PCollection words = ...;
+   * PCollection wordCount =
+   *     words.apply(Count.globally());
+   * } 
+ * + * @param the type of the elements of the input {@code PCollection} + */ + public static class Globally + extends PTransform, PCollection> { + + public Globally() { } + + @Override + public PCollection apply(PCollection input) { + return + input + .apply(ParDo.named("Init") + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(1L); + } + })) + .apply(Sum.longsGlobally()); + } + } + + /** + * {@code Count.PerElement} takes a {@code PCollection} and returns a + * {@code PCollection>} representing a map from each + * distinct element of the input {@code PCollection} to the number of times + * that element occurs in the input. Each of the keys in the output + * {@code PCollection} is unique. + * + *

This transform compares two values of type {@code T} by first + * encoding each element using the input {@code PCollection}'s + * {@code Coder}, then comparing the encoded bytes. Because of this, + * the input coder must be deterministic. (See + * {@link com.google.cloud.dataflow.sdk.coders.Coder#isDeterministic()} for more detail). + * Performing the comparison in this manner admits efficient parallel evaluation. + * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as the {@code Coder} of the + * elements of the input {@code PCollection}. + * + *

Example of use: + *

 {@code
+   * PCollection words = ...;
+   * PCollection> wordCounts =
+   *     words.apply(Count.perElement());
+   * } 
+ * + * @param the type of the elements of the input {@code PCollection}, and + * the type of the keys of the output {@code PCollection} + */ + public static class PerElement + extends PTransform, PCollection>> { + + public PerElement() { } + + @Override + public PCollection> apply(PCollection input) { + return + input + .apply(ParDo.named("Init") + .of(new DoFn>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of(c.element(), 1L)); + } + })) + .apply(Sum.longsPerKey()); + } + + @Override + public String getKindString() { + return "Count.PerElement"; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java new file mode 100644 index 0000000000000..93747ea6462f7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -0,0 +1,314 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PBegin; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; +import com.google.common.reflect.TypeToken; + +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * {@code Create} takes a collection of elements of type {@code T} + * known when the pipeline is constructed and returns a + * {@code PCollection} containing the elements. + * + *

Example of use: + *

 {@code
+ * Pipeline p = ...;
+ *
+ * PCollection pc = p.apply(Create.of(3, 4, 5)).setCoder(BigEndianIntegerCoder.of());
+ *
+ * Map map = ...;
+ * PCollection> pt =
+ *     p.apply(Create.of(map))
+ *      .setCoder(KvCoder.of(StringUtf8Coder.of(),
+ *                           BigEndianIntegerCoder.of()));
+ * } 
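As a hedged companion example for the timestamped variants described further below (the element values and timestamps here are invented for illustration):

// Reusing the Pipeline p from the example above.
PCollection<String> events = p
    .apply(Create.timestamped(
        TimestampedValue.of("login", new Instant(0L)),
        TimestampedValue.of("logout", new Instant(60000L))))
    .setCoder(StringUtf8Coder.of());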
+ * + *

Note that {@link PCollection#setCoder} must be called + * explicitly to set the encoding of the resulting + * {@code PCollection}, since {@code Create} does not infer the + * encoding. + * + *

A good use for {@code Create} is when a {@code PCollection} + * needs to be created without dependencies on files or other external + * entities. This is especially useful during testing. + * + *

Caveat: {@code Create} only supports small in-memory datasets, + * particularly when submitting jobs to the Google Cloud Dataflow + * service. + * + *

{@code Create} can automatically determine the {@code Coder} to use + * if all elements are the same type, and a default exists for that type. + * See {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry} for details + * on how defaults are determined. + * + * @param the type of the elements of the resulting {@code PCollection} + */ +public class Create extends PTransform> { + + /** + * Returns a new {@code Create} root transform that produces a + * {@link PCollection} containing the specified elements. + * + *

The argument should not be modified after this is called. + * + *

The elements will have a timestamp of negative infinity; see + * {@link Create#timestamped} for a way of creating a {@code PCollection} + * with timestamped elements. + * + *

The result of applying this transform should have its + * {@link Coder} specified explicitly, via a call to + * {@link PCollection#setCoder}. + */ + public static Create of(Iterable elems) { + return new Create<>(elems); + } + + /** + * Returns a new {@code Create} root transform that produces a + * {@link PCollection} containing the specified elements. + * + *

The elements will have a timestamp of negative infinity; see + * {@link Create#timestamped} for a way of creating a {@code PCollection} + * with timestamped elements. + * + *

The argument should not be modified after this is called. + * + *

The result of applying this transform should have its + * {@link Coder} specified explicitly, via a call to + * {@link PCollection#setCoder}. + */ + public static Create of(T... elems) { + return of(Arrays.asList(elems)); + } + + /** + * Returns a new {@code Create} root transform that produces a + * {@link PCollection} of {@link KV}s corresponding to the keys and + * values of the specified {@code Map}. + * + *

The elements will have a timestamp of negative infinity; see + * {@link Create#timestamped} for a way of creating a {@code PCollection} + * with timestamped elements. + * + *

The result of applying this transform should have its + * {@link Coder} specified explicitly, via a call to + * {@link PCollection#setCoder}. + */ + public static Create> of(Map elems) { + List> kvs = new ArrayList<>(elems.size()); + for (Map.Entry entry : elems.entrySet()) { + kvs.add(KV.of(entry.getKey(), entry.getValue())); + } + return of(kvs); + } + + /** + * Returns a new root transform that produces a {@link PCollection} containing + * the specified elements with the specified timestamps. + * + *

The argument should not be modified after this is called. + */ + public static CreateTimestamped timestamped(Iterable> elems) { + return new CreateTimestamped<>(elems); + } + + /** + * Returns a new root transform that produces a {@link PCollection} containing + * the specified elements with the specified timestamps. + * + *

The argument should not be modified after this is called. + */ + public static CreateTimestamped timestamped(TimestampedValue... elems) { + return new CreateTimestamped(Arrays.asList(elems)); + } + + /** + * Returns a new root transform that produces a {@link PCollection} containing + * the specified elements with the specified timestamps. + * + *

The arguments should not be modified after this is called. + * + * @throws IllegalArgumentException if the number of values does not match + * the number of timestamps + */ + public static CreateTimestamped timestamped( + Iterable values, Iterable timestamps) { + List> elems = new ArrayList<>(); + Iterator valueIter = values.iterator(); + Iterator timestampIter = timestamps.iterator(); + while (valueIter.hasNext() && timestampIter.hasNext()) { + elems.add(TimestampedValue.of(valueIter.next(), new Instant(timestampIter.next()))); + } + Preconditions.checkArgument( + !valueIter.hasNext() && !timestampIter.hasNext(), + "Expected the same number of values and timestamps."); + return new CreateTimestamped<>(elems); + } + + @Override + public PCollection apply(PInput input) { + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** The elements of the resulting PCollection. */ + private final Iterable elems; + + /** + * Constructs a {@code Create} transform that produces a + * {@link PCollection} containing the specified elements. + * + *

The argument should not be modified after this is called. + */ + private Create(Iterable elems) { + this.elems = elems; + } + + public Iterable getElements() { + return elems; + } + + @Override + protected Coder getDefaultOutputCoder() { + // First try to deduce a coder using the types of the elements. + Class elementType = null; + for (T elem : elems) { + Class type = elem.getClass(); + if (elementType == null) { + elementType = type; + } else if (!elementType.equals(type)) { + // Elements are not the same type, require a user-specified coder. + elementType = null; + break; + } + } + if (elementType == null) { + return super.getDefaultOutputCoder(); + } + if (elementType.getTypeParameters().length == 0) { + Coder candidate = getCoderRegistry().getDefaultCoder(TypeToken.of(elementType)); + if (candidate != null) { + return candidate; + } + } + + // If that fails, try to deduce a coder using the elements themselves + Coder coder = null; + for (T elem : elems) { + Coder c = getCoderRegistry().getDefaultCoder(elem); + if (coder == null) { + coder = c; + } else if (!Objects.equals(c, coder)) { + coder = null; + break; + } + } + if (coder != null) { + return coder; + } + + return super.getDefaultOutputCoder(); + } + + /** + * A {@code PTransform} that creates a {@code PCollection} whose elements have + * associated timestamps. + */ + private static class CreateTimestamped extends PTransform> { + /** The timestamped elements of the resulting PCollection. */ + private final Iterable> elems; + + private CreateTimestamped(Iterable> elems) { + this.elems = elems; + } + + @Override + public PCollection apply(PBegin input) { + PCollection> intermediate = input.apply(Create.of(elems)); + if (!elems.iterator().hasNext()) { + // There aren't any elements, so we can provide a fake coder instance. + // If we don't set a Coder here, users of CreateTimestamped have + // no way to set the coder of the intermediate PCollection. + intermediate.setCoder((Coder) TimestampedValue.TimestampedValueCoder.of(VoidCoder.of())); + } + + return intermediate.apply(ParDo.of(new ConvertTimestamps())); + } + + private static class ConvertTimestamps extends DoFn, T> { + @Override + public void processElement(ProcessContext c) { + c.outputWithTimestamp(c.element().getValue(), c.element().getTimestamp()); + } + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Create.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Create transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateHelper(transform, context); + } + }); + } + + private static void evaluateHelper( + Create transform, + DirectPipelineRunner.EvaluationContext context) { + // Convert the Iterable of elems into a List of elems. 
+ List listElems; + if (transform.elems instanceof Collection) { + Collection collectionElems = (Collection) transform.elems; + listElems = new ArrayList<>(collectionElems.size()); + } else { + listElems = new ArrayList<>(); + } + for (T elem : transform.elems) { + listElems.add( + context.ensureElementEncodable(transform.getOutput(), elem)); + } + context.setPCollection(transform.getOutput(), listElems); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java new file mode 100644 index 0000000000000..3c61ab38557d7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.CodedTupleTagMap; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.reflect.TypeToken; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Collection; +import java.util.List; + +/** + * The argument to {@link ParDo} providing the code to use to process + * elements of the input + * {@link com.google.cloud.dataflow.sdk.values.PCollection}. + * + *

See {@link ParDo} for more explanation, examples of use, and + * discussion of constraints on {@code DoFn}s, including their + * serializability, lack of access to global shared mutable state, + * requirements for failure tolerance, and benefits of optimization. + * + *
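For orientation, a minimal DoFn might look like the following sketch (the class name and element types are invented for this example):

// Maps each input String to its length.
static class ComputeWordLengthFn extends DoFn<String, Integer> {
  @Override
  public void processElement(ProcessContext c) {
    // c.element() is the current input; c.output() emits to the main output.
    c.output(c.element().length());
  }
}

// Hypothetical usage:
// PCollection<Integer> wordLengths =
//     words.apply(ParDo.of(new ComputeWordLengthFn()));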

{@code DoFn}s can be tested in the context of a particular + * {@code Pipeline} by running that {@code Pipeline} on sample input + * and then checking its output. Unit testing of a {@code DoFn}, + * separately from any {@code ParDo} transform or {@code Pipeline}, + * can be done via the {@link DoFnTester} harness. + * + * @param the type of the (main) input elements + * @param the type of the (main) output elements + */ +public abstract class DoFn implements Serializable { + + /** Information accessible to all methods in this {@code DoFn}. */ + public abstract class Context { + + /** + * Returns the {@code PipelineOptions} specified with the + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} + * invoking this {@code DoFn}. The {@code PipelineOptions} will + * be the default running via {@link DoFnTester}. + */ + public abstract PipelineOptions getPipelineOptions(); + + /** + * Returns the value of the side input. + * + * @throws IllegalArgumentException if this is not a side input + * @see ParDo#withSideInput + */ + public abstract T sideInput(PCollectionView view); + + /** + * Adds the given element to the main output {@code PCollection}. + * + *

If invoked from {@link DoFn#processElement}, the output + * element will have the same timestamp and be in the same windows + * as the input element passed to {@link DoFn#processElement}. + * + *

It is illegal to invoke this from {@link #startBundle} or + * {@link #finishBundle} unless the input {@code PCollection} is + * windowed by the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + * If this is the case, the output element will have a timestamp + * of negative infinity and be in the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + */ + public abstract void output(O output); + + /** + * Adds the given element to the main output {@code PCollection}, + * with the given timestamp. + * + *

If invoked from {@link DoFn#processElement}, the timestamp + * must not be older than the input element's timestamp minus + * {@link DoFn#getAllowedTimestampSkew}. The output element will + * be in the same windows as the input element. + * + *

It is illegal to invoke this from {@link #startBundle} or + * {@link #finishBundle} unless the input {@code PCollection} is + * windowed by the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + * If this is the case, the output element's timestamp will be + * the given timestamp and its window will be the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + */ + public abstract void outputWithTimestamp(O output, Instant timestamp); + + /** + * Adds the given element to the side output {@code PCollection} with the + * given tag. + * + *

The caller of {@code ParDo} uses {@link ParDo#withOutputTags} to + * specify the tags of side outputs that it consumes. Non-consumed side + * outputs, e.g., outputs for monitoring purposes only, don't necessarily + * need to be specified. + * + *

The output element will have the same timestamp and be in the same + * windows as the input element passed to {@link DoFn#processElement}. + * + * @throws IllegalArgumentException if the number of outputs exceeds + * the limit of 1,000 outputs per DoFn + * @see ParDo#withOutputTags + */ + public abstract void sideOutput(TupleTag tag, T output); + + // TODO: add sideOutputWithTimestamp[AndWindows] + + /** + * Returns an aggregator with aggregation logic specified by the CombineFn + * argument. The name provided should be unique across aggregators created + * within the containing ParDo transform application. + * + *

All instances of this DoFn in the containing ParDo + * transform application should define aggregators consistently, + * i.e., an aggregator with a given name always specifies the same + * combiner in all DoFn instances in the containing ParDo + * transform application. + * + * @throws IllegalArgumentException if the given CombineFn is not + * supported as aggregator's combiner, or if the given name collides + * with another aggregator or system-provided counter. + */ + public abstract Aggregator createAggregator( + String name, Combine.CombineFn combiner); + + /** + * Returns an aggregator with aggregation logic specified by the + * SerializableFunction argument. The name provided should be unique across + * aggregators created within the containing ParDo transform application. + * + *

All instances of this DoFn in the containing ParDo + * transform application should define aggregators consistently, + * i.e., an aggregator with a given name always specifies the same + * combiner in all DoFn instances in the containing ParDo + * transform application. + * + * @throws IllegalArgumentException if the given SerializableFunction is + * not supported as aggregator's combiner, or if the given name collides + * with another aggregator or system-provided counter. + */ + public abstract Aggregator createAggregator( + String name, SerializableFunction, AO> combiner); + } + + /** + * Information accessible when running {@link DoFn#processElement}. + */ + public abstract class ProcessContext extends Context { + + /** + * Returns the input element to be processed. + */ + public abstract I element(); + + /** + * Returns this {@code DoFn}'s state associated with the input + * element's key. This state can be used by the {@code DoFn} to + * store whatever information it likes with that key. Unlike + * {@code DoFn} instance variables, this state is persistent and + * can be arbitrarily large; it is more expensive than instance + * variable state, however. It is particularly intended for + * streaming computations. + * + *

Requires that this {@code DoFn} implements + * {@link RequiresKeyedState}. + * + *

Each {@link ParDo} invocation with this {@code DoFn} as an + * argument will maintain its own {@code KeyedState} maps, one per + * key. + * + * @throws UnsupportedOperationException if this {@link DoFn} does + * not implement {@link RequiresKeyedState} + */ + public abstract KeyedState keyedState(); + + /** + * Returns the timestamp of the input element. + * + *

See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} + * for more information. + */ + public abstract Instant timestamp(); + + /** + * Returns the set of windows to which the input element has been assigned. + * + *

See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} + * for more information. + */ + public abstract Collection windows(); + } + + /** + * Returns the allowed timestamp skew duration, which is the maximum + * duration that timestamps can be shifted backward in + * {@link DoFn.Context#outputWithTimestamp}. + * + * The default value is {@code Duration.ZERO}, in which case + * timestamps can only be shifted forward into the future. For infinite + * skew, return {@code Duration.millis(Long.MAX_VALUE)}. + */ + public Duration getAllowedTimestampSkew() { + return Duration.ZERO; + } + + /** + * Interface for signaling that a {@link DoFn} needs to maintain + * per-key state, accessed via + * {@link DoFn.ProcessContext#keyedState}. + * + *

This functionality is experimental and likely to change. + */ + public interface RequiresKeyedState {} + + /** + * Interface for interacting with keyed state. + * + *

This functionality is experimental and likely to change. + */ + public interface KeyedState { + /** + * Updates this {@code KeyedState} in place so that the given tag + * maps to the given value. + * + * @throws IOException if encoding the given value fails + */ + public void store(CodedTupleTag tag, T value) throws IOException; + + /** + * Returns the value associated with the given tag in this + * {@code KeyedState}, or {@code null} if the tag has no asssociated + * value. + * + *

See {@link #lookup(List)} to look up multiple tags at + * once. It is significantly more efficient to look up multiple + * tags all at once rather than one at a time. + * + * @throws IOException if decoding the requested value fails + */ + public T lookup(CodedTupleTag tag) throws IOException; + + /** + * Returns a map from the given tags to the values associated with + * those tags in this {@code KeyedState}. A tag will map to null if + * the tag had no associated value. + * + *

See {@link #lookup(CodedTupleTag)} to look up a single + * tag. + * + * @throws CoderException if decoding any of the requested values fails + */ + public CodedTupleTagMap lookup(List> tags) throws IOException; + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Prepares this {@code DoFn} instance for processing a batch of elements. + * + *

By default, does nothing. + */ + public void startBundle(Context c) throws Exception { + } + + /** + * Processes an input element. + */ + public abstract void processElement(ProcessContext c) throws Exception; + + /** + * Finishes processing this batch of elements. This {@code DoFn} + * instance will be thrown away after this operation returns. + * + *

By default, does nothing. + */ + public void finishBundle(Context c) throws Exception { + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Returns a {@link TypeToken} capturing what is known statically + * about the input type of this {@code DoFn} instance's most-derived + * class. + * + *

See {@link #getOutputTypeToken} for more discussion. + */ + TypeToken getInputTypeToken() { + return new TypeToken(getClass()) {}; + } + + /** + * Returns a {@link TypeToken} capturing what is known statically + * about the output type of this {@code DoFn} instance's + * most-derived class. + * + *

In the normal case of a concrete {@code DoFn} subclass with + * no generic type parameters of its own (including anonymous inner + * classes), this will be a complete non-generic type, which is good + * for choosing a default output {@code Coder} for the output + * {@code PCollection}. + */ + TypeToken getOutputTypeToken() { + return new TypeToken(getClass()) {}; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java new file mode 100644 index 0000000000000..3e23b5ed04506 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java @@ -0,0 +1,357 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.DoFnRunner; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; +import com.google.common.base.Function; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A harness for unit-testing a {@link DoFn}. + * + *

For example: + * + *

 {@code
+ * DoFn fn = ...;
+ *
+ * DoFnTester fnTester = DoFnTester.of(fn);
+ *
+ * // Set arguments shared across all batches:
+ * fnTester.setSideInputs(...);      // If fn takes side inputs.
+ * fnTester.setSideOutputTags(...);  // If fn writes to side outputs.
+ *
+ * // Process a batch containing a single input element:
+ * Input testInput = ...;
+ * List testOutputs = fnTester.processBatch(testInput);
+ * Assert.assertThat(testOutputs,
+ *                   JUnitMatchers.hasItems(...));
+ *
+ * // Process a bigger batch:
+ * Assert.assertThat(fnTester.processBatch(i1, i2, ...),
+ *                   JUnitMatchers.hasItems(...));
+ * } 
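Made concrete with the hypothetical ComputeWordLengthFn sketched alongside DoFn earlier in this patch, the same flow might look like this (illustrative only):

DoFnTester<String, Integer> fnTester =
    DoFnTester.of(new ComputeWordLengthFn());

// This DoFn uses no side inputs or side outputs, so no further setup is needed.
List<Integer> lengths = fnTester.processBatch("a", "bb", "ccc");
// lengths should now contain 1, 2 and 3.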
+ * + * @param the type of the {@code DoFn}'s (main) input elements + * @param the type of the {@code DoFn}'s (main) output elements + */ +public class DoFnTester { + /** + * Returns a {@code DoFnTester} supporting unit-testing of the given + * {@link DoFn}. + */ + @SuppressWarnings("unchecked") + public static DoFnTester of(DoFn fn) { + return new DoFnTester(fn); + } + + /** + * Registers the tuple of values of the side input {@link PCollectionView}s to + * pass to the {@link DoFn} under test. + * + *

If needed, first creates a fresh instance of the {@link DoFn} + * under test. + * + *

If this isn't called, {@code DoFnTester} assumes the + * {@link DoFn} takes no side inputs. + */ + public void setSideInputs(Map, Iterable>> sideInputs) { + this.sideInputs = sideInputs; + resetState(); + } + + /** + * Registers the values of a side input {@link PCollectionView} to + * pass to the {@link DoFn} under test. + * + *

If needed, first creates a fresh instance of the {@code DoFn} + * under test. + * + *

If this isn't called, {@code DoFnTester} assumes the + * {@code DoFn} takes no side inputs. + */ + public void setSideInput(PCollectionView sideInput, Iterable> value) { + sideInputs.put(sideInput, value); + } + + /** + * Registers the values for a side input {@link PCollectionView} to + * pass to the {@link DoFn} under test. All values are placed + * in the global window. + */ + public void setSideInputInGlobalWindow( + PCollectionView sideInput, + Iterable value) { + sideInputs.put( + sideInput, + Iterables.transform(value, new Function>() { + @Override + public WindowedValue apply(Object input) { + return WindowedValue.valueInGlobalWindow(input); + } + })); + } + + + /** + * Registers the list of {@code TupleTag}s that can be used by the + * {@code DoFn} under test to output to side output + * {@code PCollection}s. + * + *

If needed, first creates a fresh instance of the DoFn under test. + * + *

If this isn't called, {@code DoFnTester} assumes the + * {@code DoFn} doesn't emit to any side outputs. + */ + public void setSideOutputTags(TupleTagList sideOutputTags) { + this.sideOutputTags = sideOutputTags.getAll(); + resetState(); + } + + /** + * A convenience operation that first calls {@link #startBundle}, + * then calls {@link #processElement} on each of the arguments, then + * calls {@link #finishBundle}, then returns the result of + * {@link #takeOutputElements}. + */ + public List processBatch(I... inputElements) { + startBundle(); + for (I inputElement : inputElements) { + processElement(inputElement); + } + finishBundle(); + return takeOutputElements(); + } + + /** + * Calls {@link DoFn#startBundle} on the {@code DoFn} under test. + * + *

If needed, first creates a fresh instance of the DoFn under test. + */ + public void startBundle() { + resetState(); + initializeState(); + fnRunner.startBundle(); + state = State.STARTED; + } + + /** + * Calls {@link DoFn#processElement} on the {@code DoFn} under test, in a + * context where {@link DoFn.ProcessContext#element} returns the + * given element. + * + *

Will call {@link #startBundle} automatically, if it hasn't + * already been called. + * + * @throws IllegalStateException if the {@code DoFn} under test has already + * been finished + */ + public void processElement(I element) { + if (state == State.FINISHED) { + throw new IllegalStateException("finishBundle() has already been called"); + } + if (state == State.UNSTARTED) { + startBundle(); + } + fnRunner.processElement(WindowedValue.valueInGlobalWindow(element)); + } + + /** + * Calls {@link DoFn#finishBundle} of the {@code DoFn} under test. + * + *

Will call {@link #startBundle} automatically, if it hasn't + * already been called. + * + * @throws IllegalStateException if the {@code DoFn} under test has already + * been finished + */ + public void finishBundle() { + if (state == State.FINISHED) { + throw new IllegalStateException("finishBundle() has already been called"); + } + if (state == State.UNSTARTED) { + startBundle(); + } + fnRunner.finishBundle(); + state = State.FINISHED; + } + + /** + * Returns the elements output so far to the main output. Does not + * clear them, so subsequent calls will continue to include these + * elements. + * + * @see #takeOutputElements + * @see #clearOutputElements + * + * TODO: provide accessors that take and return {@code WindowedValue}s + * in order to test timestamp- and window-sensitive DoFns. + */ + public List peekOutputElements() { + // TODO: Should we return an unmodifiable list? + return Lists.transform(fnRunner.getReceiver(mainOutputTag), + new Function() { + @Override + public O apply(Object input) { + return ((WindowedValue) input).getValue(); + } + }); + + } + + /** + * Clears the record of the elements output so far to the main output. + * + * @see #peekOutputElements + */ + public void clearOutputElements() { + peekOutputElements().clear(); + } + + /** + * Returns the elements output so far to the main output. + * Clears the list so these elements don't appear in future calls. + * + * @see #peekOutputElements + */ + public List takeOutputElements() { + List resultElems = new ArrayList<>(peekOutputElements()); + clearOutputElements(); + return resultElems; + } + + /** + * Returns the elements output so far to the side output with the + * given tag. Does not clear them, so subsequent calls will + * continue to include these elements. + * + * @see #takeSideOutputElements + * @see #clearSideOutputElements + */ + public List peekSideOutputElements(TupleTag tag) { + // TODO: Should we return an unmodifiable list? + return Lists.transform(fnRunner.getReceiver(tag), + new Function() { + @Override + public T apply(Object input) { + return ((WindowedValue) input).getValue(); + }}); + } + + /** + * Clears the record of the elements output so far to the side + * output with the given tag. + * + * @see #peekSideOutputElements + */ + public void clearSideOutputElements(TupleTag tag) { + peekSideOutputElements(tag).clear(); + } + + /** + * Returns the elements output so far to the side output with the given tag. + * Clears the list so these elements don't appear in future calls. + * + * @see #peekSideOutputElements + */ + public List takeSideOutputElements(TupleTag tag) { + List resultElems = new ArrayList<>(peekSideOutputElements(tag)); + clearSideOutputElements(tag); + return resultElems; + } + + ///////////////////////////////////////////////////////////////////////////// + + /** The possible states of processing a DoFn. */ + enum State { UNSTARTED, STARTED, FINISHED } + + final PipelineOptions options = PipelineOptionsFactory.create(); + + /** The original DoFn under test. */ + final DoFn origFn; + + /** The side input values to provide to the DoFn under test. */ + private Map, Iterable>> sideInputs = + new HashMap<>(); + + /** The output tags used by the DoFn under test. */ + TupleTag mainOutputTag = new TupleTag<>(); + List> sideOutputTags = new ArrayList<>(); + + /** The original DoFn under test, if started. */ + DoFn fn; + + /** The DoFnRunner if processing is in progress. */ + DoFnRunner fnRunner; + + /** Counters for user-defined Aggregators if processing is in progress. 
*/ + CounterSet counterSet; + // TODO: expose counterSet through a getter method, once we have + // a convenient public API for it. + + /** The state of processing of the DoFn under test. */ + State state; + + DoFnTester(DoFn origFn) { + this.origFn = origFn; + resetState(); + } + + void resetState() { + fn = null; + fnRunner = null; + counterSet = null; + state = State.UNSTARTED; + } + + @SuppressWarnings("unchecked") + void initializeState() { + fn = (DoFn) + SerializableUtils.deserializeFromByteArray( + SerializableUtils.serializeToByteArray(origFn), + origFn.toString()); + counterSet = new CounterSet(); + PTuple runnerSideInputs = PTuple.empty(); + for (Map.Entry, Iterable>> entry + : sideInputs.entrySet()) { + runnerSideInputs = runnerSideInputs.and(entry.getKey().getTagInternal(), entry.getValue()); + } + fnRunner = DoFnRunner.createWithListOutputs( + options, + fn, + runnerSideInputs, + mainOutputTag, + sideOutputTags, + (new BatchModeExecutionContext()).createStepContext("stepName"), + counterSet.getAddCounterMutator()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/First.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/First.java new file mode 100644 index 0000000000000..9e4f3b099d48a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/First.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; + +/** + * {@code First} takes a {@code PCollection} and a limit, and + * produces a new {@code PCollection} containing up to limit + * elements of the input {@code PCollection}. + * + *

If the input and output {@code PCollection}s are ordered, then + * {@code First} will select the first elements; otherwise it will + * select any elements. + * + *

If limit is greater than or equal to the size of the input + * {@code PCollection}, then all the input's elements will be selected. + * + *

All of the elements of the output {@code PCollection} should fit into + * main memory of a single worker machine. This operation does not + * run in parallel. + * + *

Example of use: + *

 {@code
+ * PCollection input = ...;
+ * PCollection output = input.apply(First.of(100));
+ * } 
+ * + * @param the type of the elements of the input and output + * {@code PCollection}s + */ +public class First extends PTransform, PCollection> { + /** + * Returns a {@code First} {@code PTransform}. + * + * @param the type of the elements of the input and output + * {@code PCollection}s + * @param limit the numer of elements to take from the input + */ + public static First of(long limit) { + return new First<>(limit); + } + + private final long limit; + + /** + * Constructs a {@code First} PTransform that, when applied, + * produces a new PCollection containing up to {@code limit} + * elements of its input {@code PCollection}. + */ + private First(long limit) { + this.limit = limit; + if (limit < 0) { + throw new IllegalArgumentException( + "limit argument to First should be non-negative"); + } + } + + private static class CopyFirstDoFn extends DoFn { + long limit; + final PCollectionView, ?> iterableView; + + public CopyFirstDoFn(long limit, PCollectionView, ?> iterableView) { + this.limit = limit; + this.iterableView = iterableView; + } + + @Override + public void processElement(ProcessContext c) { + for (T i : c.sideInput(iterableView)) { + if (limit-- <= 0) { + break; + } + c.output(i); + } + } + } + + @Override + public PCollection apply(PCollection in) { + PCollectionView, ?> iterableView = in.apply(View.asIterable()); + return + in.getPipeline() + .apply(Create.of((Void) null)).setCoder(VoidCoder.of()) + .apply(ParDo + .withSideInputs(iterableView) + .of(new CopyFirstDoFn<>(limit, iterableView))) + .setCoder(in.getCoder()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java new file mode 100644 index 0000000000000..14b2169b97bfa --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import java.util.ArrayList; +import java.util.List; + +/** + * {@code Flatten} takes multiple {@code PCollection}s bundled + * into a {@code PCollectionList} and returns a single + * {@code PCollection} containing all the elements in all the input + * {@code PCollection}s. The name "Flatten" suggests taking a list of + * lists and flattening them into a single list. + * + *

Example of use: + *

 {@code
+ * PCollection pc1 = ...;
+ * PCollection pc2 = ...;
+ * PCollection pc3 = ...;
+ * PCollectionList pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
+ * PCollection merged = pcs.apply(Flatten..create());
+ * } 
+ * + *

By default, the {@code Coder} of the output {@code PCollection} + * is the same as the {@code Coder} of the first {@code PCollection} + * in the input {@code PCollectionList} (if the + * {@code PCollectionList} is non-empty). + * + */ +public class Flatten { + + /** + * Returns a {@link PTransform} that flattens a {@link PCollectionList} + * into a {@link PCollection} containing all the elements of all + * the {@link PCollection}s in its input. + * + *

If any of the inputs to {@code Flatten} require window merging, + * all inputs must have equal {@link WindowingFn}s. + * The output elements of {@code Flatten} are in the same windows and + * have the same timestamps as their corresponding input elements. The output + * {@code PCollection} will have the same + * {@link WindowingFn} as all of the inputs. + * + * @param the type of the elements in the input and output + * {@code PCollection}s. + */ + public static FlattenPCollectionList pCollections() { + return new FlattenPCollectionList<>(); + } + + @Deprecated + public static FlattenPCollectionList create() { + return pCollections(); + } + + /** + * Returns a {@code PTransform} that takes a {@code PCollection>} + * and returns a {@code PCollection} containing all the elements from + * all the {@code Iterable}s. + * + *

Example of use: + *

 {@code
+   * PCollection> pcOfIterables = ...;
+   * PCollection pc = pcOfIterables.apply(Flatten.iterables());
+   * } 
+ * + *

By default, the output {@code PCollection} encodes its elements + * using the same {@code Coder} that the input uses for + * the elements in its {@code Iterable}. + * + * @param the type of the elements of the input {@code Iterable} and + * the output {@code PCollection} + */ + public static FlattenIterables iterables() { + return new FlattenIterables<>(); + } + + /** + * A {@link PTransform} that flattens a {@link PCollectionList} + * into a {@link PCollection} containing all the elements of all + * the {@link PCollection}s in its input. + * + * @param the type of the elements in the input and output + * {@code PCollection}s. + */ + public static class FlattenPCollectionList + extends PTransform, PCollection> { + + private FlattenPCollectionList() { } + + @Override + public PCollection apply(PCollectionList inputs) { + WindowingFn windowingFn; + if (!getInput().getAll().isEmpty()) { + windowingFn = getInput().get(0).getWindowingFn(); + for (PCollection input : getInput().getAll()) { + if (!windowingFn.isCompatible(input.getWindowingFn())) { + throw new IllegalStateException( + "Inputs to Flatten had incompatible window windowingFns: " + + windowingFn + ", " + input.getWindowingFn()); + } + } + } else { + windowingFn = new GlobalWindow(); + } + + return PCollection.createPrimitiveOutputInternal(windowingFn); + } + + @Override + protected Coder getDefaultOutputCoder() { + List> inputs = getInput().getAll(); + if (inputs.isEmpty()) { + // Cannot infer a Coder from an empty list of input PCollections. + return null; + } + // Use the Coder of the first input. + return inputs.get(0).getCoder(); + } + + } + + /** + * {@code FlattenIterables} takes a {@code PCollection>} and returns a + * {@code PCollection} that contains all the elements from each iterable. + * Implements {@link #fromIterable}. 
+ * + * @param the type of the elements of the input {@code Iterable}s and + * the output {@code PCollection} + */ + public static class FlattenIterables + extends PTransform>, PCollection> { + + @Override + public PCollection apply(PCollection> in) { + Coder> inCoder = in.getCoder(); + if (!(inCoder instanceof IterableCoder)) { + throw new IllegalArgumentException( + "expecting the input Coder to be an IterableCoder"); + } + IterableCoder iterableCoder = (IterableCoder) inCoder; + Coder elemCoder = iterableCoder.getElemCoder(); + + return in.apply(ParDo.of( + new DoFn, T>() { + @Override + public void processElement(ProcessContext c) { + for (T i : c.element()) { + c.output(i); + } + } + })) + .setCoder(elemCoder); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + FlattenPCollectionList.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + FlattenPCollectionList transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateHelper(transform, context); + } + }); + } + + private static void evaluateHelper( + FlattenPCollectionList transform, + DirectPipelineRunner.EvaluationContext context) { + List> outputElems = new ArrayList<>(); + PCollectionList inputs = transform.getInput(); + + for (PCollection input : inputs.getAll()) { + outputElems.addAll(context.getPCollectionValuesWithMetadata(input)); + } + + context.setPCollectionValuesWithMetadata(transform.getOutput(), outputElems); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java new file mode 100644 index 0000000000000..d7a4de64e50d3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -0,0 +1,517 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * {@code GroupByKey} takes a {@code PCollection>}, + * groups the values by key and windows, and returns a + * {@code PCollection>>} representing a map from + * each distinct key and window of the input {@code PCollection} to an + * {@code Iterable} over all the values associated with that key in + * the input. Each key in the output {@code PCollection} is unique within + * each window. + * + *

{@code GroupByKey} is analogous to converting a multi-map into + * a uni-map, and related to {@code GROUP BY} in SQL. It corresponds + * to the "shuffle" step between the Mapper and the Reducer in the + * MapReduce framework. + * + *

Two keys of type {@code K} are compared for equality + * not by regular Java {@link Object#equals}, but instead by + * first encoding each of the keys using the {@code Coder} of the + * keys of the input {@code PCollection}, and then comparing the + * encoded bytes. This admits efficient parallel evaluation. Note that + * this requires that the {@code Coder} of the keys be deterministic (see + * {@link Coder#isDeterministic()}). If the key {@code Coder} is not + * deterministic, an exception is thrown at runtime. + * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as that of the keys of the input, + * and the {@code Coder} of the elements of the {@code Iterable} + * values of the output {@code PCollection} is the same as the + * {@code Coder} of the values of the input. + * + *

Example of use:
+ * <pre> {@code
+ * PCollection<KV<String, Doc>> urlDocPairs = ...;
+ * PCollection<KV<String, Iterable<Doc>>> urlToDocs =
+ *     urlDocPairs.apply(GroupByKey.<String, Doc>create());
+ * PCollection<R> results =
+ *     urlToDocs.apply(ParDo.of(new DoFn<KV<String, Iterable<Doc>>, R>() {
+ *       public void processElement(ProcessContext c) {
+ *         String url = c.element().getKey();
+ *         Iterable<Doc> docsWithThatUrl = c.element().getValue();
+ *         ... process all docs having that url ...
+ *       }}));
+ * } </pre>
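+ *
+ * <p> A more concrete sketch of consuming the grouped values, here summing
+ * the {@code Integer} values seen for each key (the element types are
+ * assumed for illustration; for this particular pattern {@link Combine.PerKey}
+ * is usually the better choice):
+ * <pre> {@code
+ * PCollection<KV<String, Iterable<Integer>>> grouped = ...;
+ * PCollection<KV<String, Integer>> sums =
+ *     grouped.apply(ParDo.of(
+ *         new DoFn<KV<String, Iterable<Integer>>, KV<String, Integer>>() {
+ *           public void processElement(ProcessContext c) {
+ *             int sum = 0;
+ *             for (Integer value : c.element().getValue()) {
+ *               sum += value;  // fold over every value grouped under this key
+ *             }
+ *             c.output(KV.of(c.element().getKey(), sum));
+ *           }}));
+ * } </pre>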
+ * + *

{@code GroupByKey} is a key primitive in data-parallel + * processing, since it is the main way to efficiently bring + * associated data together into one location. It is also a key + * determiner of the performance of a data-parallel pipeline. + * + *

See {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey} + * for a way to group multiple input PCollections by a common key at once. + * + *

See {@link Combine.PerKey} for a common pattern of + * {@code GroupByKey} followed by {@link Combine.GroupedValues}. + * + *
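+ * <p> A sketch of that pattern (the element types are assumed, and the
+ * {@code Combine.groupedValues} factory is assumed to accept the same kind of
+ * {@code SerializableFunction} as {@link Combine#perKey}):
+ * <pre> {@code
+ * PCollection<KV<String, Integer>> pairs = ...;
+ * PCollection<KV<String, Integer>> maxPerKey =
+ *     pairs.apply(GroupByKey.<String, Integer>create())
+ *          .apply(Combine.<String, Integer, Integer>groupedValues(
+ *              new Max.MaxIntegerFn()));
+ * } </pre>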

When grouping, windows that can be merged according to the {@link WindowingFn} + * of the input {@code PCollection} will be merged together, and a group + * corresponding to the new, merged window will be emitted. + * The timestamp for each group is the upper bound of its window, e.g., the most + * recent timestamp that can be assigned into the window, and the group will be + * in the window that it corresponds to. The output {@code PCollection} will + * have the same {@link WindowingFn} as the input. + * + *

If the {@link WindowingFn} of the input requires merging, it is not + * valid to apply another {@code GroupByKey} without first applying a new + * {@link WindowingFn}. + * + * @param the type of the keys of the input and output + * {@code PCollection}s + * @param the type of the values of the input {@code PCollection} + * and the elements of the {@code Iterable}s in the output + * {@code PCollection} + */ +public class GroupByKey + extends PTransform>, + PCollection>>> { + /** + * Returns a {@code GroupByKey} {@code PTransform}. + * + * @param the type of the keys of the input and output + * {@code PCollection}s + * @param the type of the values of the input {@code PCollection} + * and the elements of the {@code Iterable}s in the output + * {@code PCollection} + */ + public static GroupByKey create() { + return new GroupByKey<>(); + } + + + ///////////////////////////////////////////////////////////////////////////// + + @Override + public PCollection>> apply(PCollection> input) { + return applyHelper(input, false, false); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Helper transform that makes timestamps and window assignments + * explicit in the value part of each key/value pair. + */ + public static class ReifyTimestampsAndWindows + extends PTransform>, + PCollection>>> { + @Override + public PCollection>> apply( + PCollection> input) { + Coder> inputCoder = getInput().getCoder(); + KvCoder inputKvCoder = (KvCoder) inputCoder; + Coder keyCoder = inputKvCoder.getKeyCoder(); + Coder inputValueCoder = inputKvCoder.getValueCoder(); + Coder> outputValueCoder = FullWindowedValueCoder.of( + inputValueCoder, getInput().getWindowingFn().windowCoder()); + Coder>> outputKvCoder = + KvCoder.of(keyCoder, outputValueCoder); + return input.apply(ParDo.of( + new DoFn, KV>>() { + @Override + public void processElement(ProcessContext c) { + KV kv = c.element(); + K key = kv.getKey(); + V value = kv.getValue(); + c.output(KV.of( + key, + WindowedValue.of(value, c.timestamp(), c.windows()))); + }})) + .setCoder(outputKvCoder); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Helper transform that sorts the values associated with each key + * by timestamp. + */ + public static class SortValuesByTimestamp + extends PTransform>>>, + PCollection>>>> { + @Override + public PCollection>>> apply( + PCollection>>> input) { + return input.apply(ParDo.of( + new DoFn>>, + KV>>>() { + @Override + public void processElement(ProcessContext c) { + KV>> kvs = c.element(); + K key = kvs.getKey(); + Iterable> unsortedValues = kvs.getValue(); + List> sortedValues = new ArrayList<>(); + for (WindowedValue value : unsortedValues) { + sortedValues.add(value); + } + Collections.sort(sortedValues, + new Comparator>() { + @Override + public int compare(WindowedValue e1, WindowedValue e2) { + return e1.getTimestamp().compareTo(e2.getTimestamp()); + } + }); + c.output(KV.>>of(key, sortedValues)); + }})) + .setCoder(getInput().getCoder()); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Helper transform that takes a collection of timestamp-ordered + * values associated with each key, groups the values by window, + * combines windows as needed, and for each window in each key, + * outputs a collection of key/value-list pairs implicitly assigned + * to the window and with the timestamp derived from that window. 
+ */ + public static class GroupAlsoByWindow + extends PTransform>>>, + PCollection>>> { + private final WindowingFn windowingFn; + + public GroupAlsoByWindow(WindowingFn windowingFn) { + this.windowingFn = windowingFn; + } + + @Override + public PCollection>> apply( + PCollection>>> input) { + Coder>>> inputCoder = + getInput().getCoder(); + KvCoder>> inputKvCoder = + (KvCoder>>) inputCoder; + Coder keyCoder = inputKvCoder.getKeyCoder(); + Coder>> inputValueCoder = + inputKvCoder.getValueCoder(); + IterableCoder> inputIterableValueCoder = + (IterableCoder>) inputValueCoder; + Coder> inputIterableElementCoder = + inputIterableValueCoder.getElemCoder(); + WindowedValueCoder inputIterableWindowedValueCoder = + (WindowedValueCoder) inputIterableElementCoder; + Coder inputIterableElementValueCoder = + inputIterableWindowedValueCoder.getValueCoder(); + Coder> outputValueCoder = + IterableCoder.of(inputIterableElementValueCoder); + Coder>> outputKvCoder = + KvCoder.of(keyCoder, outputValueCoder); + + return input.apply(ParDo.of( + new GroupAlsoByWindowsDoFn( + windowingFn, inputIterableElementValueCoder))) + .setCoder(outputKvCoder); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Primitive helper transform that groups by key only, ignoring any + * window assignments. + */ + public static class GroupByKeyOnly + extends PTransform>, + PCollection>>> { + // TODO: Define and implement sorting by value. + boolean sortsValues = false; + + public GroupByKeyOnly() { } + + @Override + public PCollection>> apply(PCollection> input) { + WindowingFn windowingFn = getInput().getWindowingFn(); + if (!(windowingFn instanceof NonMergingWindowingFn)) { + // Prevent merging windows again, without explicit user + // involvement, e.g., by Window.into() or Window.remerge(). + windowingFn = new InvalidWindowingFn( + "WindowingFn has already been consumed by previous GroupByKey", + windowingFn); + } + return PCollection.>>createPrimitiveOutputInternal( + windowingFn); + } + + @Override + public void finishSpecifying() { + // Verify that the input Coder> is a KvCoder, and that + // the key coder is deterministic. + Coder keyCoder = getKeyCoder(); + if (!keyCoder.isDeterministic()) { + throw new IllegalStateException( + "the key Coder must be deterministic for grouping"); + } + if (getOutput().isOrdered()) { + throw new IllegalStateException( + "the result of a GroupByKey cannot be specified to be ordered"); + } + super.finishSpecifying(); + } + + /** + * Returns the {@code Coder} of the input to this transform, which + * should be a {@code KvCoder}. + */ + KvCoder getInputKvCoder() { + Coder> inputCoder = getInput().getCoder(); + if (!(inputCoder instanceof KvCoder)) { + throw new IllegalStateException( + "GroupByKey requires its input to use KvCoder"); + } + return (KvCoder) inputCoder; + } + + /** + * Returns the {@code Coder} of the keys of the input to this + * transform, which is also used as the {@code Coder} of the keys of + * the output of this transform. + */ + Coder getKeyCoder() { + return getInputKvCoder().getKeyCoder(); + } + + /** + * Returns the {@code Coder} of the values of the input to this transform. + */ + Coder getInputValueCoder() { + return getInputKvCoder().getValueCoder(); + } + + /** + * Returns the {@code Coder} of the {@code Iterable} values of the + * output of this transform. 
+ */ + Coder> getOutputValueCoder() { + return IterableCoder.of(getInputValueCoder()); + } + + /** + * Returns the {@code Coder} of the output of this transform. + */ + KvCoder> getOutputKvCoder() { + return KvCoder.of(getKeyCoder(), getOutputValueCoder()); + } + + @Override + protected Coder>> getDefaultOutputCoder() { + return getOutputKvCoder(); + } + + /** + * Returns whether this GBK sorts values. + */ + boolean sortsValues() { + return sortsValues; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + GroupByKeyOnly.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + GroupByKeyOnly transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateHelper(transform, context); + } + }); + } + + private static void evaluateHelper( + GroupByKeyOnly transform, + DirectPipelineRunner.EvaluationContext context) { + PCollection> input = transform.getInput(); + + List>> inputElems = + context.getPCollectionValuesWithMetadata(input); + + Coder keyCoder = transform.getKeyCoder(); + + Map, List> groupingMap = new HashMap<>(); + + for (ValueWithMetadata> elem : inputElems) { + K key = elem.getValue().getKey(); + V value = elem.getValue().getValue(); + Instant timestamp = elem.getTimestamp(); + byte[] encodedKey; + try { + encodedKey = encodeToByteArray(keyCoder, key); + } catch (CoderException exn) { + // TODO: Put in better element printing: + // truncate if too long. + throw new IllegalArgumentException( + "unable to encode key " + key + " of input to " + transform + + " using " + keyCoder, + exn); + } + GroupingKey groupingKey = new GroupingKey<>(key, encodedKey); + List values = groupingMap.get(groupingKey); + if (values == null) { + values = new ArrayList(); + groupingMap.put(groupingKey, values); + } + values.add(value); + } + + List>>> outputElems = + new ArrayList<>(); + for (Map.Entry, List> entry : groupingMap.entrySet()) { + GroupingKey groupingKey = entry.getKey(); + K key = groupingKey.getKey(); + List values = entry.getValue(); + values = context.randomizeIfUnordered( + transform.sortsValues(), values, true /* inPlaceAllowed */); + outputElems.add(ValueWithMetadata + .of(WindowedValue.valueInEmptyWindows(KV.>of(key, values))) + .withKey(key)); + } + + context.setPCollectionValuesWithMetadata(transform.getOutput(), + outputElems); + } + + public PCollection>> applyHelper( + PCollection> input, boolean isStreaming, boolean runnerSortsByTimestamp) { + Coder> inputCoder = getInput().getCoder(); + if (!(inputCoder instanceof KvCoder)) { + throw new IllegalStateException( + "GroupByKey requires its input to use KvCoder"); + } + // This operation groups by the combination of key and window, + // merging windows as needed, using the windows assigned to the + // key/value input elements and the window merge operation of the + // windowing function associated with the input PCollection. + WindowingFn windowingFn = getInput().getWindowingFn(); + if (windowingFn instanceof InvalidWindowingFn) { + String cause = ((InvalidWindowingFn) windowingFn).getCause(); + throw new IllegalStateException( + "GroupByKey must have a valid Window merge function. " + + "Invalid because: " + cause); + } + if (windowingFn.isCompatible(new GlobalWindow())) { + // The input PCollection is using the degenerate default + // windowing function, which uses a single global window for all + // elements. 
We can implement this using a more-primitive + // non-window-aware GBK transform. + return input.apply(new GroupByKeyOnly()); + + } else if (isStreaming) { + // If using the streaming runner, the service will do the insertion of + // the GroupAlsoByWindow step. + // TODO: Remove this case once the Dataflow Runner handles GBK directly + return input.apply(new GroupByKeyOnly()); + + } else { + // By default, implement GroupByKey[AndWindow] via a series of lower-level + // operations. + PCollection>>> gbkOutput = input + // Make each input element's timestamp and assigned windows + // explicit, in the value part. + .apply(new ReifyTimestampsAndWindows()) + + // Group by just the key. + .apply(new GroupByKeyOnly>()); + + if (!runnerSortsByTimestamp) { + // Sort each key's values by timestamp. GroupAlsoByWindow requires + // its input to be sorted by timestamp. + gbkOutput = gbkOutput.apply(new SortValuesByTimestamp()); + } + + return gbkOutput + // Group each key's values by window, merging windows as needed. + .apply(new GroupAlsoByWindow(windowingFn)); + } + } + + private static class GroupingKey { + private K key; + private byte[] encodedKey; + + public GroupingKey(K key, byte[] encodedKey) { + this.key = key; + this.encodedKey = encodedKey; + } + + public K getKey() { return key; } + + @Override + public boolean equals(Object o) { + if (o instanceof GroupingKey) { + GroupingKey that = (GroupingKey) o; + return Arrays.equals(this.encodedKey, that.encodedKey); + } else { + return false; + } + } + + @Override + public int hashCode() { return Arrays.hashCode(encodedKey); } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java new file mode 100644 index 0000000000000..08a801b15ec2d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code Keys} takes a {@code PCollection} of {@code KV}s and + * returns a {@code PCollection} of the keys. + * + *

Example of use:
+ * <pre> {@code
+ * PCollection<KV<String, Long>> wordCounts = ...;
+ * PCollection<String> words = wordCounts.apply(Keys.<String>create());
+ * } </pre>
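+ *
+ * <p> The complementary {@link Values} transform extracts the value part of
+ * each pair instead; a sketch against the same assumed input:
+ * <pre> {@code
+ * PCollection<Long> counts = wordCounts.apply(Values.<Long>create());
+ * } </pre>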
+ * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and the output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See also {@link Values}. + * + * @param the type of the keys in the input {@code PCollection}, + * and the type of the elements in the output {@code PCollection} + */ +public class Keys extends PTransform>, + PCollection> { + /** + * Returns a {@code Keys} {@code PTransform}. + * + * @param the type of the keys in the input {@code PCollection}, + * and the type of the elements in the output {@code PCollection} + */ + public static Keys create() { + return new Keys<>(); + } + + private Keys() { } + + @Override + public PCollection apply(PCollection> in) { + return + in.apply(ParDo.named("Keys") + .of(new DoFn, K>() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().getKey()); + } + })); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java new file mode 100644 index 0000000000000..ee73ae4087f5d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code KvSwap} takes a {@code PCollection>} and + * returns a {@code PCollection>}, where all the keys and + * values have been swapped. + * + *

Example of use:
+ * <pre> {@code
+ * PCollection<KV<String, Long>> wordsToCounts = ...;
+ * PCollection<KV<Long, String>> countsToWords =
+ *     wordsToCounts.apply(KvSwap.<String, Long>create());
+ * } </pre>
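+ *
+ * <p> Swapping is often followed by a {@link GroupByKey} on the former
+ * values, for example to index words by their counts; a sketch, assuming
+ * the input above (see {@link GroupByKey} for the coder requirements on the
+ * new key):
+ * <pre> {@code
+ * PCollection<KV<Long, Iterable<String>>> wordsByCount =
+ *     wordsToCounts.apply(KvSwap.<String, Long>create())
+ *                  .apply(GroupByKey.<Long, String>create());
+ * } </pre>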
+ * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and the output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + * @param the type of the keys in the input {@code PCollection} + * and the values in the output {@code PCollection} + * @param the type of the values in the input {@code PCollection} + * and the keys in the output {@code PCollection} + */ +public class KvSwap extends PTransform>, + PCollection>> { + /** + * Returns a {@code KvSwap} {@code PTransform}. + * + * @param the type of the keys in the input {@code PCollection} + * and the values in the output {@code PCollection} + * @param the type of the values in the input {@code PCollection} + * and the keys in the output {@code PCollection} + */ + public static KvSwap create() { + return new KvSwap<>(); + } + + private KvSwap() { } + + @Override + public PCollection> apply(PCollection> in) { + return + in.apply(ParDo.named("KvSwap") + .of(new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + c.output(KV.of(e.getValue(), e.getKey())); + } + })); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java new file mode 100644 index 0000000000000..fce9a328f1c84 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +/** + * {@code PTransform}s for computing the maximum of the elements in a + * {@code PCollection}, or the maximum of the values associated with + * each key in a {@code PCollection} of {@code KV}s. + * + *

Example 1: get the maximum of a {@code PCollection} of {@code Double}s.
+ * <pre> {@code
+ * PCollection<Double> input = ...;
+ * PCollection<Double> max = input.apply(Max.doublesGlobally());
+ * } </pre>
+ * + *

Example 2: calculate the maximum of the {@code Integer}s
+ * associated with each unique key (which is of type {@code String}).
+ * <pre> {@code
+ * PCollection<KV<String, Integer>> input = ...;
+ * PCollection<KV<String, Integer>> maxPerKey = input
+ *     .apply(Max.<String>integersPerKey());
+ * } </pre>
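+ *
+ * <p> The combining functions defined below can also be handed to
+ * {@link Combine} directly; a sketch that is equivalent to
+ * {@link #longsGlobally} apart from the default transform name, assuming the
+ * input holds epoch-millisecond timestamps:
+ * <pre> {@code
+ * PCollection<Long> timestamps = ...;
+ * PCollection<Long> latest =
+ *     timestamps.apply(Combine.globally(new Max.MaxLongFn()));
+ * } </pre>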
+ */ +public class Max { + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the maximum of the + * input {@code PCollection}'s elements, or + * {@code Integer.MIN_VALUE} if there are no elements. + */ + public static Combine.Globally integersGlobally() { + Combine.Globally combine = Combine + .globally(new MaxIntegerFn()); + combine.setName("Max"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the maximum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey integersPerKey() { + Combine.PerKey combine = Combine + .perKey(new MaxIntegerFn()); + combine.setName("Max.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the maximum of the + * input {@code PCollection}'s elements, or + * {@code Long.MIN_VALUE} if there are no elements. + */ + public static Combine.Globally longsGlobally() { + Combine.Globally combine = Combine.globally(new MaxLongFn()); + combine.setName("Max"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the maximum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey longsPerKey() { + Combine.PerKey combine = Combine + .perKey(new MaxLongFn()); + combine.setName("Max.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the maximum of the + * input {@code PCollection}'s elements, or + * {@code Double.MIN_VALUE} if there are no elements. + */ + public static Combine.Globally doublesGlobally() { + Combine.Globally combine = Combine + .globally(new MaxDoubleFn()); + combine.setName("Max"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the maximum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey doublesPerKey() { + Combine.PerKey combine = Combine + .perKey(new MaxDoubleFn()); + combine.setName("Max.PerKey"); + return combine; + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code SerializableFunction} that computes the maximum of an + * {@code Iterable} of numbers of type {@code N}, useful as an + * argument to {@link Combine#globally} or {@link Combine#perKey}. + * + * @param the type of the {@code Number}s being compared + */ + public static class MaxFn> + implements SerializableFunction, N> { + + /** The smallest value of type N. */ + private final N initialValue; + + /** + * Constructs a combining function that computes the maximum over + * a collection of values of type {@code N}, given the smallest + * value of type {@code N}, which is the identity value for the + * maximum operation over {@code N}s. + */ + public MaxFn(N initialValue) { + this.initialValue = initialValue; + } + + @Override + public N apply(Iterable input) { + N max = initialValue; + for (N value : input) { + if (value.compareTo(max) > 0) { + max = value; + } + } + return max; + } + } + + /** + * A {@code SerializableFunction} that computes the maximum of an + * {@code Iterable} of {@code Integer}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MaxIntegerFn extends MaxFn { + public MaxIntegerFn() { super(Integer.MIN_VALUE); } + } + + /** + * A {@code SerializableFunction} that computes the maximum of an + * {@code Iterable} of {@code Long}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MaxLongFn extends MaxFn { + public MaxLongFn() { super(Long.MIN_VALUE); } + } + + /** + * A {@code SerializableFunction} that computes the maximum of an + * {@code Iterable} of {@code Double}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MaxDoubleFn extends MaxFn { + public MaxDoubleFn() { super(Double.MIN_VALUE); } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java new file mode 100644 index 0000000000000..34fbb1fc29088 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.SerializableCoder; + +/** + * {@code PTransform}s for computing the arithmetic mean + * (a.k.a. 
average) of the elements in a {@code PCollection}, or the + * mean of the values associated with each key in a + * {@code PCollection} of {@code KV}s. + * + *

Example 1: get the mean of a {@code PCollection} of {@code Long}s.
+ * <pre> {@code
+ * PCollection<Long> input = ...;
+ * PCollection<Double> mean = input.apply(Mean.<Long>globally());
+ * } </pre>
+ * + *

Example 2: calculate the mean of the {@code Integer}s
+ * associated with each unique key (which is of type {@code String}).
+ * <pre> {@code
+ * PCollection<KV<String, Integer>> input = ...;
+ * PCollection<KV<String, Double>> meanPerKey =
+ *     input.apply(Mean.<String, Integer>perKey());
+ * } </pre>
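+ *
+ * <p> Note that the output element type is {@code Double} in both forms,
+ * whatever the input {@code Number} type; a concrete sketch of the global
+ * form over {@code Integer}s:
+ * <pre> {@code
+ * PCollection<Integer> scores = ...;
+ * PCollection<Double> averageScore = scores.apply(Mean.<Integer>globally());
+ * } </pre>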
+ */ +public class Mean { + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the mean of the + * input {@code PCollection}'s elements, or + * {@code 0} if there are no elements. + * + * @param the type of the {@code Number}s being combined + */ + public static Combine.Globally globally() { + Combine.Globally combine = Combine.globally(new MeanFn<>()); + combine.setName("Mean"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the mean of the values associated with + * that key in the input {@code PCollection}. + * + * See {@link Combine.PerKey} for how this affects timestamps and bucketing. + * + * @param the type of the keys + * @param the type of the {@code Number}s being combined + */ + public static Combine.PerKey perKey() { + Combine.PerKey combine = Combine.perKey(new MeanFn<>()); + combine.setName("Mean.PerKey"); + return combine; + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code Combine.CombineFn} that computes the arithmetic mean + * (a.k.a. average) of an {@code Iterable} of numbers of type + * {@code N}, useful as an argument to {@link Combine#globally} or + * {@link Combine#perKey}. + * + *

Returns {@code 0} if combining zero elements. + * + * @param the type of the {@code Number}s being combined + */ + public static class MeanFn extends + Combine.AccumulatingCombineFn.CountSum, Double> { + + /** + * Constructs a combining function that computes the mean over + * a collection of values of type {@code N}. + */ + public MeanFn() {} + + /** + * Accumulator helper class for MeanFn. + */ + class CountSum + extends Combine.AccumulatingCombineFn.Accumulator { + + long count = 0; + double sum = 0.0; + + @Override + public void addInput(N element) { + count++; + sum += element.doubleValue(); + } + + @Override + public void mergeAccumulator(CountSum accumulator) { + count += accumulator.count; + sum += accumulator.sum; + } + + @Override + public Double extractOutput() { + return count == 0 ? 0.0 : sum / count; + } + } + + @Override + public CountSum createAccumulator() { + return new CountSum(); + } + + @SuppressWarnings("unchecked") + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + // The casts are needed because CountSum.class is a + // Class, but we need a + // Class.CountSum>. + return SerializableCoder.of((Class) (Class) CountSum.class); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java new file mode 100644 index 0000000000000..337a051160973 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +/** + * {@code PTransform}s for computing the minimum of the elements in a + * {@code PCollection}, or the minimum of the values associated with + * each key in a {@code PCollection} of {@code KV}s. + * + *

Example 1: get the minimum of a {@code PCollection} of {@code Double}s.
+ * <pre> {@code
+ * PCollection<Double> input = ...;
+ * PCollection<Double> min = input.apply(Min.doublesGlobally());
+ * } </pre>
+ * + *

Example 2: calculate the minimum of the {@code Integer}s
+ * associated with each unique key (which is of type {@code String}).
+ * <pre> {@code
+ * PCollection<KV<String, Integer>> input = ...;
+ * PCollection<KV<String, Integer>> minPerKey = input
+ *     .apply(Min.<String>integersPerKey());
+ * } </pre>
+ */ +public class Min { + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is a single value that is + * the minimum of the input {@code PCollection}'s elements, or + * {@code Integer.MAX_VALUE} if there are no elements. + */ + public static Combine.Globally integersGlobally() { + Combine.Globally combine = Combine + .globally(new MinIntegerFn()); + combine.setName("Min"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the minimum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey integersPerKey() { + Combine.PerKey combine = Combine + .perKey(new MinIntegerFn()); + combine.setName("Min.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the minimum of the + * input {@code PCollection}'s elements, or + * {@code Long.MAX_VALUE} if there are no elements. + */ + public static Combine.Globally longsGlobally() { + Combine.Globally combine = Combine.globally(new MinLongFn()); + combine.setName("Min"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the minimum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey longsPerKey() { + Combine.PerKey combine = Combine + .perKey(new MinLongFn()); + combine.setName("Min.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the minimum of the + * input {@code PCollection}'s elements, or + * {@code Double.MAX_VALUE} if there are no elements. + */ + public static Combine.Globally doublesGlobally() { + Combine.Globally combine = Combine + .globally(new MinDoubleFn()); + combine.setName("Min"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the minimum of the values associated with + * that key in the input {@code PCollection}. + * + *

See {@link Combine.PerKey} for how this affects timestamps and windowing. + */ + public static Combine.PerKey doublesPerKey() { + Combine.PerKey combine = Combine + .perKey(new MinDoubleFn()); + combine.setName("Min.PerKey"); + return combine; + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code SerializableFunction} that computes the minimum of an + * {@code Iterable} of numbers of type {@code N}, useful as an + * argument to {@link Combine#globally} or {@link Combine#perKey}. + * + * @param the type of the {@code Number}s being compared + */ + public static class MinFn> + implements SerializableFunction, N> { + + /** The largest value of type N. */ + private final N initialValue; + + /** + * Constructs a combining function that computes the minimum over + * a collection of values of type {@code N}, given the largest + * value of type {@code N}, which is the identity value for the + * minimum operation over {@code N}s. + */ + public MinFn(N initialValue) { + this.initialValue = initialValue; + } + + @Override + public N apply(Iterable input) { + N min = initialValue; + for (N value : input) { + if (value.compareTo(min) < 0) { + min = value; + } + } + return min; + } + } + + /** + * A {@code SerializableFunction} that computes the minimum of an + * {@code Iterable} of {@code Integer}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MinIntegerFn extends MinFn { + public MinIntegerFn() { super(Integer.MAX_VALUE); } + } + + /** + * A {@code SerializableFunction} that computes the minimum of an + * {@code Iterable} of {@code Long}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MinLongFn extends MinFn { + public MinLongFn() { super(Long.MAX_VALUE); } + } + + /** + * A {@code SerializableFunction} that computes the minimum of an + * {@code Iterable} of {@code Double}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class MinDoubleFn extends MinFn { + public MinDoubleFn() { super(Double.MAX_VALUE); } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java new file mode 100644 index 0000000000000..5906d7212dba0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.TypedPValue; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; + +/** + * A {@code PTransform} is an operation that takes an + * {@code Input} (some subtype of {@link PInput}) and produces an + * {@code Output} (some subtype of {@link POutput}). + * + *

Common PTransforms include root PTransforms like + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read}, + * {@link Create}, processing and + * conversion operations like {@link ParDo}, + * {@link GroupByKey}, + * {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey}, + * {@link Combine}, and {@link Count}, and outputting + * PTransforms like + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}. Users also + * define their own application-specific composite PTransforms. + * + *

Each {@code PTransform} has a single + * {@code Input} type and a single {@code Output} type. Many + * PTransforms conceptually transform one input value to one output + * value, and in this case {@code Input} and {@code Output} are + * typically instances of + * {@link com.google.cloud.dataflow.sdk.values.PCollection}. + * A root + * PTransform conceptually has no input; in this case, conventionally + * a {@link com.google.cloud.dataflow.sdk.values.PBegin} object + * produced by calling {@link Pipeline#begin} is used as the input. + * An outputting PTransform conceptually has no output; in this case, + * conventionally {@link com.google.cloud.dataflow.sdk.values.PDone} + * is used as its output type. Some PTransforms conceptually have + * multiple inputs and/or outputs; in these cases special "bundling" + * classes like + * {@link com.google.cloud.dataflow.sdk.values.PCollectionList}, + * {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} + * are used + * to combine multiple values into a single bundle for passing into or + * returning from the PTransform. + * + *

A {@code PTransform} is invoked by calling + * {@code apply()} on its {@code Input}, returning its {@code Output}. + * Calls can be chained to concisely create linear pipeline segments. + * For example: + * + *

<pre> {@code
+ * PCollection<T1> pc1 = ...;
+ * PCollection<T2> pc2 =
+ *     pc1.apply(ParDo.of(new MyDoFn<T1, KV<K, V>>()))
+ *        .apply(GroupByKey.<K, V>create())
+ *        .apply(Combine.perKey(new MyKeyedCombineFn<K, V>()))
+ *        .apply(ParDo.of(new MyDoFn2<KV<K, Iterable<V>>, T2>()));
+ * } </pre>
+ * + *

PTransform operations have unique names, which are used by the + * system when explaining what's going on during optimization and + * execution. Each PTransform gets a system-provided default name, + * but it's a good practice to specify an explicit name, where + * possible, using the {@code named()} method offered by some + * PTransforms such as {@link ParDo}. For example: + * + *

<pre> {@code
+ * ...
+ * .apply(ParDo.named("Step1").of(new MyDoFn3()))
+ * ...
+ * } </pre>
+ * + *

Each PCollection output produced by a PTransform, + * either directly or within a "bundling" class, automatically gets + * its own name derived from the name of its producing PTransform. An + * output's name can be changed by invoking + * {@link com.google.cloud.dataflow.sdk.values.PValue#setName}. + * + *

Each PCollection output produced by a PTransform + * also records a {@link com.google.cloud.dataflow.sdk.coders.Coder} + * that specifies how the elements of that PCollection + * are to be encoded as a byte string, if necessary. The + * PTransform may provide a default Coder for any of its outputs, for + * instance by deriving it from the PTransform input's Coder. If the + * PTransform does not specify the Coder for an output PCollection, + * the system will attempt to infer a Coder for it, based on + * what's known at run-time about the Java type of the output's + * elements. The enclosing {@link Pipeline}'s + * {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry} + * (accessible via {@link Pipeline#getCoderRegistry}) defines the + * mapping from Java types to the default Coder to use, for a standard + * set of Java types; users can extend this mapping for additional + * types, via + * {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry#registerCoder}. + * If this inference process fails, either because the Java type was + * not known at run-time (e.g., due to Java's "erasure" of generic + * types) or there was no default Coder registered, then the Coder + * should be specified manually by calling + * {@link com.google.cloud.dataflow.sdk.values.TypedPValue#setCoder} + * on the output PCollection. The Coder of every output + * PCollection must be determined one way or another + * before that output is used as an input to another PTransform, or + * before the enclosing Pipeline is run. + * + *

A small number of PTransforms are implemented natively by the + * Google Cloud Dataflow SDK; such PTransforms simply return an + * output value as their apply implementation. + * The majority of PTransforms are + * implemented as composites of other PTransforms. Such a PTransform + * subclass typically just implements {@link #apply}, computing its + * Output value from its Input value. User programs are encouraged to + * use this mechanism to modularize their own code. Such composite + * abstractions get their own name, and navigating through the + * composition hierarchy of PTransforms is supported by the monitoring + * interface. Examples of composite PTransforms can be found in this + * directory and in examples. From the caller's point of view, there + * is no distinction between a PTransform implemented natively and one + * implemented in terms of other PTransforms; both kinds of PTransform + * are invoked in the same way, using {@code apply()}. + * + *
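+ * <p> As a sketch (the {@code MaxWordLength} name, the element types, and the
+ * particular composed transforms are illustrative only), a composite
+ * PTransform overrides {@link #apply} and simply chains existing transforms:
+ * <pre> {@code
+ * public class MaxWordLength
+ *     extends PTransform<PCollection<String>, PCollection<Integer>> {
+ *   public PCollection<Integer> apply(PCollection<String> words) {
+ *     return words
+ *         .apply(ParDo.named("ComputeLengths").of(new DoFn<String, Integer>() {
+ *             public void processElement(ProcessContext c) {
+ *               c.output(c.element().length());
+ *             }}))
+ *         .apply(Max.integersGlobally());
+ *   }
+ * }
+ * } </pre>
+ * It is then applied like any other transform, e.g.
+ * {@code words.apply(new MaxWordLength())}.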

Note on Serialization

+ * + * {@code PTransform} doesn't actually support serialization, despite + * implementing {@code Serializable}. + * + *

{@code PTransform} is marked {@code Serializable} solely + * because it is common for an anonymous {@code DoFn}, + * instance to be created within an + * {@code apply()} method of a composite {@code PTransform}. + * + *

Each of those {@code *Fn}s is {@code Serializable}, but + * unfortunately its instance state will contain a reference to the + * enclosing {@code PTransform} instance, and so attempt to serialize + * the {@code PTransform} instance, even though the {@code *Fn} + * instance never references anything about the enclosing + * {@code PTransform}. + * + *

Composite transforms, which are defined in terms of other transforms, + * should return the output of one of the composed transforms. Non-composite + * transforms, which do not apply any transforms internally, should return + * a new unbound output and register evaluators (via backend-specific + * registration methods). + * + *

The default implementation throws an exception. A derived class must + * either implement apply, or else each runner must supply a custom + * implementation via + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner#apply}. + */ + public Output apply(Input input) { + throw new IllegalArgumentException( + "Runner " + getPipeline().getRunner() + + " has not registered an implementation for the required primitive operation " + + this); + } + + /** + * Sets the base name of this {@code PTransform}. + */ + public void setName(String name) { + this.name = name; + } + + /** + * Sets the base name of this {@code PTransform} and returns itself. + * + *

This is a shortcut for calling {@link #setName}, which allows method + * chaining. + */ + public PTransform withName(String name) { + setName(name); + return this; + } + + /** + * Returns the transform name. + * + *

This name is provided by the transform creator and is not required to be unique. + */ + public String getName() { + return name != null ? name : getDefaultName(); + } + + /** + * Returns the owning {@link Pipeline} of this {@code PTransform}. + * + * @throws IllegalStateException if the owning {@code Pipeline} hasn't been + * set yet + */ + @Deprecated + public Pipeline getPipeline() { + if (pipeline == null) { + throw new IllegalStateException("owning pipeline not set"); + } + return pipeline; + } + + /** + * Returns the input of this transform. + * + * @throws IllegalStateException if this PTransform hasn't been applied yet + * @deprecated Use pipeline.getInput(transform) + */ + @Deprecated + public Input getInput() { + @SuppressWarnings("unchecked") + Input input = (Input) getPipeline().getInput(this); + return input; + } + + /** + * Returns the output of this transform. + * + * @throws IllegalStateException if this PTransform hasn't been applied yet + * #deprecated use pipeline.getOutput(transform) + */ + @Deprecated + public Output getOutput() { + @SuppressWarnings("unchecked") + Output output = (Output) getPipeline().getOutput(this); + return output; + } + + /** + * Returns the {@link CoderRegistry}, useful for inferring + * {@link com.google.cloud.dataflow.sdk.coders.Coder}s. + * + * @throws IllegalStateException if the owning {@link Pipeline} hasn't been + * set yet + * @deprecated use pipeline.getCoderRegistry() + */ + @Deprecated + protected CoderRegistry getCoderRegistry() { + return getPipeline().getCoderRegistry(); + } + + + ///////////////////////////////////////////////////////////////////////////// + + // See the note about about PTransform's fake Serializability, to + // understand why all of its instance state is transient. + + /** + * The base name of this {@code PTransform}, e.g., from + * {@link ParDo#named(String)}, or from defaults, or {@code null} if not + * yet assigned. + */ + protected transient String name; + + /** + * The {@link Pipeline} that owns this {@code PTransform}, or {@code null} + * if not yet set. + */ + private transient Pipeline pipeline; + + protected PTransform() { + this.name = null; + } + + protected PTransform(String name) { + this.name = name; + } + + /** + * Associates this {@code PTransform} with the given {@code Pipeline}. + * + *

For internal use only. + * + * @throws IllegalArgumentException if this transform has already + * been associated with a pipeline + */ + @Deprecated + public void setPipeline(Pipeline pipeline) { + if (this.pipeline != null) { + throw new IllegalStateException( + "internal error: transform already initialized"); + } + this.pipeline = pipeline; + } + + @Override + public String toString() { + return getName() + " [" + getKindString() + "]"; + } + + /** + * Returns the name to use by default for this {@code PTransform} + * (not including the names of any enclosing {@code PTransform}s). + * + *

By default, returns {@link #getKindString}. + * + *

The caller is responsible for ensuring that names of applied + * {@code PTransform}s are unique, e.g., by adding a uniquifying + * suffix when needed. + */ + protected String getDefaultName() { + return getKindString(); + } + + /** + * Returns a string describing what kind of {@code PTransform} this is. + * + *

By default, returns the base name of this + * {@code PTransform}'s class. + */ + protected String getKindString() { + return StringUtils.approximateSimpleName(getClass()); + } + + private void writeObject(ObjectOutputStream oos) throws IOException { + // We don't really want to be serializing this object, but we + // often have serializable anonymous DoFns nested within a + // PTransform. + } + + private void readObject(ObjectInputStream oos) + throws IOException, ClassNotFoundException { + // We don't really want to be serializing this object, but we + // often have serializable anonymous DoFns nested within a + // PTransform. + } + + /** + * After building, finalizes this {@code PTransform} to + * make it ready for running. Called automatically when its + * output(s) are finished. + * + *

Not normally called by user code. + */ + public void finishSpecifying() { + getOutput().finishSpecifyingOutput(); + } + + /** + * Returns the default {@code Coder} to use for the output of this + * single-output {@code PTransform}, or {@code null} if + * none can be inferred. + * + *

By default, returns {@code null}. + */ + protected Coder getDefaultOutputCoder() { + return null; + } + + /** + * Returns the default {@code Coder} to use for the given output of + * this single-output {@code PTransform}, or {@code null} + * if none can be inferred. + */ + public Coder getDefaultOutputCoder(TypedPValue output) { + if (output != getOutput()) { + return null; + } else { + @SuppressWarnings("unchecked") + Coder defaultOutputCoder = (Coder) getDefaultOutputCoder(); + return defaultOutputCoder; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java new file mode 100644 index 0000000000000..c7d925b2b418b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -0,0 +1,1054 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.util.DirectModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.DoFnRunner; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * {@code ParDo} is the core element-wise transform in Google Cloud + * Dataflow, invoking a user-specified function (from {@code I} to + * {@code O}) on each of the elements of the input + * {@code PCollection} to produce zero or more output elements, all + * of which are collected into the output {@code PCollection}. + * + *

Elements are processed independently, and possibly in parallel across + * distributed cloud resources. + * + *

The {@code ParDo} processing style is similar to what happens inside + * the "Mapper" or "Reducer" class of a MapReduce-style algorithm. + * + *

{@code DoFn}s

+ * + *

The function to use to process each element is specified by a + * {@link DoFn DoFn}. + * + *

Conceptually, when a {@code ParDo} transform is executed, the + * elements of the input {@code PCollection} are first divided up + * into some number of "batches". These are farmed off to distributed + * worker machines (or run locally, if using the + * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner}). + * For each batch of input elements, a fresh instance of the argument + * {@code DoFn} is created on a worker, then the {@code DoFn}'s + * optional {@link DoFn#startBundle} method is called to initialize it, + * then the {@code DoFn}'s required {@link DoFn#processElement} method + * is called on each of the input elements in the batch, then the + * {@code DoFn}'s optional {@link DoFn#finishBundle} method is called + * to complete its work, and finally the {@code DoFn} instance is + * thrown away. Each of the calls to any of the {@code DoFn}'s + * methods can produce zero or more output elements, which are + * collected together into a batch of output elements. All of the + * batches of output elements from all of the {@code DoFn} instances + * are "flattened" together into the output {@code PCollection}. + * + *

For example: + * + *

 {@code
+ * PCollection<String> lines = ...;
+ * PCollection<String> words =
+ *     lines.apply(ParDo.of(new DoFn<String, String>() {
+ *         public void processElement(ProcessContext c) {
+ *           String line = c.element();
+ *           for (String word : line.split("[^a-zA-Z']+")) {
+ *             c.output(word);
+ *           }
+ *         }}));
+ * PCollection<Integer> wordLengths =
+ *     words.apply(ParDo.of(new DoFn<String, Integer>() {
+ *         public void processElement(ProcessContext c) {
+ *           String word = c.element();
+ *           Integer length = word.length();
+ *           c.output(length);
+ *         }}));
+ * } 
+ * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and the output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

Naming {@code ParDo}s

+ * + *

A {@code ParDo} transform can be given a name using + * {@link #named}. While the system will automatically provide a name + * if none is specified explicitly, it is still a good practice to + * provide an explicit name, since that will probably make monitoring + * output more readable. For example: + * + *

 {@code
+ * PCollection<String> words =
+ *     lines.apply(ParDo.named("ExtractWords")
+ *                      .of(new DoFn<String, String>() { ... }));
+ * PCollection<Integer> wordLengths =
+ *     words.apply(ParDo.named("ComputeWordLengths")
+ *                      .of(new DoFn<String, Integer>() { ... }));
+ * } 
+ * + *

Side Inputs

+ * + *

While a {@code ParDo} iterates over a single "main input" + * {@code PCollection}, it can take additional "side input" + * {@code PCollectionView}s. These side input + * {@code PCollectionView}s express styles of accessing + * {@code PCollection}s computed by earlier pipeline operations, + * passed in to the {@code ParDo} transform using + * {@link #withSideInputs}, and their contents accessible to each of + * the {@code DoFn} operations via {@link DoFn.Context#sideInput}. + * For example: + * + *

 {@code
+ * PCollection words = ...;
+ * PCollection maxWordLengthCutOff = ...; // Singleton PCollection
+ * final PCollectionView maxWordLengthCutOffView =
+ *     SingletonPCollectionView.of(maxWordLengthCutOff);
+ * PCollection wordsBelowCutOff =
+ *     words.apply(ParDo.withSideInputs(maxWordLengthCutOffView)
+ *                      .of(new DoFn() {
+ *         public void processElement(ProcessContext c) {
+ *           String word = c.element();
+ *           int lengthCutOff = c.sideInput(maxWordLengthCutOffView);
+ *           if (word.length() <= lengthCutOff) {
+ *             c.output(word);
+ *           }
+ *         }}));
+ * } 
+ * + *

Side Outputs

+ * + *

Optionally, a {@code ParDo} transform can produce multiple + * output {@code PCollection}s: a "main output" + * {@code PCollection} plus any number of "side output" + * {@code PCollection}s, each keyed by a distinct {@link TupleTag}, + * and bundled in a {@link PCollectionTuple}. The {@code TupleTag}s + * to be used for the output {@code PCollectionTuple} are specified by + * invoking {@link #withOutputTags}. Unconsumed side outputs do not + * need to be explicitly specified, even if the {@code DoFn} + * generates them. Within the {@code DoFn}, an element is added to the + * main output {@code PCollection} as normal, using + * {@link DoFn.Context#output}, while an element is added to a side output + * {@code PCollection} using {@link DoFn.Context#sideOutput}. For example: + * + *

 {@code
+ * PCollection words = ...;
+ * // Select words whose length is below a cut off,
+ * // plus the lengths of words that are above the cut off.
+ * // Also select words starting with "MARKER".
+ * final int wordLengthCutOff = 10;
+ * // Create tags to use for the main and side outputs.
+ * final TupleTag wordsBelowCutOffTag =
+ *     new TupleTag(){};
+ * final TupleTag wordLengthsAboveCutOffTag =
+ *     new TupleTag(){};
+ * final TupleTag markedWordsTag =
+ *     new TupleTag(){};
+ * PCollectionTuple results =
+ *     words.apply(
+ *         ParDo
+ *         // Specify the main and consumed side output tags of the
+ *         // PCollectionTuple result:
+ *         .withOutputTags(wordsBelowCutOffTag,
+ *                         TupleTagList.of(wordLengthsAboveCutOffTag)
+ *                                     .and(markedWordsTag))
+ *         .of(new DoFn() {
+ *             // Create a tag for the unconsumed side output.
+ *             final TupleTag specialWordsTag =
+ *                 new TupleTag(){};
+ *             public void processElement(ProcessContext c) {
+ *               String word = c.element();
+ *               if (word.length() <= wordLengthCutOff) {
+ *                 // Emit this short word to the main output.
+ *                 c.output(word);
+ *               } else {
+ *                 // Emit this long word's length to a side output.
+ *                 c.sideOutput(wordLengthsAboveCutOffTag, word.length());
+ *               }
+ *               if (word.startsWith("MARKER")) {
+ *                 // Emit this word to a different side output.
+ *                 c.sideOutput(markedWordsTag, word);
+ *               }
+ *               if (word.startsWith("SPECIAL")) {
+ *                 // Emit this word to the unconsumed side output.
+ *                 c.sideOutput(specialWordsTag, word);
+ *               }
+ *             }}));
+ * // Extract the PCollection results, by tag.
+ * PCollection wordsBelowCutOff =
+ *     results.get(wordsBelowCutOffTag);
+ * PCollection wordLengthsAboveCutOff =
+ *     results.get(wordLengthsAboveCutOffTag);
+ * PCollection markedWords =
+ *     results.get(markedWordsTag);
+ * } 
+ * + *

Properties May Be Specified In Any Order

+ * + * Several properties can be specified for a {@code ParDo} + * {@code PTransform}, including name, side inputs, side output tags, + * and {@code DoFn} to invoke. Only the {@code DoFn} is required; the + * name is encouraged but not required, and side inputs and side + * output tags are only specified when they're needed. These + * properties can be specified in any order, as long as they're + * specified before the {@code ParDo} {@code PTransform} is applied. + * + *

The approach used to allow these properties to be specified in + * any order, with some properties omitted, is to have each of the + * property "setter" methods defined as static factory methods on + * {@code ParDo} itself, which return an instance of either + * {@link ParDo.Unbound ParDo.Unbound} or + * {@link ParDo.Bound ParDo.Bound} nested classes, each of which offers + * property setter instance methods to enable setting additional + * properties. {@code ParDo.Bound} is used for {@code ParDo} + * transforms whose {@code DoFn} is specified and whose input and + * output static types have been bound. {@code ParDo.Unbound} is used + * for {@code ParDo} transforms that have not yet had their + * {@code DoFn} specified. Only {@code ParDo.Bound} instances can be + * applied. + * + *

Another benefit of this approach is that it reduces the number + * of type parameters that need to be specified manually. In + * particular, the input and output types of the {@code ParDo} + * {@code PTransform} are inferred automatically from the type + * parameters of the {@code DoFn} argument passed to {@link ParDo#of}. + * + *
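+ * For illustration only (a sketch; {@code ExtractWordsFn} is a
+ * hypothetical {@code DoFn<String, String>}, not part of the SDK),
+ * the following two ways of building the same transform rely on this
+ * flexibility and produce equivalent results:
+ *
+ * <pre> {@code
+ * PCollection<String> words1 =
+ *     lines.apply(ParDo.named("ExtractWords")
+ *                      .of(new ExtractWordsFn()));
+ * PCollection<String> words2 =
+ *     lines.apply(ParDo.of(new ExtractWordsFn())
+ *                      .named("ExtractWords"));
+ * } </pre>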

Output Coders

+ * + *

By default, the {@code Coder} of the + * elements of the main output {@code PCollection} is inferred from the + * concrete type of the {@code DoFn}'s output type {@code O}. + * + *

By default, the {@code Coder} of the elements of a side output + * {@code PCollection} is inferred from the concrete type of the + * corresponding {@code TupleTag}'s type {@code X}. To be + * successful, the {@code TupleTag} should be created as an instance + * of a trivial anonymous subclass, with {@code {}} suffixed to the + * constructor call. Such uses block Java's generic type parameter + * inference, so the {@code <X>} argument must be provided explicitly. + * For example: + *

 {@code
+ * // A TupleTag to use for a side input can be written concisely:
+ * final TupleTag<String> sideInputTag = new TupleTag<>();
+ * // A TupleTag to use for a side output should be written with "{}",
+ * // and explicit generic parameter type:
+ * final TupleTag<String> sideOutputTag = new TupleTag<String>(){};
+ * } 
+ * This style of {@code TupleTag} instantiation is used in the example of + * multiple side outputs, above. + * + *

Ordered Input and/or Output PCollections

+ * + *

If the input {@code PCollection} is ordered (see + * {@link PCollection#setOrdered}), then each batch of the input + * processed by a {@code DoFn} instance will correspond to a + * consecutive subsequence of elements of the input, and the + * {@link DoFn#processElement} operation will be invoked on each + * element of the batch in order; otherwise, batches will correspond + * to arbitrary subsets of elements of the input, processed in + * arbitrary order. + * + *

Independently, if a main or side output {@code PCollection} is + * ordered, then the order in which elements are output to it will be + * preserved in the output {@code PCollection}; otherwise, the order + * in which elements are output to the {@code PCollection} doesn't + * matter. If the input {@code PCollection} is also ordered, then the + * sequences of elements output from the batches will be concatenated + * together in the same order as the batches appear in the input, + * supporting order-preserving transforms on {@code PCollection}s. + * + *

Serializability of {@code DoFn}s

+ * + *

A {@code DoFn} passed to a {@code ParDo} transform must be + * {@code Serializable}. This allows the {@code DoFn} instance + * created in this "main program" to be sent (in serialized form) to + * remote worker machines and reconstituted for each batch of elements + * of the input {@code PCollection} being processed. A {@code DoFn} + * can have instance variable state, and non-transient instance + * variable state will be serialized in the main program and then + * deserialized on remote worker machines for each batch of elements + * to process. + * + *

To aid in ensuring that {@code DoFn}s are properly + * {@code Serializable}, even local execution using the + * {@link DirectPipelineRunner} will serialize and then deserialize + * {@code DoFn}s before executing them on a batch. + * + *

{@code DoFn}s expressed as anonymous inner classes can be + * convenient, but due to a quirk in Java's rules for serializability, + * non-static inner or nested classes (including anonymous inner + * classes) automatically capture their enclosing class's instance in + * their serialized state. This can lead to including much more than + * intended in the serialized state of a {@code DoFn}, or even things + * that aren't {@code Serializable}. + * + *

There are two ways to avoid unintended serialized state in a + * {@code DoFn}: + * + *

    + * + *
  • Define the {@code DoFn} as a named, static class. + * + *
  • Define the {@code DoFn} as an anonymous inner class inside of + * a static method. + * + *
+ * + * Both these approaches ensure that there is no implicit enclosing + * class instance serialized along with the {@code DoFn} instance. + * + *
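+ * As an illustrative sketch of the first approach above (the class
+ * name and constructor argument are hypothetical, not part of the
+ * SDK):
+ *
+ * <pre> {@code
+ * static class ExtractWordsFn extends DoFn<String, String> {
+ *   // Explicit, Serializable state; no enclosing instance is
+ *   // captured because the class is static.
+ *   private final String separatorRegex;
+ *
+ *   ExtractWordsFn(String separatorRegex) {
+ *     this.separatorRegex = separatorRegex;
+ *   }
+ *
+ *   public void processElement(ProcessContext c) {
+ *     for (String word : c.element().split(separatorRegex)) {
+ *       c.output(word);
+ *     }
+ *   }
+ * }
+ * } </pre>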

Prior to Java 8, any local variables of the enclosing + * method referenced from within an anonymous inner class need to be + * marked as {@code final}. If defining the {@code DoFn} as a named + * static class, such variables would be passed as explicit + * constructor arguments and stored in explicit instance variables. + * + *

There are three main ways to initialize the state of a + * {@code DoFn} instance processing a batch: + * + *

    + * + *
  • Define instance variable state (including implicit instance + * variables holding final variables captured by an anonymous inner + * class), initialized by the {@code DoFn}'s constructor (which is + * implicit for an anonymous inner class). This state will be + * automatically serialized and then deserialized in the {@code DoFn} + * instance created for each batch. This method is good for state + * known when the original {@code DoFn} is created in the main + * program, if it's not overly large. + * + *
  • Compute the state as a singleton {@code PCollection} and pass it + * in as a side input to the {@code DoFn}. This is good if the state + * needs to be computed by the pipeline, or if the state is very large + * and so is best read from file(s) rather than sent as part of the + * {@code DoFn}'s serialized state. + * + *
  • Initialize the state in each {@code DoFn} instance, in + * {@link DoFn#startBundle}. This is good if the initialization + * doesn't depend on any information known only by the main program or + * computed by earlier pipeline operations, but is the same for all + * instances of this {@code DoFn} for all program executions, say + * setting up empty caches or initializing constant data. + * + *
+ * + *
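+ * As a small sketch of the third approach listed above (the cache
+ * and the {@code expensiveLookup} helper are hypothetical, not part
+ * of the SDK):
+ *
+ * <pre> {@code
+ * static class CachingLookupFn extends DoFn<String, String> {
+ *   // Rebuilt for each batch in startBundle(); never serialized.
+ *   private transient Map<String, String> cache;
+ *
+ *   public void startBundle(Context c) {
+ *     cache = new HashMap<>();
+ *   }
+ *
+ *   public void processElement(ProcessContext c) {
+ *     String key = c.element();
+ *     String value = cache.get(key);
+ *     if (value == null) {
+ *       value = expensiveLookup(key);  // hypothetical helper
+ *       cache.put(key, value);
+ *     }
+ *     c.output(value);
+ *   }
+ * }
+ * } </pre>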

No Global Shared State

+ * + *

{@code ParDo} operations are intended to be able to run in + * parallel across multiple worker machines. This precludes easy + * sharing and updating mutable state across those machines. There is + * no support in the Google Cloud Dataflow system for communicating + * and synchronizing updates to shared state across worker machines, + * so programs should not access any mutable static variable state in + * their {@code DoFn}, without understanding that the Java processes + * for the main program and workers will each have its own independent + * copy of such state, and there won't be any automatic copying of + * that state across Java processes. All information should be + * communicated to {@code DoFn} instances via main and side inputs and + * serialized state, and all output should be communicated from a + * {@code DoFn} instance via main and side outputs, in the absence of + * external communication mechanisms written by user code. + * + *

Fault Tolerance

+ * + *

In a distributed system, things can fail: machines can crash, + * machines can be unable to communicate across the network, etc. + * While individual failures are rare, the larger the job, the greater + * the chance that something, somewhere, will fail. The Google Cloud + * Dataflow service strives to mask such failures automatically, + * principally by retrying failed {@code DoFn} batches. This means + * that a {@code DoFn} instance might process a batch partially, then + * crash for some reason, then be rerun (often on a different worker + * machine) on that same batch and on the same elements as before. + * Sometimes two or more {@code DoFn} instances will be running on the + * same batch simultaneously, with the system taking the results of + * the first instance to complete successfully. Consequently, the + * code in a {@code DoFn} needs to be written such that these + * duplicate (sequential or concurrent) executions do not cause + * problems. If the outputs of a {@code DoFn} are a pure function of + * its inputs, then this requirement is satisfied. However, if a + * {@code DoFn}'s execution has external side-effects, say performing + * updates to external HTTP services, then the {@code DoFn}'s code + * needs to take care to ensure that those updates are idempotent and + * that concurrent updates are acceptable. This property can be + * difficult to achieve, so it is advisable to strive to keep + * {@code DoFn}s as pure functions as much as possible. + * + *

Optimization

+ * + *

The Google Cloud Dataflow service automatically optimizes a + * pipeline before it is executed. A key optimization, fusion, + * relates to ParDo operations. If one ParDo operation produces a + * PCollection that is then consumed as the main input of another + * ParDo operation, the two ParDo operations will be fused + * together into a single ParDo operation and run in a single pass; + * this is "producer-consumer fusion". Similarly, if + * two or more ParDo operations have the same PCollection main input, + * they will be fused into a single ParDo which makes just one pass + * over the input PCollection; this is "sibling fusion". + * + *

If after fusion there are no more unfused references to a + * PCollection (e.g., one between a producer ParDo and a consumer + * ParDo), the PCollection itself is "fused away" and won't ever be + * written to disk, saving all the I/O and space expense of + * constructing it. + * + *

The Google Cloud Dataflow service applies fusion as much as + * possible, greatly reducing the cost of executing pipelines. As a + * result, it is essentially "free" to write ParDo operations in a + * very modular, composable style, each ParDo operation doing one + * clear task, and stringing together sequences of ParDo operations to + * get the desired overall effect. Such programs can be easier to + * understand, easier to unit-test, easier to extend and evolve, and + * easier to reuse in new programs. The predefined library of + * PTransforms that come with Google Cloud Dataflow makes heavy use of + * this modular, composable style, trusting to the Google Cloud + * Dataflow service's optimizer to "flatten out" all the compositions + * into highly optimized stages. + * + * @see Using ParDo + */ +public class ParDo { + + /** + * Creates a {@code ParDo} {@code PTransform} with the given name. + * + *

See the discussion of Naming above for more explanation. + * + *

The resulting {@code PTransform} is incomplete, and its + * input/output types are not yet bound. Use + * {@link ParDo.Unbound#of} to specify the {@link DoFn} to + * invoke, which will also bind the input/output types of this + * {@code PTransform}. + */ + public static Unbound named(String name) { + return new Unbound().named(name); + } + + /** + * Creates a {@code ParDo} {@code PTransform} with the given + * side inputs. + * + *

Side inputs are {@link PCollectionView}s, whose contents are + * computed during pipeline execution and then made accessible to + * {@code DoFn} code via {@link DoFn.Context#sideInput}. Each + * invocation of the {@code DoFn} receives the same values for these + * side inputs. + * + *

See the discussion of Side Inputs above for more explanation. + * + *

The resulting {@code PTransform} is incomplete, and its + * input/output types are not yet bound. Use + * {@link ParDo.Unbound#of} to specify the {@link DoFn} to + * invoke, which will also bind the input/output types of this + * {@code PTransform}. + */ + public static Unbound withSideInputs(PCollectionView... sideInputs) { + return new Unbound().withSideInputs(sideInputs); + } + + /** + * Creates a {@code ParDo} with the given side inputs. + * + *

Side inputs are {@link PCollectionView}s, whose contents are + * computed during pipeline execution and then made accessible to + * {@code DoFn} code via {@link DoFn.Context#sideInput}. + * + *

See the discussion of Side Inputs above for more explanation. + * + *

The resulting {@code PTransform} is incomplete, and its + * input/output types are not yet bound. Use + * {@link ParDo.Unbound#of} to specify the {@link DoFn} to + * invoke, which will also bind the input/output types of this + * {@code PTransform}. + */ + public static Unbound withSideInputs( + Iterable> sideInputs) { + return new Unbound().withSideInputs(sideInputs); + } + + /** + * Creates a multi-output {@code ParDo} {@code PTransform} whose + * output {@link PCollection}s will be referenced using the given main + * output and side output tags. + * + *

A {@link TupleTag} is used to name (with its static element + * type {@code T}) each main and side output {@code PCollection}. + * This {@code PTransform}'s {@link DoFn} emits elements to the main + * output {@code PCollection} as normal, using + * {@link DoFn.Context#output}. The {@code DoFn} emits elements to + * a side output {@code PCollection} using + * {@link DoFn.Context#sideOutput}, passing that side output's tag + * as an argument. The result of invoking this {@code PTransform} + * will be a {@link PCollectionTuple}, and any of the main and + * side output {@code PCollection}s can be retrieved from it via + * {@link PCollectionTuple#get}, passing the output's tag as an + * argument. + * + *

See the discussion of Side Outputs above for more explanation. + * + *

The resulting {@code PTransform} is incomplete, and its input + * type is not yet bound. Use {@link ParDo.UnboundMulti#of} + * to specify the {@link DoFn} to invoke, which will also bind the + * input type of this {@code PTransform}. + */ + public static UnboundMulti withOutputTags( + TupleTag mainOutputTag, + TupleTagList sideOutputTags) { + return new Unbound().withOutputTags(mainOutputTag, sideOutputTags); + } + + /** + * Creates a {@code ParDo} {@code PTransform} that will invoke the + * given {@link DoFn} function. + * + *

The resulting {@code PTransform}'s types have been bound, with the + * input being a {@code PCollection} and the output a + * {@code PCollection}, inferred from the types of the argument + * {@code DoFn}. It is ready to be applied, or further + * properties can be set on it first. + */ + public static Bound of(DoFn fn) { + return new Unbound().of(fn); + } + + /** + * An incomplete {@code ParDo} transform, with unbound input/output types. + * + *

Before being applied, {@link ParDo.Unbound#of} must be + * invoked to specify the {@link DoFn} to invoke, which will also + * bind the input/output types of this {@code PTransform}. + */ + public static class Unbound { + String name; + List> sideInputs = Collections.emptyList(); + + Unbound() {} + + Unbound(String name, + List> sideInputs) { + this.name = name; + this.sideInputs = sideInputs; + } + + /** + * Returns a new {@code ParDo} transform that's like this + * transform but with the specified name. Does not modify this + * transform. The resulting transform is still incomplete. + * + *

See the discussion of Naming above for more explanation. + */ + public Unbound named(String name) { + return new Unbound(name, sideInputs); + } + + /** + * Returns a new {@code ParDo} transform that's like this + * transform but with the specified side inputs. + * Does not modify this transform. The resulting transform is + * still incomplete. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public Unbound withSideInputs(PCollectionView... sideInputs) { + return new Unbound(name, ImmutableList.copyOf(sideInputs)); + } + + /** + * Returns a new {@code ParDo} transform that's like this + * transform but with the specified side inputs. Does not modify + * this transform. The resulting transform is still incomplete. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public Unbound withSideInputs( + Iterable> sideInputs) { + return new Unbound(name, ImmutableList.copyOf(sideInputs)); + } + + /** + * Returns a new multi-output {@code ParDo} transform that's like + * this transform but with the specified main and side output + * tags. Does not modify this transform. The resulting transform + * is still incomplete. + * + *

See the discussion of Side Outputs above and on + * {@link ParDo#withOutputTags} for more explanation. + */ + public UnboundMulti withOutputTags(TupleTag mainOutputTag, + TupleTagList sideOutputTags) { + return new UnboundMulti<>( + name, sideInputs, mainOutputTag, sideOutputTags); + } + + /** + * Returns a new {@code ParDo} {@code PTransform} that's like this + * transform but which will invoke the given {@link DoFn} + * function, and which has its input and output types bound. Does + * not modify this transform. The resulting {@code PTransform} is + * sufficiently specified to be applied, but more properties can + * still be specified. + */ + public Bound of(DoFn fn) { + return new Bound<>(name, sideInputs, fn); + } + } + + /** + * A {@code PTransform} that, when applied to a {@code PCollection}, + * invokes a user-specified {@code DoFn} on all its elements, + * with all its outputs collected into an output + * {@code PCollection}. + * + *

A multi-output form of this transform can be created with + * {@link ParDo.Bound#withOutputTags}. + * + * @param the type of the (main) input {@code PCollection} elements + * @param the type of the (main) output {@code PCollection} elements + */ + public static class Bound + extends PTransform, PCollection> { + // Inherits name. + List> sideInputs; + DoFn fn; + + Bound(String name, + List> sideInputs, + DoFn fn) { + super(name); + this.sideInputs = sideInputs; + this.fn = fn; + } + + /** + * Returns a new {@code ParDo} {@code PTransform} that's like this + * {@code PTransform} but with the specified name. Does not + * modify this {@code PTransform}. + * + *

See the discussion of Naming above for more explanation. + */ + public Bound named(String name) { + return new Bound<>(name, sideInputs, fn); + } + + /** + * Returns a new {@code ParDo} {@code PTransform} that's like this + * {@code PTransform} but with the specified side inputs. Does not + * modify this {@code PTransform}. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public Bound withSideInputs(PCollectionView... sideInputs) { + return new Bound<>(name, ImmutableList.copyOf(sideInputs), fn); + } + + /** + * Returns a new {@code ParDo} {@code PTransform} that's like this + * {@code PTransform} but with the specified side inputs. Does not + * modify this {@code PTransform}. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public Bound withSideInputs( + Iterable> sideInputs) { + return new Bound<>(name, ImmutableList.copyOf(sideInputs), fn); + } + + /** + * Returns a new multi-output {@code ParDo} {@code PTransform} + * that's like this {@code PTransform} but with the specified main + * and side output tags. Does not modify this {@code PTransform}. + * + *

See the discussion of Side Outputs above and on + * {@link ParDo#withOutputTags} for more explanation. + */ + public BoundMulti withOutputTags(TupleTag mainOutputTag, + TupleTagList sideOutputTags) { + return new BoundMulti<>( + name, sideInputs, mainOutputTag, sideOutputTags, fn); + } + + @Override + public PCollection apply(PCollection input) { + if (sideInputs == null) { + sideInputs = Collections.emptyList(); + } + return PCollection.createPrimitiveOutputInternal(getInput().getWindowingFn()) + .setTypeTokenInternal(fn.getOutputTypeToken()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return getPipeline().getCoderRegistry().getDefaultCoder( + fn.getOutputTypeToken(), + fn.getInputTypeToken(), + ((PCollection) getInput()).getCoder()); + } + + @Override + protected String getDefaultName() { + return StringUtils.approximateSimpleName(fn.getClass()); + } + + @Override + protected String getKindString() { return "ParDo"; } + + public DoFn getFn() { + return fn; + } + + public List> getSideInputs() { + return sideInputs; + } + } + + /** + * An incomplete multi-output {@code ParDo} transform, with unbound + * input type. + * + *

Before being applied, {@link ParDo.UnboundMulti#of} must be + * invoked to specify the {@link DoFn} to invoke, which will also + * bind the input type of this {@code PTransform}. + * + * @param the type of the main output {@code PCollection} elements + */ + public static class UnboundMulti { + String name; + List> sideInputs; + TupleTag mainOutputTag; + TupleTagList sideOutputTags; + + UnboundMulti(String name, + List> sideInputs, + TupleTag mainOutputTag, + TupleTagList sideOutputTags) { + this.name = name; + this.sideInputs = sideInputs; + this.mainOutputTag = mainOutputTag; + this.sideOutputTags = sideOutputTags; + } + + /** + * Returns a new multi-output {@code ParDo} transform that's like + * this transform but with the specified name. Does not modify + * this transform. The resulting transform is still incomplete. + * + *

See the discussion of Naming above for more explanation. + */ + public UnboundMulti named(String name) { + return new UnboundMulti<>( + name, sideInputs, mainOutputTag, sideOutputTags); + } + + /** + * Returns a new multi-output {@code ParDo} transform that's like + * this transform but with the specified side inputs. Does not + * modify this transform. The resulting transform is still + * incomplete. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public UnboundMulti withSideInputs( + PCollectionView... sideInputs) { + return new UnboundMulti<>( + name, ImmutableList.copyOf(sideInputs), + mainOutputTag, sideOutputTags); + } + + /** + * Returns a new multi-output {@code ParDo} transform that's like + * this transform but with the specified side inputs. Does not + * modify this transform. The resulting transform is still + * incomplete. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public UnboundMulti withSideInputs( + Iterable> sideInputs) { + return new UnboundMulti<>( + name, ImmutableList.copyOf(sideInputs), + mainOutputTag, sideOutputTags); + } + + /** + * Returns a new multi-output {@code ParDo} {@code PTransform} + * that's like this transform but which will invoke the given + * {@link DoFn} function, and which has its input type bound. + * Does not modify this transform. The resulting + * {@code PTransform} is sufficiently specified to be applied, but + * more properties can still be specified. + */ + public BoundMulti of(DoFn fn) { + return new BoundMulti<>( + name, sideInputs, mainOutputTag, sideOutputTags, fn); + } + } + + /** + * A {@code PTransform} that, when applied to a + * {@code PCollection}, invokes a user-specified + * {@code DoFn} on all its elements, which can emit elements + * to any of the {@code PTransform}'s main and side output + * {@code PCollection}s, which are bundled into a result + * {@code PCollectionTuple}. + * + * @param the type of the (main) input {@code PCollection} elements + * @param the type of the main output {@code PCollection} elements + */ + public static class BoundMulti + extends PTransform, PCollectionTuple> { + // Inherits name. + List> sideInputs; + TupleTag mainOutputTag; + TupleTagList sideOutputTags; + DoFn fn; + + BoundMulti(String name, + List> sideInputs, + TupleTag mainOutputTag, + TupleTagList sideOutputTags, + DoFn fn) { + super(name); + this.sideInputs = sideInputs; + this.mainOutputTag = mainOutputTag; + this.sideOutputTags = sideOutputTags; + this.fn = fn; + } + + /** + * Returns a new multi-output {@code ParDo} {@code PTransform} + * that's like this {@code PTransform} but with the specified + * name. Does not modify this {@code PTransform}. + * + *

See the discussion of Naming above for more explanation. + */ + public BoundMulti named(String name) { + return new BoundMulti<>( + name, sideInputs, mainOutputTag, sideOutputTags, fn); + } + + /** + * Returns a new multi-output {@code ParDo} {@code PTransform} + * that's like this {@code PTransform} but with the specified side + * inputs. Does not modify this {@code PTransform}. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public BoundMulti withSideInputs( + PCollectionView... sideInputs) { + return new BoundMulti<>( + name, ImmutableList.copyOf(sideInputs), + mainOutputTag, sideOutputTags, fn); + } + + /** + * Returns a new multi-output {@code ParDo} {@code PTransform} + * that's like this {@code PTransform} but with the specified side + * inputs. Does not modify this {@code PTransform}. + * + *

See the discussion of Side Inputs above and on + * {@link ParDo#withSideInputs} for more explanation. + */ + public BoundMulti withSideInputs( + Iterable> sideInputs) { + return new BoundMulti<>( + name, ImmutableList.copyOf(sideInputs), + mainOutputTag, sideOutputTags, fn); + } + + + @Override + public PCollectionTuple apply(PCollection input) { + PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( + TupleTagList.of(mainOutputTag).and(sideOutputTags.getAll()), + getInput().getWindowingFn()); + + // The fn will likely be an instance of an anonymous subclass + // such as DoFn { }, thus will have a high-fidelity + // TypeToken for the output type. + outputs.get(mainOutputTag).setTypeTokenInternal(fn.getOutputTypeToken()); + + return outputs; + } + + @Override + protected Coder getDefaultOutputCoder() { + throw new RuntimeException( + "internal error: shouldn't be calling this on a multi-output ParDo"); + } + + @Override + protected String getDefaultName() { + return StringUtils.approximateSimpleName(fn.getClass()); + } + + @Override + protected String getKindString() { return "ParMultiDo"; } + + public DoFn getFn() { + return fn; + } + + public TupleTag getMainOutputTag() { + return mainOutputTag; + } + + public List> getSideInputs() { + return sideInputs; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateSingleHelper(transform, context); + } + }); + } + + private static void evaluateSingleHelper( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + TupleTag mainOutputTag = new TupleTag<>("out"); + + DirectModeExecutionContext executionContext = new DirectModeExecutionContext(); + + DoFnRunner fnRunner = + evaluateHelper(transform.fn, context.getStepName(transform), + transform.getInput(), transform.sideInputs, + mainOutputTag, new ArrayList>(), + context, executionContext); + + context.setPCollectionValuesWithMetadata( + transform.getOutput(), + executionContext.getOutput(mainOutputTag)); + } + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + BoundMulti.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + BoundMulti transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateMultiHelper(transform, context); + } + }); + } + + private static void evaluateMultiHelper( + BoundMulti transform, + DirectPipelineRunner.EvaluationContext context) { + + DirectModeExecutionContext executionContext = new DirectModeExecutionContext(); + + DoFnRunner fnRunner = + evaluateHelper(transform.fn, context.getStepName(transform), + transform.getInput(), transform.sideInputs, + transform.mainOutputTag, transform.sideOutputTags.getAll(), + context, executionContext); + + for (Map.Entry, PCollection> entry + : transform.getOutput().getAll().entrySet()) { + TupleTag tag = (TupleTag) entry.getKey(); + @SuppressWarnings("unchecked") + PCollection pc = (PCollection) entry.getValue(); + + context.setPCollectionValuesWithMetadata( + pc, + (tag == transform.mainOutputTag + ? 
executionContext.getOutput(tag) + : executionContext.getSideOutput(tag))); + } + } + + private static DoFnRunner evaluateHelper( + DoFn doFn, + String name, + PCollection input, + List> sideInputs, + TupleTag mainOutputTag, + List> sideOutputTags, + DirectPipelineRunner.EvaluationContext context, + DirectModeExecutionContext executionContext) { + // TODO: Run multiple shards? + DoFn fn = context.ensureSerializable(doFn); + + PTuple sideInputValues = PTuple.empty(); + for (PCollectionView view : sideInputs) { + sideInputValues = sideInputValues.and( + view.getTagInternal(), + context.getPCollectionView(view)); + } + + DoFnRunner fnRunner = + DoFnRunner.createWithListOutputs( + context.getPipelineOptions(), + fn, + sideInputValues, + mainOutputTag, + sideOutputTags, + executionContext.getStepContext(name), + context.getAddCounterMutator()); + + fnRunner.startBundle(); + + for (DirectPipelineRunner.ValueWithMetadata elem + : context.getPCollectionValuesWithMetadata(input)) { + executionContext.setKey(elem.getKey()); + fnRunner.processElement((WindowedValue) elem.getWindowedValue()); + } + + fnRunner.finishBundle(); + + return fnRunner; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java new file mode 100644 index 0000000000000..74a1359aa5ed0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import java.io.Serializable; + +/** + * {@code Partition} takes a {@code PCollection} and a + * {@code PartitionFn}, uses the {@code PartitionFn} to split the + * elements of the input {@code PCollection} into {@code N} partitions, and + * returns a {@code PCollectionList} that bundles {@code N} + * {@code PCollection}s containing the split elements. + * + *

Example of use: + *

 {@code
+ * PCollection<Student> students = ...;
+ * // Split students up into 10 partitions, by percentile:
+ * PCollectionList<Student> studentsByPercentile =
+ *     students.apply(Partition.of(10, new PartitionFn<Student>() {
+ *         public int partitionFor(Student student, int numPartitions) {
+ *             return student.getPercentile()  // 0..99
+ *                  * numPartitions / 100;
+ *         }}));
+ * for (int i = 0; i < 10; i++) {
+ *   PCollection<Student> partition = studentsByPercentile.get(i);
+ *   ...
+ * }
+ * } 
+ * + *

By default, the {@code Coder} of each of the + * {@code PCollection}s in the output {@code PCollectionList} is the + * same as the {@code Coder} of the input {@code PCollection}. + * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and each output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + * @param the type of the elements of the input and output + * {@code PCollection}s + */ +public class Partition + extends PTransform, PCollectionList> { + + /** + * A function object that chooses an output partition for an element. + * + * @param the type of the elements being partitioned + */ + public interface PartitionFn extends Serializable { + /** + * Chooses the partition into which to put the given element. + * + * @param elem the element to be partitioned + * @param numPartitions the total number of partitions ({@code >= 1}) + * @return index of the selected partition (in the range + * {@code [0..numPartitions-1]}) + */ + public int partitionFor(T elem, int numPartitions); + } + + /** + * Returns a new {@code Partition} {@code PTransform} that divides + * its input {@code PCollection} into the given number of partitions, + * using the given partitioning function. + * + * @param numPartitions the number of partitions to divide the input + * {@code PCollection} into + * @param partitionFn the function to invoke on each element to + * choose its output partition + * @throws IllegalArgumentException if {@code numPartitions <= 0} + */ + public static Partition of( + int numPartitions, PartitionFn partitionFn) { + return new Partition<>(new PartitionDoFn(numPartitions, partitionFn)); + } + + @Override + public PCollectionList apply(PCollection in) { + final TupleTagList outputTags = partitionDoFn.getOutputTags(); + + PCollectionTuple outputs = in.apply( + ParDo + .withOutputTags(new TupleTag(){}, outputTags) + .of(partitionDoFn)); + + PCollectionList pcs = PCollectionList.empty(in.getPipeline()); + Coder coder = in.getCoder(); + + for (TupleTag outputTag : outputTags.getAll()) { + // All the tuple tags are actually TupleTag + // And all the collections are actually PCollection + @SuppressWarnings("unchecked") + TupleTag typedOutputTag = (TupleTag) outputTag; + pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder)); + } + return pcs; + } + + ///////////////////////////////////////////////////////////////////////////// + + private final transient PartitionDoFn partitionDoFn; + + private Partition(PartitionDoFn partitionDoFn) { + this.partitionDoFn = partitionDoFn; + } + + private static class PartitionDoFn extends DoFn { + private final int numPartitions; + private final PartitionFn partitionFn; + private final TupleTagList outputTags; + + /** + * Constructs a PartitionDoFn. 
+ * + * @throws IllegalArgumentException if {@code numPartitions <= 0} + */ + public PartitionDoFn( + int numPartitions, PartitionFn partitionFn) { + if (numPartitions <= 0) { + throw new IllegalArgumentException("numPartitions must be > 0"); + } + + this.numPartitions = numPartitions; + this.partitionFn = partitionFn; + + TupleTagList buildOutputTags = TupleTagList.empty(); + for (int partition = 0; partition < numPartitions; partition++) { + buildOutputTags = buildOutputTags.and(new TupleTag()); + } + outputTags = buildOutputTags; + } + + public TupleTagList getOutputTags() { + return outputTags; + } + + @Override + public void processElement(ProcessContext c) { + T1 input = c.element(); + int partition = partitionFn.partitionFor(input, numPartitions); + if (0 <= partition && partition < numPartitions) { + c.sideOutput((TupleTag) outputTags.get(partition), input); + } else { + throw new IndexOutOfBoundsException( + "Partition function returned out of bounds index: " + + partition + " not in [0.." + numPartitions + ")"); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java new file mode 100644 index 0000000000000..2124acfbb84a5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.api.client.util.Throwables; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.reflect.TypeToken; +import com.google.common.util.concurrent.RateLimiter; + +import org.joda.time.Instant; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Provides rate-limiting of user functions, using threaded execution and a + * {@link com.google.common.util.concurrent.RateLimiter} to process elements + * at the desired rate. + * + *

For example, to limit each worker to 10 requests per second: + *

{@code
+ * PCollection data = ...;
+ * data.apply(
+ *   RateLimiting.perWorker(new MyDoFn())
+ *               .withRateLimit(10));
+ * }
+ * + *

An uncaught exception from the wrapped DoFn will result in the exception + * being rethrown in later calls to {@link RateLimitingDoFn#processElement} + * or a call to {@link RateLimitingDoFn#finishBundle}. + * + *

Rate limiting is provided as a PTransform + * ({@link RateLimitingTransform}), and also as a {@code DoFn} + * ({@link RateLimitingDoFn}). + */ +public class RateLimiting { + + /** + * Creates a new per-worker rate-limiting transform for the given + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn}. + * + *

The default behavior is to process elements with multiple threads, + * but no rate limit is applied. + * + *

Use {@link RateLimitingTransform#withRateLimit} to limit the processing + * rate, and {@link RateLimitingTransform#withMaxParallelism} to control the + * maximum concurrent processing limit. + * + *

Aside from the above, the {@code DoFn} will be executed in the same manner + * as in {@link ParDo}. + * + *

Rate limiting is applied independently per-worker. + */ + public static RateLimitingTransform perWorker(DoFn doFn) { + return new RateLimitingTransform<>(doFn); + } + + /** + * A {@link PTransform} which applies rate limiting to a {@link DoFn}. + * + * @param the type of the (main) input elements + * @param the type of the (main) output elements + */ + public static class RateLimitingTransform + extends PTransform, PCollection> { + private final DoFn doFn; + private double rate = 0.0; + // TODO: set default based on num cores, or based on rate limit? + private int maxParallelism = DEFAULT_MAX_PARALLELISM; + + public RateLimitingTransform(DoFn doFn) { + this.doFn = doFn; + } + + /** + * Modifies this {@code RateLimitingTransform}, specifying a maximum + * per-worker element processing rate. + * + *

A rate of {@code N} corresponds to {@code N} elements per second. + * This rate is on a per-worker basis, so the overall rate of the job + * depends upon the number of workers. + * + *

This rate limit may not be reachable unless there is sufficient + * parallelism. + * + *

A rate of <= 0.0 disables rate limiting. + */ + public RateLimitingTransform withRateLimit( + double maxElementsPerSecond) { + this.rate = maxElementsPerSecond; + return this; + } + + /** + * Modifies this {@code RateLimitingTransform}, specifying a maximum + * per-worker parallelism. + * + *

This determines how many concurrent elements will be processed by the + * wrapped {@code DoFn}. + * + *
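+ * For illustration (a sketch; {@code MyDoFn} stands for a
+ * user-defined {@code DoFn}, as in the example above), a parallelism
+ * limit can be combined with a rate limit:
+ *
+ * <pre> {@code
+ * data.apply(
+ *     RateLimiting.perWorker(new MyDoFn())
+ *                 .withRateLimit(10)
+ *                 .withMaxParallelism(
+ *                     Runtime.getRuntime().availableProcessors()));
+ * } </pre>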

The desired amount of parallelism depends upon the type of work. For + * CPU-intensive work, a good starting point is to use the number of cores: + * {@code Runtime.getRuntime().availableProcessors()}. + */ + public RateLimitingTransform withMaxParallelism(int max) { + this.maxParallelism = max; + return this; + } + + @Override + public PCollection apply(PCollection input) { + return input.apply( + ParDo.of(new RateLimitingDoFn<>(doFn, rate, maxParallelism))); + } + } + + /** + * A rate-limiting {@code DoFn} wrapper. + * + * @see RateLimiting#perWorker(DoFn) + * + * @param the type of the (main) input elements + * @param the type of the (main) output elements + */ + public static class RateLimitingDoFn extends DoFn { + private static final Logger LOG = LoggerFactory.getLogger(RateLimitingDoFn.class); + + public RateLimitingDoFn(DoFn doFn, double rateLimit, + int maxParallelism) { + this.doFn = doFn; + this.rate = rateLimit; + this.maxParallelism = maxParallelism; + } + + @Override + public void startBundle(Context c) throws Exception { + doFn.startBundle(c); + + if (rate > 0.0) { + limiter = RateLimiter.create(rate); + } + executor = Executors.newCachedThreadPool(); + workTickets = new Semaphore(maxParallelism); + failure = new AtomicReference<>(); + } + + @Override + public void processElement(final ProcessContext c) throws Exception { + // Apply rate limiting up front, controlling the availability of work for + // the thread pool. This allows us to use an auto-scaling thread pool, + // which adapts the parallelism to the available work. + // The semaphore is used to avoid overwhelming the executor, by bounding + // the number of outstanding elements. + if (limiter != null) { + limiter.acquire(); + } + try { + workTickets.acquire(); + } catch (InterruptedException e) { + throw new RuntimeException("Interrupted while scheduling work", e); + } + + if (failure.get() != null) { + throw Throwables.propagate(failure.get()); + } + + executor.submit(new Runnable() { + @Override + public void run() { + try { + doFn.processElement(new WrappedContext(c)); + } catch (Throwable t) { + failure.compareAndSet(null, t); + Throwables.propagateIfPossible(t); + throw new AssertionError("Unexpected checked exception: " + t); + } finally { + workTickets.release(); + } + } + }); + } + + @Override + public void finishBundle(Context c) throws Exception { + executor.shutdown(); + // Log a periodic progress report until the queue has drained. + while (true) { + try { + if (executor.awaitTermination(30, TimeUnit.SECONDS)) { + if (failure.get() != null) { + // Handle failure propagation outside of the try/catch block. + break; + } + doFn.finishBundle(c); + return; + } + int outstanding = workTickets.getQueueLength() + + maxParallelism - workTickets.availablePermits(); + LOG.info("RateLimitingDoFn backlog: {}", outstanding); + } catch (InterruptedException e) { + throw Throwables.propagate(e); + } + } + + throw Throwables.propagate(failure.get()); + } + + @Override + TypeToken getInputTypeToken() { + return doFn.getInputTypeToken(); + } + + @Override + TypeToken getOutputTypeToken() { + return doFn.getOutputTypeToken(); + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Wraps a DoFn context, forcing single-thread output so that threads don't + * propagate through to downstream functions. 
+ */ + private class WrappedContext extends ProcessContext { + private final ProcessContext context; + + WrappedContext(ProcessContext context) { + this.context = context; + } + + @Override + public I element() { + return context.element(); + } + + @Override + public KeyedState keyedState() { + return context.keyedState(); + } + + @Override + public PipelineOptions getPipelineOptions() { + return context.getPipelineOptions(); + } + + @Override + public T sideInput(PCollectionView view) { + return context.sideInput(view); + } + + @Override + public void output(O output) { + synchronized (RateLimitingDoFn.this) { + context.output(output); + } + } + + @Override + public void outputWithTimestamp(O output, Instant timestamp) { + synchronized (RateLimitingDoFn.this) { + context.outputWithTimestamp(output, timestamp); + } + } + + @Override + public void sideOutput(TupleTag tag, T output) { + synchronized (RateLimitingDoFn.this) { + context.sideOutput(tag, output); + } + } + + @Override + public Aggregator createAggregator( + String name, Combine.CombineFn combiner) { + return context.createAggregator(name, combiner); + } + + @Override + public Aggregator createAggregator( + String name, SerializableFunction, AO> combiner) { + return context.createAggregator(name, combiner); + } + + @Override + public Instant timestamp() { + return context.timestamp(); + } + + @Override + public Collection windows() { + return context.windows(); + } + } + + private final DoFn doFn; + private double rate; + private int maxParallelism; + + private transient RateLimiter limiter; + private transient ExecutorService executor; + private transient Semaphore workTickets; + private transient AtomicReference failure; + } + + /** + * Default maximum for number of concurrent elements to process. + */ + @VisibleForTesting + static final int DEFAULT_MAX_PARALLELISM = 16; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java new file mode 100644 index 0000000000000..0e4f21f75b781 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code RemoveDuplicates} takes a {@code PCollection} and + * returns a {@code PCollection} that has all the elements of the + * input but with duplicate elements removed such that each element is + * unique within each window. + * + *

Two values of type {@code T} are compared for equality not by + * regular Java {@link Object#equals}, but instead by first encoding + * each of the elements using the {@code PCollection}'s {@code Coder}, and then + * comparing the encoded bytes. This admits efficient parallel + * evaluation. + * + *

By default, the {@code Coder} of the output {@code PCollection} + * is the same as the {@code Coder} of the input {@code PCollection}. + * + *

Each output element is in the same window as its corresponding input + * element, and has the timestamp of the end of that window. The output + * {@code PCollection} has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * as the input. + * + *

Does not preserve any order the input PCollection might have had. + * + *

Example of use: + *

 {@code
+ * PCollection words = ...;
+ * PCollection uniqueWords =
+ *     words.apply(RemoveDuplicates.create());
+ * } 
+ * + * @param the type of the elements of the input and output + * {@code PCollection}s + */ +public class RemoveDuplicates extends PTransform, + PCollection> { + /** + * Returns a {@code RemoveDuplicates} {@code PTransform}. + * + * @param the type of the elements of the input and output + * {@code PCollection}s + */ + public static RemoveDuplicates create() { + return new RemoveDuplicates<>(); + } + + private RemoveDuplicates() { } + + @Override + public PCollection apply(PCollection in) { + return + in + .apply(ParDo.named("CreateIndex") + .of(new DoFn>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of(c.element(), (Void) null)); + } + })) + .apply(Combine.perKey( + new SerializableFunction, Void>() { + @Override + public Void apply(Iterable iter) { + return null; // ignore input + } + })) + .apply(Keys.create()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java new file mode 100644 index 0000000000000..832cc996ea761 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * {@code PTransform}s for taking samples of the elements in a + * {@code PCollection}, or samples of the values associated with each + * key in a {@code PCollection} of {@code KV}s. + **/ +public class Sample { + /** + * Returns a {@code PTransform} that takes a {@code PCollection}, + * selects {@code sampleSize} elements, uniformly at random, and returns a + * {@code PCollection>} containing the selected elements. + * If the input {@code PCollection} has fewer than + * {@code sampleSize} elements, then the output {@code Iterable} + * will be all the input's elements. + * + *

Example of use: + *

 {@code
+   * PCollection<String> pc = ...;
+   * PCollection<Iterable<String>> sampleOfSize10 =
+   *     pc.apply(Sample.<String>fixedSizeGlobally(10));
+   * } 
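
The FixedSizedSampleFn combiner defined later in this file selects the sample by pairing every element with a random integer and keeping the elements whose random tags are largest; because the tags are independent and uniform, each element is equally likely to survive. A minimal plain-Java sketch of that idea follows; the class and method names are illustrative only and are not part of the SDK.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;
    import java.util.PriorityQueue;
    import java.util.Random;

    public class RandomPrioritySampleSketch {
      // An element paired with the random priority assigned to it.
      private static class Tagged<T> {
        final int priority;
        final T value;
        Tagged(int priority, T value) { this.priority = priority; this.value = value; }
      }

      // Keeps the sampleSize elements whose random priorities are largest. Because the
      // priorities are independent and uniformly distributed, every input element is
      // equally likely to end up in the sample.
      static <T> List<T> sample(Iterable<T> input, int sampleSize) {
        Random rand = new Random();
        // Min-heap on priority, so the lowest-priority survivor is evicted first.
        PriorityQueue<Tagged<T>> heap = new PriorityQueue<>(
            Math.max(1, sampleSize),
            new Comparator<Tagged<T>>() {
              @Override
              public int compare(Tagged<T> a, Tagged<T> b) {
                return Integer.compare(a.priority, b.priority);
              }
            });
        for (T value : input) {
          Tagged<T> tagged = new Tagged<>(rand.nextInt(), value);
          if (heap.size() < sampleSize) {
            heap.add(tagged);
          } else if (sampleSize > 0 && tagged.priority > heap.peek().priority) {
            heap.poll();          // evict the current lowest priority
            heap.add(tagged);
          }
        }
        List<T> out = new ArrayList<>();
        for (Tagged<T> survivor : heap) {
          out.add(survivor.value);
        }
        return out;
      }

      public static void main(String[] args) {
        System.out.println(sample(Arrays.asList("a", "b", "c", "d", "e"), 3));
      }
    }
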
+ * + * @param sampleSize the number of elements to select; must be {@code >= 0} + * @param the type of the elements + */ + public static PTransform, PCollection>> + fixedSizeGlobally(int sampleSize) { + return Combine.globally(new FixedSizedSampleFn(sampleSize)); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to a sample of {@code sampleSize} values + * associated with that key in the input {@code PCollection}, taken + * uniformly at random. If a key in the input {@code PCollection} + * has fewer than {@code sampleSize} values associated with it, then + * the output {@code Iterable} associated with that key will be + * all the values associated with that key in the input + * {@code PCollection}. + * + *

Example of use: + *

 {@code
+   * PCollection<KV<String, Integer>> pc = ...;
+   * PCollection<KV<String, Iterable<Integer>>> sampleOfSize10PerKey =
+   *     pc.apply(Sample.<String, Integer>fixedSizePerKey(10));
+   * } 
+ * + * @param sampleSize the number of values to select for each + * distinct key; must be {@code >= 0} + * @param the type of the keys + * @param the type of the values + */ + public static PTransform>, + PCollection>>> + fixedSizePerKey(int sampleSize) { + return Combine.perKey(new FixedSizedSampleFn(sampleSize)); + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * {@code CombineFn} that computes a fixed-size sample of a + * collection of values. + * + * @param the type of the elements + */ + public static class FixedSizedSampleFn + extends CombineFn>.Heap, Iterable> { + private final Top.TopCombineFn> topCombineFn; + private final Random rand = new Random(); + + private FixedSizedSampleFn(int sampleSize) { + if (sampleSize < 0) { + throw new IllegalArgumentException("sample size must be >= 0"); + } + topCombineFn = new Top.TopCombineFn<>(sampleSize, + new KV.OrderByKey()); + } + + @Override + public Top.TopCombineFn>.Heap createAccumulator() { + return topCombineFn.createAccumulator(); + } + + @Override + public void addInput(Top.TopCombineFn>.Heap accumulator, + T input) { + accumulator.addInput(KV.of(rand.nextInt(), input)); + } + + @Override + public Top.TopCombineFn>.Heap mergeAccumulators( + Iterable>.Heap> accumulators) { + return topCombineFn.mergeAccumulators(accumulators); + } + + @Override + public Iterable extractOutput( + Top.TopCombineFn>.Heap accumulator) { + List out = new ArrayList<>(); + for (KV element : accumulator.extractOutput()) { + out.add(element.getValue()); + } + return out; + } + + @Override + public Coder>.Heap> getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return topCombineFn.getAccumulatorCoder( + registry, KvCoder.of(BigEndianIntegerCoder.of(), inputCoder)); + } + + @Override + public Coder> getDefaultOutputCoder( + CoderRegistry registry, Coder inputCoder) { + return IterableCoder.of(inputCoder); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java new file mode 100644 index 0000000000000..3d538faa54d85 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import java.io.Serializable; +import java.util.Comparator; + +/** + * A {@code Serializable} {@code Comparator}. 
+ * + * @param type of values being compared + */ +public interface SerializableComparator extends Comparator, Serializable { +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java new file mode 100644 index 0000000000000..857491a11fe84 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import java.io.Serializable; + +/** + * A function that computes an output value based on an input value, + * and is {@link Serializable}. + * + * @param input value type + * @param output value type + */ +public interface SerializableFunction extends Serializable { + /** Returns the result of invoking this function on the given input. */ + public O apply(I input); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java new file mode 100644 index 0000000000000..e925e4a5cc90d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +/** + * {@code PTransform}s for computing the sum of the elements in a + * {@code PCollection}, or the sum of the values associated with + * each key in a {@code PCollection} of {@code KV}s. + * + *

Example 1: get the sum of a {@code PCollection} of {@code Double}s. + *

 {@code
+ * PCollection<Double> input = ...;
+ * PCollection<Double> sum = input.apply(Sum.doublesGlobally());
+ * } 
+ * + *

Example 2: calculate the sum of the {@code Integer}s + * associated with each unique key (which is of type {@code String}). + *

 {@code
+ * PCollection<KV<String, Integer>> input = ...;
+ * PCollection<KV<String, Integer>> sumPerKey = input
+ *     .apply(Sum.<String>integersPerKey());
+ * } 
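
The Sum combiners below are ordinary SerializableFunctions over an Iterable of values, so the same shape works for any associative reduction. As a hedged illustration, a hypothetical product combiner (not part of this patch) would look like this and would plug into Combine.globally or Combine.perKey exactly as the Sum functions do:

    // Hypothetical combiner in the style of SumLongFn: multiplies the inputs together.
    public static class ProductLongFn
        implements SerializableFunction<Iterable<Long>, Long> {
      @Override
      public Long apply(Iterable<Long> input) {
        long product = 1;               // multiplicative identity, mirroring 0 for the sums
        for (long value : input) {
          product *= value;
        }
        return product;
      }
    }

    // Usage sketch (names are illustrative):
    // PCollection<Long> factors = ...;
    // PCollection<Long> product = factors.apply(Combine.globally(new ProductLongFn()));
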
+ */ +public class Sum { + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the sum of the + * input {@code PCollection}'s elements, or + * {@code 0} if there are no elements. + */ + public static Combine.Globally integersGlobally() { + Combine.Globally combine = Combine + .globally(new SumIntegerFn()); + combine.setName("Sum"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the sum of the values associated with + * that key in the input {@code PCollection}. + */ + public static Combine.PerKey integersPerKey() { + Combine.PerKey combine = Combine + .perKey(new SumIntegerFn()); + combine.setName("Sum.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the sum of the + * input {@code PCollection}'s elements, or + * {@code 0} if there are no elements. + */ + public static Combine.Globally longsGlobally() { + Combine.Globally combine = Combine.globally(new SumLongFn()); + combine.setName("Sum"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the sum of the values associated with + * that key in the input {@code PCollection}. + */ + public static Combine.PerKey longsPerKey() { + Combine.PerKey combine = Combine + .perKey(new SumLongFn()); + combine.setName("Sum.PerKey"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a + * {@code PCollection} whose contents is the sum of the + * input {@code PCollection}'s elements, or + * {@code 0} if there are no elements. + */ + public static Combine.Globally doublesGlobally() { + Combine.Globally combine = Combine + .globally(new SumDoubleFn()); + combine.setName("Sum"); + return combine; + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the sum of the values associated with + * that key in the input {@code PCollection}. + */ + public static Combine.PerKey doublesPerKey() { + Combine.PerKey combine = Combine + .perKey(new SumDoubleFn()); + combine.setName("Sum.PerKey"); + return combine; + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A {@code SerializableFunction} that computes the sum of an + * {@code Iterable} of {@code Integer}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class SumIntegerFn + implements SerializableFunction, Integer> { + @Override + public Integer apply(Iterable input) { + int sum = 0; + for (int value : input) { + sum += value; + } + return sum; + } + } + + /** + * A {@code SerializableFunction} that computes the sum of an + * {@code Iterable} of {@code Long}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. 
+ */ + public static class SumLongFn + implements SerializableFunction, Long> { + @Override + public Long apply(Iterable input) { + long sum = 0; + for (long value : input) { + sum += value; + } + return sum; + } + } + + /** + * A {@code SerializableFunction} that computes the sum of an + * {@code Iterable} of {@code Double}s, useful as an argument to + * {@link Combine#globally} or {@link Combine#perKey}. + */ + public static class SumDoubleFn + implements SerializableFunction, Double> { + @Override + public Double apply(Iterable input) { + double sum = 0; + for (double value : input) { + sum += value; + } + return sum; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java new file mode 100644 index 0000000000000..1f63808fc2237 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -0,0 +1,489 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.CustomCoder; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.PriorityQueue; + +/** + * {@code PTransform}s for finding the largest (or smallest) set + * of elements in a {@code PCollection}, or the largest (or smallest) + * set of values associated with each key in a {@code PCollection} of + * {@code KV}s. + */ +public class Top { + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection>} with a + * single element containing the largest {@code count} elements of the input + * {@code PCollection}, in decreasing order, sorted using the + * given {@code Comparator}. The {@code Comparator} must also + * be {@code Serializable}. + * + *

If {@code count} {@code >} the number of elements in the + * input {@code PCollection}, then all the elements of the input + * {@code PCollection} will be in the resulting + * {@code List}, albeit in sorted order. + * + *

All the elements of the result's {@code List} + * must fit into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<Student> students = ...;
+   * PCollection<List<Student>> top10Students =
+   *     students.apply(Top.of(10, new CompareStudentsByAvgGrade()));
+   * } 
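
The example above references a CompareStudentsByAvgGrade comparator without defining it. Any comparator passed to Top.of only needs to implement Comparator and be Serializable; the Student type and getAvgGrade accessor below are assumptions made for illustration, not classes from this patch.

    // Hypothetical element type; only the average grade matters for the comparison.
    public static class Student implements java.io.Serializable {
      private final double avgGrade;
      public Student(double avgGrade) { this.avgGrade = avgGrade; }
      public double getAvgGrade() { return avgGrade; }
    }

    // Satisfies Top.of's bound of Comparator<T> & Serializable; Top.of(10, ...) then
    // keeps the ten students that compare greatest, i.e. the highest average grades.
    public static class CompareStudentsByAvgGrade
        implements java.util.Comparator<Student>, java.io.Serializable {
      @Override
      public int compare(Student a, Student b) {
        return Double.compare(a.getAvgGrade(), b.getAvgGrade());
      }
    }
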
+ * + *

By default, the {@code Coder} of the output {@code PCollection} + * is a {@code ListCoder} of the {@code Coder} of the elements of + * the input {@code PCollection}. + * + *

See also {@link #smallest} and {@link #largest}, which sort + * {@code Comparable} elements using their natural ordering. + * + *

See also {@link #perKey}, {@link #smallestPerKey}, and + * {@link #largestPerKey} which take a {@code PCollection} of + * {@code KV}s and return the top values associated with each key. + */ + public static & Serializable> + PTransform, PCollection>> of(int count, C compareFn) { + return Combine.globally(new TopCombineFn<>(count, compareFn)) + .withName("Top"); + + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection>} with a + * single element containing the smallest {@code count} elements of the input + * {@code PCollection}, in increasing order, sorted according to + * their natural order. + * + *

If {@code count} {@code >} the number of elements in the + * input {@code PCollection}, then all the elements of the input + * {@code PCollection} will be in the resulting {@code PCollection}'s + * {@code List}, albeit in sorted order. + * + *

All the elements of the result {@code List} + * must fit into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> values = ...;
+   * PCollection<List<Integer>> smallest10Values = values.apply(Top.smallest(10));
+   * } 
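
As the implementation further down shows, smallest(count) delegates to the general form with the Smallest comparator defined at the end of this file, so the two spellings below should be interchangeable; Integer is just an illustrative element type.

    PCollection<Integer> values = ...;  // as in the example above
    // Shorthand form.
    PCollection<List<Integer>> viaShorthand = values.apply(Top.<Integer>smallest(10));
    // Equivalent explicit form, passing the natural-order-reversing comparator.
    PCollection<List<Integer>> viaComparator =
        values.apply(Top.of(10, new Top.Smallest<Integer>()));
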
+ * + *

By default, the {@code Coder} of the output {@code PCollection} + * is a {@code ListCoder} of the {@code Coder} of the elements of + * the input {@code PCollection}. + * + *

See also {@link #largest}. + * + *

See also {@link #of}, which sorts using a user-specified + * {@code Comparator} function. + * + *

See also {@link #perKey}, {@link #smallestPerKey}, and + * {@link #largestPerKey} which take a {@code PCollection} of + * {@code KV}s and return the top values associated with each key. + */ + public static > + PTransform, PCollection>> smallest(int count) { + return Combine.globally(new TopCombineFn<>(count, new Smallest())) + .withName("Top.Smallest"); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection>} with a + * single element containing the largest {@code count} elements of the input + * {@code PCollection}, in decreasing order, sorted according to + * their natural order. + * + *

If {@code count} {@code >} the number of elements in the + * input {@code PCollection}, then all the elements of the input + * {@code PCollection} will be in the resulting {@code PCollection}'s + * {@code List}, albeit in sorted order. + * + *

All the elements of the result's {@code List} + * must fit into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> values = ...;
+   * PCollection<List<Integer>> largest10Values = values.apply(Top.largest(10));
+   * } 
+ * + *

By default, the {@code Coder} of the output {@code PCollection} + * is a {@code ListCoder} of the {@code Coder} of the elements of + * the input {@code PCollection}. + * + *

See also {@link #smallest}. + * + *

See also {@link #of}, which sorts using a user-specified + * {@code Comparator} function. + * + *

See also {@link #perKey}, {@link #smallestPerKey}, and + * {@link #largestPerKey} which take a {@code PCollection} of + * {@code KV}s and return the top values associated with each key. + */ + public static > + PTransform, PCollection>> largest(int count) { + return Combine.globally(new TopCombineFn<>(count, new Largest())) + .withName("Top.Largest"); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the largest {@code count} values + * associated with that key in the input + * {@code PCollection>}, in decreasing order, sorted using + * the given {@code Comparator}. The + * {@code Comparator} must also be {@code Serializable}. + * + *

If there are fewer than {@code count} values associated with + * a particular key, then all those values will be in the result + * mapping for that key, albeit in sorted order. + * + *

All the values associated with a single key must fit into the + * memory of a single machine, but there can be many more + * {@code KV}s in the resulting {@code PCollection} than can fit + * into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<KV<School, Student>> studentsBySchool = ...;
+   * PCollection<KV<School, List<Student>>> top10StudentsBySchool =
+   *     studentsBySchool.apply(
+   *         Top.perKey(10, new CompareStudentsByAvgGrade()));
+   * } 
+ * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as that of the keys of the input + * {@code PCollection}, and the {@code Coder} of the values of the + * output {@code PCollection} is a {@code ListCoder} of the + * {@code Coder} of the values of the input {@code PCollection}. + * + *

See also {@link #smallestPerKey} and {@link #largestPerKey}, + * which sort {@code Comparable} values using their natural + * ordering. + * + *

See also {@link #of}, {@link #smallest}, and {@link #largest} + * which take a {@code PCollection} and return the top elements. + */ + public static & Serializable> + PTransform>, PCollection>>> + perKey(int count, C compareFn) { + return Combine.perKey( + new TopCombineFn<>(count, compareFn).asKeyedFn()) + .withName("Top.PerKey"); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the smallest {@code count} values + * associated with that key in the input + * {@code PCollection>}, in increasing order, sorted + * according to their natural order. + * + *

If there are fewer than {@code count} values associated with + * a particular key, then all those values will be in the result + * mapping for that key, albeit in sorted order. + * + *

All the values associated with a single key must fit into the + * memory of a single machine, but there can be many more + * {@code KV}s in the resulting {@code PCollection} than can fit + * into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<KV<String, Integer>> keyedValues = ...;
+   * PCollection<KV<String, List<Integer>>> smallest10ValuesPerKey =
+   *     keyedValues.apply(Top.smallestPerKey(10));
+   * } 
+ * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as that of the keys of the input + * {@code PCollection}, and the {@code Coder} of the values of the + * output {@code PCollection} is a {@code ListCoder} of the + * {@code Coder} of the values of the input {@code PCollection}. + * + *

See also {@link #largestPerKey}. + * + *

See also {@link #perKey}, which sorts values using a user-specified + * {@code Comparator} function. + * + *

See also {@link #of}, {@link #smallest}, and {@link #largest} + * which take a {@code PCollection} and return the top elements. + */ + public static > + PTransform>, PCollection>>> + smallestPerKey(int count) { + return Combine.perKey( + new TopCombineFn<>(count, new Smallest()).asKeyedFn()) + .withName("Top.SmallestPerKey"); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection>} and returns a + * {@code PCollection>>} that contains an output + * element mapping each distinct key in the input + * {@code PCollection} to the largest {@code count} values + * associated with that key in the input + * {@code PCollection>}, in decreasing order, sorted + * according to their natural order. + * + *

If there are fewer than {@code count} values associated with + * a particular key, then all those values will be in the result + * mapping for that key, albeit in sorted order. + * + *

All the values associated with a single key must fit into the + * memory of a single machine, but there can be many more + * {@code KV}s in the resulting {@code PCollection} than can fit + * into the memory of a single machine. + * + *

Example of use: + *

 {@code
+   * PCollection<KV<String, Integer>> keyedValues = ...;
+   * PCollection<KV<String, List<Integer>>> largest10ValuesPerKey =
+   *     keyedValues.apply(Top.largestPerKey(10));
+   * } 
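
All of these transforms funnel into the TopCombineFn accumulator defined at the bottom of this file, which never retains more than count elements: candidates sit in a min-heap and a new element is admitted only if it beats the current minimum. A stripped-down, plain-Java rendering of that bookkeeping (a sketch, not the SDK class itself) follows.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;
    import java.util.PriorityQueue;

    // Sketch of a bounded "largest count elements" accumulator: memory use stays
    // O(count) no matter how many elements are offered.
    class BoundedTopAccumulator<T> {
      private final int count;
      private final Comparator<T> compareFn;
      private final PriorityQueue<T> heap;   // head is the smallest element retained so far

      BoundedTopAccumulator(int count, Comparator<T> compareFn) {
        this.count = count;
        this.compareFn = compareFn;
        this.heap = new PriorityQueue<>(Math.max(1, count), compareFn);
      }

      void offer(T value) {
        if (count == 0) {
          return;                            // nothing is ever retained
        }
        if (heap.size() < count) {
          heap.add(value);                   // still below capacity
        } else if (compareFn.compare(value, heap.peek()) > 0) {
          heap.poll();                       // evict the current minimum
          heap.add(value);
        }
      }

      List<T> extract() {
        List<T> out = new ArrayList<>(heap);
        Collections.sort(out, Collections.reverseOrder(compareFn));  // largest first
        return out;
      }

      public static void main(String[] args) {
        BoundedTopAccumulator<Integer> top = new BoundedTopAccumulator<>(3,
            new Comparator<Integer>() {
              @Override
              public int compare(Integer a, Integer b) { return a.compareTo(b); }
            });
        for (int i : new int[] {5, 1, 9, 3, 7, 2}) {
          top.offer(i);
        }
        System.out.println(top.extract());   // expected: [9, 7, 5]
      }
    }
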
+ * + *

By default, the {@code Coder} of the keys of the output + * {@code PCollection} is the same as that of the keys of the input + * {@code PCollection}, and the {@code Coder} of the values of the + * output {@code PCollection} is a {@code ListCoder} of the + * {@code Coder} of the values of the input {@code PCollection}. + * + *

See also {@link #smallestPerKey}. + * + *

See also {@link #perKey}, which sorts values using a user-specified + * {@code Comparator} function. + * + *

See also {@link #of}, {@link #smallest}, and {@link #largest} + * which take a {@code PCollection} and return the top elements. + */ + public static > + PTransform>, PCollection>>> + largestPerKey(int count) { + return Combine.perKey( + new TopCombineFn<>(count, new Largest()).asKeyedFn()) + .withName("Top.LargestPerKey"); + } + + + //////////////////////////////////////////////////////////////////////////// + + /** + * {@code CombineFn} for {@code Top} transforms that combines a + * bunch of {@code T}s into a single {@code count}-long + * {@code List}, using {@code compareFn} to choose the largest + * {@code T}s. + * + * @param type of element being compared + */ + public static class TopCombineFn + extends AccumulatingCombineFn.Heap, List> { + + private final int count; + private final Comparator compareFn; + + public & Serializable> TopCombineFn( + int count, C compareFn) { + if (count < 0) { + throw new IllegalArgumentException("count must be >= 0"); + } + this.count = count; + this.compareFn = compareFn; + } + + class Heap + // TODO: Why do I have to fully qualify the + // Accumulator class here? + extends AccumulatingCombineFn.Heap, List> + .Accumulator { + + // Exactly one of these should be set. + private List asList; // ordered largest first + private PriorityQueue asQueue; // head is smallest + + private Heap(List asList) { + this.asList = asList; + } + + @Override + public void addInput(T value) { + addInputInternal(value); + } + + private boolean addInputInternal(T value) { + if (count == 0) { + // Don't add anything. + return false; + } + + if (asQueue == null) { + asQueue = new PriorityQueue<>(count, compareFn); + for (T item : asList) { + asQueue.add(item); + } + asList = null; + } + + if (asQueue.size() < count) { + asQueue.add(value); + return true; + } else if (compareFn.compare(value, asQueue.peek()) > 0) { + asQueue.poll(); + asQueue.add(value); + return true; + } else { + return false; + } + } + + @Override + public void mergeAccumulator(Heap accumulator) { + for (T value : accumulator.asList()) { + if (!addInputInternal(value)) { + // The list is ordered, remainder will also all be smaller. 
+ break; + } + } + } + + @Override + public List extractOutput() { + return asList(); + } + + private List asList() { + if (asList == null) { + int index = asQueue.size(); + @SuppressWarnings("unchecked") + T[] ordered = (T[]) new Object[index]; + while (!asQueue.isEmpty()) { + index--; + ordered[index] = asQueue.poll(); + } + asList = Arrays.asList(ordered); + asQueue = null; + } + return asList; + } + } + + @Override + public Heap createAccumulator() { + return new Heap(new ArrayList()); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return new HeapCoder(inputCoder); + } + + private class HeapCoder extends CustomCoder { + private final Coder> listCoder; + + public HeapCoder(Coder inputCoder) { + listCoder = ListCoder.of(inputCoder); + } + + @Override + public void encode(Heap value, OutputStream outStream, + Context context) throws CoderException, IOException { + listCoder.encode(value.asList(), outStream, context); + } + + @Override + public Heap decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + return new Heap(listCoder.decode(inStream, context)); + } + + @Override + public boolean isDeterministic() { + return listCoder.isDeterministic(); + } + + @Override + public boolean isRegisterByteSizeObserverCheap( + Heap value, Context context) { + return listCoder.isRegisterByteSizeObserverCheap( + value.asList(), context); + } + + @Override + public void registerByteSizeObserver( + Heap value, ElementByteSizeObserver observer, Context context) + throws Exception { + listCoder.registerByteSizeObserver(value.asList(), observer, context); + } + }; + } + + /** + * {@code Serializable} {@code Comparator} that that uses the + * compared elements' natural ordering. + */ + public static class Largest> + implements Comparator, Serializable { + @Override + public int compare(T a, T b) { + return a.compareTo(b); + } + } + + /** + * {@code Serializable} {@code Comparator} that that uses the + * reverse of the compared elements' natural ordering. + */ + public static class Smallest> + implements Comparator, Serializable { + @Override + public int compare(T a, T b) { + return b.compareTo(a); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java new file mode 100644 index 0000000000000..ae008b196ad3b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code Values} takes a {@code PCollection} of {@code KV}s and + * returns a {@code PCollection} of the values. + * + *

Example of use: + *

 {@code
+ * PCollection<KV<String, Long>> wordCounts = ...;
+ * PCollection<Long> counts = wordCounts.apply(Values.<Long>create());
+ * } 
+ * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and the output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + *

See also {@link Keys}. + * + * @param the type of the values in the input {@code PCollection}, + * and the type of the elements in the output {@code PCollection} + */ +public class Values extends PTransform>, + PCollection> { + /** + * Returns a {@code Values} {@code PTransform}. + * + * @param the type of the values in the input {@code PCollection}, + * and the type of the elements in the output {@code PCollection} + */ + public static Values create() { + return new Values<>(); + } + + private Values() { } + + @Override + public PCollection apply(PCollection> in) { + return + in.apply(ParDo.named("Values") + .of(new DoFn, V>() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().getValue()); + } + })); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java new file mode 100644 index 0000000000000..d3bb863888707 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PValueBase; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.base.Function; +import com.google.common.collect.Iterables; + +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Transforms for creating {@link PCollectionView}s from {@link PCollection}s, + * for consuming the contents of those {@link PCollection}s as side inputs + * to {@link ParDo} transforms. + */ +public class View { + + // Do not instantiate + private View() { } + + /** + * Returns a {@link AsSingleton} transform that takes a singleton + * {@link PCollection} as input and produces a {@link PCollectionView} + * of the single value, to be consumed as a side input. + * + *

If the input {@link PCollection} is empty, + * throws {@link NoSuchElementException} in the consuming + * {@link DoFn}. + * + *

If the input {@link PCollection} contains more than one + * element, throws {@link IllegalArgumentException} in the + * consuming {@link DoFn}. + */ + public static AsSingleton asSingleton() { + return new AsSingleton<>(); + } + + /** + * Returns a {@link AsIterable} that takes a + * {@link PCollection} as input and produces a {@link PCollectionView} + * of the values, to be consumed as an iterable side input. + */ + public static AsIterable asIterable() { + return new AsIterable<>(); + } + + + /** + * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} + * yielding the single element it contains. + * + *

Instantiate via {@link View.asIterable}. + */ + public static class AsIterable extends PTransform< + PCollection, + PCollectionView, Iterable>>> { + + private AsIterable() { } + + @Override + public PCollectionView, Iterable>> apply( + PCollection input) { + return input.apply( + new CreatePCollectionView, Iterable>>( + new IterablePCollectionView(input.getPipeline()))); + } + } + + /** + * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} + * yielding the single element it contains. + * + *

Instantiate via {@link View.asIterable}. + */ + public static class AsSingleton + extends PTransform, PCollectionView>> { + + private AsSingleton() { } + + @Override + public PCollectionView> apply(PCollection input) { + return input.apply( + new CreatePCollectionView>( + new SingletonPCollectionView(input.getPipeline()))); + } + + } + + + //////////////////////////////////////////////////////////////////////////// + // Internal details below + + /** + * Creates a primitive PCollectionView. + * + *

For internal use only. + * + * @param The type of the elements of the input PCollection + * @param The type associated with the PCollectionView used as a side input + * @param The type associated with a windowed side input from the + * PCollectionView + */ + public static class CreatePCollectionView + extends PTransform, PCollectionView> { + + private PCollectionView view; + + public CreatePCollectionView(PCollectionView view) { + this.view = view; + } + + @Override + public PCollectionView apply(PCollection input) { + return view; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + CreatePCollectionView.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + CreatePCollectionView transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateTyped(transform, context); + } + + private void evaluateTyped( + CreatePCollectionView transform, + DirectPipelineRunner.EvaluationContext context) { + List> elems = + context.getPCollectionWindowedValues(transform.getInput()); + context.setPCollectionView(transform.getOutput(), elems); + } + }); + } + } + + private static class SingletonPCollectionView + extends PCollectionViewBase> { + + public SingletonPCollectionView(Pipeline pipeline) { + setPipelineInternal(pipeline); + } + + @Override + public T fromIterableInternal(Iterable> contents) { + try { + return (T) Iterables.getOnlyElement(contents).getValue(); + } catch (NoSuchElementException exc) { + throw new NoSuchElementException( + "Empty PCollection accessed as a singleton view."); + } catch (IllegalArgumentException exc) { + throw new IllegalArgumentException( + "PCollection with more than one element " + + "accessed as a singleton view."); + } + } + } + + private static class IterablePCollectionView + extends PCollectionViewBase, Iterable>> { + + public IterablePCollectionView(Pipeline pipeline) { + setPipelineInternal(pipeline); + } + + @Override + public Iterable fromIterableInternal(Iterable> contents) { + return Iterables.transform(contents, new Function, T>() { + @Override + public T apply(WindowedValue input) { + return (T) input.getValue(); + } + }); + } + } + + private abstract static class PCollectionViewBase + extends PValueBase + implements PCollectionView { + + @Override + public TupleTag>> getTagInternal() { + return tag; + } + + private TupleTag>> tag = new TupleTag<>(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java new file mode 100644 index 0000000000000..1754c20a7916b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.reflect.TypeToken; + +/** + * {@code WithKeys} takes a {@code PCollection}, and either a + * constant key of type {@code K} or a function from {@code V} to + * {@code K}, and returns a {@code PCollection>}, where each + * of the values in the input {@code PCollection} has been paired with + * either the constant key or a key computed from the value. + * + *

Example of use: + *

 {@code
+ * PCollection<String> words = ...;
+ * PCollection<KV<Integer, String>> lengthsToWords =
+ *     words.apply(WithKeys.of(new SerializableFunction<String, Integer>() {
+ *         public Integer apply(String s) { return s.length(); } }));
+ * } 
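
Besides the keying-function form shown above, the class also provides a constant-key overload, of(K key), defined below. A small sketch of that form, with an illustrative key value:

    // Pairs every word with the same fixed key, e.g. to bring the whole collection
    // together under a single key before a GroupByKey.
    PCollection<String> words = ...;
    PCollection<KV<String, String>> keyedWords =
        words.apply(WithKeys.<String, String>of("all-words"));
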
+ * + *

Each output element has the same timestamp and is in the same windows + * as its corresponding input element, and the output {@code PCollection} + * has the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * associated with it as the input. + * + * @param the type of the keys in the output {@code PCollection} + * @param the type of the elements in the input + * {@code PCollection} and the values in the output + * {@code PCollection} + */ +public class WithKeys extends PTransform, + PCollection>> { + /** + * Returns a {@code PTransform} that takes a {@code PCollection} + * and returns a {@code PCollection>}, where each of the + * values in the input {@code PCollection} has been paired with a + * key computed from the value by invoking the given + * {@code SerializableFunction}. + */ + public static WithKeys of(SerializableFunction fn) { + return new WithKeys<>(fn, null); + } + + /** + * Returns a {@code PTransform} that takes a {@code PCollection} + * and returns a {@code PCollection>}, where each of the + * values in the input {@code PCollection} has been paired with the + * given key. + */ + @SuppressWarnings("unchecked") + public static WithKeys of(final K key) { + return new WithKeys<>( + new SerializableFunction() { + @Override + public K apply(V value) { + return key; + } + }, + (Class) (key == null ? null : key.getClass())); + } + + + ///////////////////////////////////////////////////////////////////////////// + + private SerializableFunction fn; + private transient Class keyClass; + + private WithKeys(SerializableFunction fn, Class keyClass) { + this.fn = fn; + this.keyClass = keyClass; + } + + @Override + public PCollection> apply(PCollection in) { + Coder keyCoder; + if (keyClass == null) { + keyCoder = getCoderRegistry().getDefaultOutputCoder(fn, in.getCoder()); + } else { + keyCoder = getCoderRegistry().getDefaultCoder(TypeToken.of(keyClass)); + } + PCollection> result = + in.apply(ParDo.named("AddKeys") + .of(new DoFn>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of(fn.apply(c.element()), + c.element())); + } + })); + if (keyCoder != null) { + // TODO: Remove when we can set the coder inference context. + result.setCoder(KvCoder.of(keyCoder, in.getCoder())); + } + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java new file mode 100644 index 0000000000000..f91d7d2ca669c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java @@ -0,0 +1,367 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static com.google.cloud.dataflow.sdk.util.Structs.addObject; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.coders.MapCoder; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.coders.VarIntCoder; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * A row result of a CoGroupByKey. This is a tuple of Iterables produced for + * a given key, and these can be accessed in different ways. + */ +public class CoGbkResult { + // TODO: If we keep this representation for any amount of time, + // optimize it so that the union tag does not have to be repeated in the + // values stored under the union tag key. + /** + * A map of integer union tags to a list of union objects. + * Note: the key and the embedded union tag are the same, so it is redundant + * to store it multiple times, but for now it makes encoding easier. + */ + private final Map> valueMap; + + private final CoGbkResultSchema schema; + + /** + * A row in the PCollection resulting from a CoGroupByKey transform. + * Currently, this row must fit into memory. + * + * @param schema the set of tuple tags used to refer to input tables and + * result values + * @param values the raw results from a group-by-key + */ + @SuppressWarnings("unchecked") + public CoGbkResult( + CoGbkResultSchema schema, + Iterable values) { + this.schema = schema; + valueMap = new TreeMap<>(); + for (RawUnionValue value : values) { + // Make sure the given union tag has a corresponding tuple tag in the + // schema. + int unionTag = value.getUnionTag(); + if (schema.size() <= unionTag) { + throw new IllegalStateException("union tag " + unionTag + + " has no corresponding tuple tag in the result schema"); + } + List taggedValueList = valueMap.get(unionTag); + if (taggedValueList == null) { + taggedValueList = new ArrayList<>(); + valueMap.put(unionTag, taggedValueList); + } + taggedValueList.add(value); + } + } + + public boolean isEmpty() { + return valueMap == null || valueMap.isEmpty(); + } + + /** + * Returns the schema used by this CoGbkResult. + */ + public CoGbkResultSchema getSchema() { + return schema; + } + + @Override + public String toString() { + return valueMap.toString(); + } + + /** + * Returns the values from the table represented by the given + * {@code TupleTag} as an {@code Iterable} (which may be empty if there + * are no results). 
+ */ + public Iterable getAll(TupleTag tag) { + int index = schema.getIndex(tag); + if (index < 0) { + throw new IllegalArgumentException("TupleTag " + tag + + " is not in the schema"); + } + List unions = valueMap.get(index); + if (unions == null) { + return buildEmptyIterable(tag); + } + return new UnionValueIterable<>(unions); + } + + /** + * If there is a singleton value for the given tag, returns it. + * Otherwise, throws an IllegalArgumentException. + */ + public V getOnly(TupleTag tag) { + return innerGetOnly(tag, null, false); + } + + /** + * If there is a singleton value for the given tag, returns it. If there is + * no value for the given tag, returns the defaultValue. + * Otherwise, throws an IllegalArgumentException. + */ + public V getOnly(TupleTag tag, V defaultValue) { + return innerGetOnly(tag, defaultValue, true); + } + + /** + * A coder for CoGbkResults. + */ + public static class CoGbkResultCoder extends StandardCoder { + + private final CoGbkResultSchema schema; + private final MapCoder> mapCoder; + + /** + * Returns a CoGbkResultCoder for the given schema and unionCoder. + */ + public static CoGbkResultCoder of( + CoGbkResultSchema schema, + UnionCoder unionCoder) { + return new CoGbkResultCoder(schema, unionCoder); + } + + @JsonCreator + public static CoGbkResultCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components, + @JsonProperty(PropertyNames.CO_GBK_RESULT_SCHEMA) CoGbkResultSchema schema) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return new CoGbkResultCoder(schema, (MapCoder) components.get(0)); + } + + private CoGbkResultCoder( + CoGbkResultSchema tupleTags, + UnionCoder unionCoder) { + this.schema = tupleTags; + this.mapCoder = MapCoder.of(VarIntCoder.of(), + ListCoder.of(unionCoder)); + } + + private CoGbkResultCoder( + CoGbkResultSchema tupleTags, + MapCoder mapCoder) { + this.schema = tupleTags; + this.mapCoder = mapCoder; + } + + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public List> getComponents() { + return Arrays.>asList(mapCoder); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addObject(result, PropertyNames.CO_GBK_RESULT_SCHEMA, schema.asCloudObject()); + return result; + } + + @Override + public void encode( + CoGbkResult value, + OutputStream outStream, + Context context) throws CoderException, + IOException { + if (!schema.equals(value.getSchema())) { + throw new CoderException("input schema does not match coder schema"); + } + mapCoder.encode(value.valueMap, outStream, context); + } + + @Override + public CoGbkResult decode( + InputStream inStream, + Context context) + throws CoderException, IOException { + Map> map = mapCoder.decode( + inStream, context); + return new CoGbkResult(schema, map); + } + + public boolean equals(Object other) { + if (!super.equals(other)) { + return false; + } + return schema.equals(((CoGbkResultCoder) other).schema); + } + + @Override + public boolean isDeterministic() { + return mapCoder.isDeterministic(); + } + } + + + ////////////////////////////////////////////////////////////////////////////// + // Methods for testing purposes + + /** + * Returns a new CoGbkResult that contains just the given tag the given data. 
+ */ + public static CoGbkResult of(TupleTag tag, List data) { + return CoGbkResult.empty().and(tag, data); + } + + /** + * Returns a new CoGbkResult based on this, with the given tag and given data + * added to it. + */ + public CoGbkResult and(TupleTag tag, List data) { + if (nextTestUnionId != schema.size()) { + throw new IllegalArgumentException( + "Attempting to call and() on a CoGbkResult apparently not created by" + + " of()."); + } + Map> valueMap = new TreeMap<>(this.valueMap); + valueMap.put(nextTestUnionId, + convertValueListToUnionList(nextTestUnionId, data)); + return new CoGbkResult( + new CoGbkResultSchema(schema.getTupleTagList().and(tag)), valueMap, + nextTestUnionId + 1); + } + + /** + * Returns an empty CoGbkResult. + */ + public static CoGbkResult empty() { + return new CoGbkResult(new CoGbkResultSchema(TupleTagList.empty()), + new TreeMap>()); + } + + ////////////////////////////////////////////////////////////////////////////// + + private int nextTestUnionId = 0; + + private CoGbkResult( + CoGbkResultSchema schema, + Map> valueMap, + int nextTestUnionId) { + this(schema, valueMap); + this.nextTestUnionId = nextTestUnionId; + } + + private CoGbkResult( + CoGbkResultSchema schema, + Map> valueMap) { + this.schema = schema; + this.valueMap = valueMap; + } + + private static List convertValueListToUnionList( + int unionTag, List data) { + List unionList = new ArrayList<>(); + for (V value : data) { + unionList.add(new RawUnionValue(unionTag, value)); + } + return unionList; + } + + private Iterable buildEmptyIterable(TupleTag tag) { + return new ArrayList<>(); + } + + private V innerGetOnly( + TupleTag tag, + V defaultValue, + boolean useDefault) { + int index = schema.getIndex(tag); + if (index < 0) { + throw new IllegalArgumentException("TupleTag " + tag + + " is not in the schema"); + } + List unions = valueMap.get(index); + if (unions.isEmpty()) { + if (useDefault) { + return defaultValue; + } else { + throw new IllegalArgumentException("TupleTag " + tag + + " corresponds to an empty result, and no default was provided"); + } + } + if (unions.size() != 1) { + throw new IllegalArgumentException("TupleTag " + tag + + " corresponds to a non-singleton result of size " + unions.size()); + } + return (V) unions.get(0).getValue(); + } + + /** + * Lazily converts and recasts an {@code Iterable} into an + * {@code Iterable}, where V is the type of the raw union value's contents. + */ + private static class UnionValueIterable implements Iterable { + + private final Iterable unions; + + private UnionValueIterable(Iterable unions) { + this.unions = unions; + } + + @Override + public Iterator iterator() { + final Iterator unionsIterator = unions.iterator(); + return new Iterator() { + @Override + public boolean hasNext() { + return unionsIterator.hasNext(); + } + + @Override + public V next() { + return (V) unionsIterator.next().getValue(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java new file mode 100644 index 0000000000000..93883b80750c8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static com.google.cloud.dataflow.sdk.util.Structs.addList; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +/** + * A schema for the results of a CoGroupByKey. This maintains the full + * set of TupleTags for the results of a CoGroupByKey, and facilitates mapping + * between TupleTags and Union Tags (which are used as secondary keys in the + * CoGroupByKey). + */ +class CoGbkResultSchema implements Serializable { + + private final TupleTagList tupleTagList; + + @JsonCreator + public static CoGbkResultSchema of( + @JsonProperty(PropertyNames.TUPLE_TAGS) List> tags) { + TupleTagList tupleTags = TupleTagList.empty(); + for (TupleTag tag : tags) { + tupleTags = tupleTags.and(tag); + } + return new CoGbkResultSchema(tupleTags); + } + + /** + * Maps TupleTags to union tags. This avoids needing to encode the tags + * themselves. + */ + private final HashMap, Integer> tagMap = new HashMap<>(); + + /** + * Builds a schema from a tuple of {@code TupleTag}s. + */ + public CoGbkResultSchema(TupleTagList tupleTagList) { + this.tupleTagList = tupleTagList; + int index = -1; + for (TupleTag tag : tupleTagList.getAll()) { + index++; + tagMap.put(tag, index); + } + } + + /** + * Returns the index for the given tuple tag, if the tag is present in this + * schema, -1 if it isn't. + */ + public int getIndex(TupleTag tag) { + Integer index = tagMap.get(tag); + return index == null ? -1 : index; + } + + /** + * Returns the JoinTupleTag at the given index. + */ + public TupleTag getTag(int index) { + return tupleTagList.get(index); + } + + /** + * Returns the number of columms for this schema. + */ + public int size() { + return tupleTagList.getAll().size(); + } + + /** + * Returns the TupleTagList tuple associated with this schema. 
+ */ + public TupleTagList getTupleTagList() { + return tupleTagList; + } + + public CloudObject asCloudObject() { + CloudObject result = CloudObject.forClass(getClass()); + List serializedTags = new ArrayList<>(tupleTagList.size()); + for (TupleTag tag : tupleTagList.getAll()) { + serializedTags.add(tag.asCloudObject()); + } + addList(result, PropertyNames.TUPLE_TAGS, serializedTags); + return result; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof CoGbkResultSchema)) { + return false; + } + CoGbkResultSchema other = (CoGbkResultSchema) obj; + return tupleTagList.getAll().equals(other.tupleTagList.getAll()); + } + + @Override + public int hashCode() { + return tupleTagList.getAll().hashCode(); + } + + @Override + public String toString() { + return "CoGbkResultSchema: " + tupleTagList.getAll(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java new file mode 100644 index 0000000000000..d81c9ef707ca7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult.CoGbkResultCoder; +import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple.TaggedKeyedPCollection; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import java.util.ArrayList; +import java.util.List; + +/** + * A transform that performs a CoGroupByKey on a tuple of tables. A + * CoGroupByKey groups results from all tables by like keys into CoGbkResults, + * from which the results for any specific table can be accessed by the + * TupleTag supplied with the initial table. + * + *

+ * <p> Example of performing a CoGroupByKey followed by a ParDo that consumes
+ * the results:
+ * <pre> <code>
+ * {@literal PCollection<KV<K, V1>>} pt1 = ...;
+ * {@literal PCollection<KV<K, V2>>} pt2 = ...;
+ *
+ * final {@literal TupleTag<V1>} t1 = new {@literal TupleTag<>()};
+ * final {@literal TupleTag<V2>} t2 = new {@literal TupleTag<>()};
+ * {@literal PCollection<KV<K, CoGbkResult>>} coGbkResultCollection =
+ *   KeyedPCollectionTuple.of(t1, pt1)
+ *                        .and(t2, pt2)
+ *                        .apply({@literal CoGroupByKey.<K>create()});
+ *
+ * {@literal PCollection<T>} finalResultCollection =
+ *   coGbkResultCollection.apply(ParDo.of(
+ *     new {@literal DoFn<KV<K, CoGbkResult>, T>()} {
+ *       {@literal @}Override
+ *       public void processElement(ProcessContext c) {
+ *         {@literal KV<K, CoGbkResult>} e = c.element();
+ *         {@literal Iterable<V1>} pt1Vals = e.getValue().getAll(t1);
+ *         V2 pt2Val = e.getValue().getOnly(t2);
+ *          ... Do Something ....
+ *         c.output(...some T...);
+ *       }
+ *     }));
+ * </code> </pre>
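+ *
+ * <p> (Not shown above.) When a particular key has no values in one of the
+ * input tables, {@code getAll} for that table's tag is expected to return an
+ * empty {@code Iterable} rather than fail, so iterating is safe even for keys
+ * absent from that table:
+ * <pre> <code>
+ * for (V1 v : e.getValue().getAll(t1)) {
+ *   // runs zero or more times; zero when this key never appeared in pt1
+ * }
+ * </code> </pre>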
+ * + * @param the type of the keys in the input and output + * {@code PCollection}s + */ +public class CoGroupByKey extends + PTransform, + PCollection>> { + /** + * Returns a {@code CoGroupByKey} {@code PTransform}. + * + * @param the type of the keys in the input and output + * {@code PCollection}s + */ + public static CoGroupByKey create() { + return new CoGroupByKey<>(); + } + + private CoGroupByKey() { } + + @Override + public PCollection> apply( + KeyedPCollectionTuple input) { + if (input.isEmpty()) { + throw new IllegalArgumentException( + "must have at least one input to a KeyedPCollections"); + } + + // First build the union coder. + // TODO: Look at better integration of union types with the + // schema specified in the input. + List> codersList = new ArrayList<>(); + for (TaggedKeyedPCollection entry : input.getKeyedCollections()) { + codersList.add(getValueCoder(entry.pCollection)); + } + UnionCoder unionCoder = UnionCoder.of(codersList); + Coder keyCoder = input.getKeyCoder(); + KvCoder kVCoder = + KvCoder.of(keyCoder, unionCoder); + + PCollectionList> unionTables = + PCollectionList.empty(getPipeline()); + + // TODO: Use the schema to order the indices rather than depending + // on the fact that the schema ordering is identical to the ordering from + // input.getJoinCollections(). + int index = -1; + for (TaggedKeyedPCollection entry : input.getKeyedCollections()) { + index++; + PCollection> unionTable = + makeUnionTable(index, entry.pCollection, kVCoder); + unionTables = unionTables.and(unionTable); + } + + PCollection> flattenedTable = + unionTables.apply(Flatten.>create()); + + PCollection>> groupedTable = + flattenedTable.apply(GroupByKey.create()); + + CoGbkResultSchema tupleTags = input.getCoGbkResultSchema(); + PCollection> result = groupedTable.apply( + ParDo.of(new ConstructCoGbkResultFn(tupleTags)) + .named("ConstructCoGbkResultFn")); + result.setCoder(KvCoder.of(keyCoder, + CoGbkResultCoder.of(tupleTags, unionCoder))); + + return result; + } + + ////////////////////////////////////////////////////////////////////////////// + + /** + * Returns the value coder for the given PCollection. Assumes that the value + * coder is an instance of {@code KvCoder}. + */ + private Coder getValueCoder(PCollection> pCollection) { + // Assumes that the PCollection uses a KvCoder. + Coder entryCoder = pCollection.getCoder(); + if (!(entryCoder instanceof KvCoder)) { + throw new IllegalArgumentException("PCollection does not use a KvCoder"); + } + @SuppressWarnings("unchecked") + KvCoder coder = (KvCoder) entryCoder; + return coder.getValueCoder(); + } + + /** + * Returns a UnionTable for the given input PCollection, using the given + * union index and the given unionTableEncoder. + */ + private PCollection> makeUnionTable( + final int index, + PCollection> pCollection, + KvCoder unionTableEncoder) { + + return pCollection.apply(ParDo.of( + new ConstructUnionTableFn(index)).named("MakeUnionTable")) + .setCoder(unionTableEncoder); + } + + /** + * A DoFn to construct a UnionTable (i.e., a + * {@code PCollection>} from a + * {@code PCollection>}. + */ + private static class ConstructUnionTableFn extends + DoFn, KV> { + + private final int index; + + public ConstructUnionTableFn(int index) { + this.index = index; + } + + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + c.output(KV.of(e.getKey(), new RawUnionValue(index, e.getValue()))); + } + } + + /** + * A DoFn to construct a CoGbkResult from an input grouped union + * table. 
+ */ + private static class ConstructCoGbkResultFn + extends DoFn>, + KV> { + + private final CoGbkResultSchema schema; + + public ConstructCoGbkResultFn(CoGbkResultSchema schema) { + this.schema = schema; + } + + @Override + public void processElement(ProcessContext c) { + KV> e = c.element(); + c.output(KV.of(e.getKey(), new CoGbkResult(schema, e.getValue()))); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java new file mode 100644 index 0000000000000..a9fd4b684f85b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.cloud.dataflow.sdk.values.POutput; +import com.google.cloud.dataflow.sdk.values.PValue; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * Represents an immutable tuple of keyed PCollections (i.e. PCollections of + * {@code KV}), with key type K. + * + * @param the type of key shared by all constituent PCollections + */ +public class KeyedPCollectionTuple implements PInput { + /** + * Returns an empty {@code KeyedPCollections} on the given pipeline. + */ + public static KeyedPCollectionTuple empty(Pipeline pipeline) { + return new KeyedPCollectionTuple<>(pipeline); + } + + /** + * Returns a new {@code KeyedPCollections} with the given tag and initial + * PCollection. + */ + public static KeyedPCollectionTuple of( + TupleTag tag, + PCollection> pc) { + return new KeyedPCollectionTuple(pc.getPipeline()).and(tag, pc); + } + + /** + * Returns a new {@code KeyedPCollections} that is the same as this, + * appended with the given PCollection. + */ + public KeyedPCollectionTuple and( + TupleTag< V> tag, + PCollection> pc) { + if (pc.getPipeline() != getPipeline()) { + throw new IllegalArgumentException( + "PCollections come from different Pipelines"); + } + TaggedKeyedPCollection wrapper = + new TaggedKeyedPCollection<>(tag, pc); + Coder myKeyCoder = keyCoder == null ? 
getKeyCoder(pc) : keyCoder; + List> + newKeyedCollections = + copyAddLast( + keyedCollections, + wrapper); + return new KeyedPCollectionTuple<>( + getPipeline(), + newKeyedCollections, + schema.getTupleTagList().and(tag), + myKeyCoder); + } + + public boolean isEmpty() { + return keyedCollections.isEmpty(); + } + + /** + * Returns a list of TaggedKeyedPCollections for the PCollections contained in + * this {@code KeyedPCollections}. + */ + public List> getKeyedCollections() { + return keyedCollections; + } + + /** + * Applies the given transform to this input. + */ + public O apply( + PTransform, O> transform) { + return Pipeline.applyTransform(this, transform); + } + + /** + * Expands the component PCollections, stripping off any tag-specific + * information. + */ + @Override + public Collection expand() { + List> retval = new ArrayList<>(); + for (TaggedKeyedPCollection taggedPCollection : keyedCollections) { + retval.add(taggedPCollection.pCollection); + } + return retval; + } + + /** + * Returns the KeyCoder for all PCollections in this KeyedPCollections. + */ + public Coder getKeyCoder() { + if (keyCoder == null) { + throw new IllegalStateException("cannot return null keyCoder"); + } + return keyCoder; + } + + /** + * Returns the CoGbkResultSchema associated with this + * KeyedPCollections. + */ + public CoGbkResultSchema getCoGbkResultSchema() { + return schema; + } + + @Override + public Pipeline getPipeline() { + return pipeline; + } + + @Override + public void finishSpecifying() { + for (TaggedKeyedPCollection taggedPCollection : keyedCollections) { + taggedPCollection.pCollection.finishSpecifying(); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * A utility class to help ensure coherence of tag and input PCollection + * types. + */ + static class TaggedKeyedPCollection { + final TupleTag tupleTag; + final PCollection> pCollection; + + public TaggedKeyedPCollection( + TupleTag tupleTag, + PCollection> pCollection) { + this.tupleTag = tupleTag; + this.pCollection = pCollection; + } + } + + /** + * We use a List to properly track the order in which collections are added. + */ + private final List> keyedCollections; + + private final Coder keyCoder; + + private final CoGbkResultSchema schema; + + private final Pipeline pipeline; + + KeyedPCollectionTuple(Pipeline pipeline) { + this(pipeline, + new ArrayList>(), + TupleTagList.empty(), + null); + } + + KeyedPCollectionTuple( + Pipeline pipeline, + List> keyedCollections, + TupleTagList tupleTagList, + Coder keyCoder) { + this.pipeline = pipeline; + this.keyedCollections = keyedCollections; + this.schema = new CoGbkResultSchema(tupleTagList); + this.keyCoder = keyCoder; + } + + private static Coder getKeyCoder(PCollection> pc) { + // Need to run coder inference on this PCollection before inspecting it. + pc.finishSpecifying(); + + // Assumes that the PCollection uses a KvCoder. 
+ Coder entryCoder = pc.getCoder(); + if (!(entryCoder instanceof KvCoder)) { + throw new IllegalArgumentException("PCollection does not use a KvCoder"); + } + @SuppressWarnings("unchecked") + KvCoder coder = (KvCoder) entryCoder; + return coder.getKeyCoder(); + } + + private static List> copyAddLast( + List> keyedCollections, + TaggedKeyedPCollection taggedCollection) { + List> retval = + new ArrayList<>(keyedCollections); + retval.add(taggedCollection); + return retval; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java new file mode 100644 index 0000000000000..b52f8b3e49c2f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +// TODO: Think about making this a complete dynamic union by adding +// a schema. Type would then be defined by the corresponding schema entry. + +/** + * This corresponds to an integer union tag and value. The mapping of + * union tag to type must come from elsewhere. + */ +class RawUnionValue { + private final int unionTag; + private final Object value; + + /** + * Constructs a partial union from the given union tag and value. + */ + public RawUnionValue(int unionTag, Object value) { + this.unionTag = unionTag; + this.value = value; + } + + public int getUnionTag() { + return unionTag; + } + + public Object getValue() { + return value; + } + + @Override + public String toString() { + return unionTag + ":" + value; + } +} + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java new file mode 100644 index 0000000000000..a6bb4bcb45860 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.VarInt; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; + +/** + * A UnionCoder encodes RawUnionValues. + */ +class UnionCoder extends StandardCoder { + // TODO: Think about how to integrate this with a schema object (i.e. + // a tuple of tuple tags). + /** + * Builds a union coder with the given list of element coders. This list + * corresponds to a mapping of union tag to Coder. Union tags start at 0. + */ + public static UnionCoder of(List> elementCoders) { + return new UnionCoder(elementCoders); + } + + @JsonCreator + public static UnionCoder jsonOf( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> elements) { + return UnionCoder.of(elements); + } + + private int getIndexForEncoding(RawUnionValue union) { + if (union == null) { + throw new IllegalArgumentException("cannot encode a null tagged union"); + } + int index = union.getUnionTag(); + if (index < 0 || index >= elementCoders.size()) { + throw new IllegalArgumentException( + "union value index " + index + " not in range [0.." + + (elementCoders.size() - 1) + "]"); + } + return index; + } + + @SuppressWarnings("unchecked") + @Override + public void encode( + RawUnionValue union, + OutputStream outStream, + Context context) + throws IOException, CoderException { + int index = getIndexForEncoding(union); + // Write out the union tag. + VarInt.encode(index, outStream); + + // Write out the actual value. + Coder coder = (Coder) elementCoders.get(index); + coder.encode( + union.getValue(), + outStream, + context); + } + + @Override + public RawUnionValue decode(InputStream inStream, Context context) + throws IOException, CoderException { + int index = VarInt.decodeInt(inStream); + Object value = elementCoders.get(index).decode(inStream, context); + return new RawUnionValue(index, value); + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public List> getComponents() { + return elementCoders; + } + + /** + * Since this coder uses elementCoders.get(index) and coders that are known to run in constant + * time, we defer the return value to that coder. + */ + @Override + public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) { + int index = getIndexForEncoding(union); + Coder coder = (Coder) elementCoders.get(index); + return coder.isRegisterByteSizeObserverCheap(union.getValue(), context); + } + + /** + * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder. + */ + @Override + public void registerByteSizeObserver( + RawUnionValue union, ElementByteSizeObserver observer, Context context) + throws Exception { + int index = getIndexForEncoding(union); + // Write out the union tag. + observer.update(VarInt.getLength(index)); + // Write out the actual value. 
+ Coder coder = (Coder) elementCoders.get(index); + coder.registerByteSizeObserver(union.getValue(), observer, context); + } + + ///////////////////////////////////////////////////////////////////////////// + + private final List> elementCoders; + + private UnionCoder(List> elementCoders) { + this.elementCoders = elementCoders; + } + + @Override + public boolean isDeterministic() { + for (Coder elementCoder : elementCoders) { + if (!elementCoder.isDeterministic()) { + return false; + } + } + + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java new file mode 100644 index 0000000000000..ba907ac2cd734 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines the {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey} transform + * for joining multiple PCollections. + */ +package com.google.cloud.dataflow.sdk.transforms.join; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java new file mode 100644 index 0000000000000..b72e90e780ac0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s for transforming + * data in a pipeline. + * + *

A {@link com.google.cloud.dataflow.sdk.transforms.PTransform} is an operation that takes an + * {@code Input} (some subtype of {@link com.google.cloud.dataflow.sdk.values.PInput}) + * and produces an + * {@code Output} (some subtype of {@link com.google.cloud.dataflow.sdk.values.POutput}). + * + *

Common PTransforms include root PTransforms like + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read} and + * {@link com.google.cloud.dataflow.sdk.transforms.Create}, processing and + * conversion operations like {@link com.google.cloud.dataflow.sdk.transforms.ParDo}, + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}, + * {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey}, + * {@link com.google.cloud.dataflow.sdk.transforms.Combine}, and + * {@link com.google.cloud.dataflow.sdk.transforms.Count}, and outputting + * PTransforms like + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}. + * + *

New PTransforms can be created by composing existing PTransforms. + * Most PTransforms in this package are composites, and users can also create composite PTransforms + * for their own application-specific logic. + * + */ +package com.google.cloud.dataflow.sdk.transforms; + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java new file mode 100644 index 0000000000000..01de83f1585d0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import org.joda.time.Instant; + +/** + * A {@code BoundedWindow} represents a finite grouping of elements, with an + * upper bound (larger timestamps represent more recent data) on the timestamps + * of elements that can be placed in the window. This finiteness means that for + * every window, at some point in time, all data for that window will have + * arrived and can be processed together. + * + *

Windows must also implement {@link Object#equals} and + * {@link Object#hashCode} such that windows that are logically equal will + * be treated as equal by {@code equals()} and {@code hashCode()}. + */ +public abstract class BoundedWindow { + /** + * Returns the upper bound of timestamps for values in this window. + */ + public abstract Instant maxTimestamp(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java new file mode 100644 index 0000000000000..bb0de796f86ab --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java @@ -0,0 +1,300 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Days; +import org.joda.time.Instant; +import org.joda.time.Months; +import org.joda.time.Years; + +/** + * A collection of {@link WindowingFn}s that windows values into calendar-based + * windows such as spans of days, months, or years. + * + *

For example, to group data into quarters that change on the 15th, use + * {@code CalendarWindows.months(3).withStartingMonth(2014, 1).beginningOnDay(15)}. + */ +public class CalendarWindows { + + /** + * Returns a {@link WindowingFn} that windows elements into periods measured by days. + * + *

For example, {@code CalendarWindows.days(1)} will window elements into + * separate windows for each day. + */ + public static DaysWindows days(int number) { + return new DaysWindows(number, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); + } + + /** + * Returns a {@link WindowingFn} that windows elements into periods measured by weeks. + * + *

For example, {@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)} will + * window elements into week-long windows starting on Tuesdays. + */ + public static DaysWindows weeks(int number, int startDayOfWeek) { + return new DaysWindows( + 7 * number, + new DateTime(0, DateTimeZone.UTC).withDayOfWeek(startDayOfWeek), + DateTimeZone.UTC); + } + + /** + * Returns a {@link WindowingFn} that windows elements into periods measured by months. + * + *

 For example, + * {@code CalendarWindows.months(8).withStartingMonth(2014, 1).beginningOnDay(10)} + * will window elements into 8-month windows that start on the 10th day of the month, + * and the first window begins in January 2014. + */ + public static MonthsWindows months(int number) { + return new MonthsWindows(number, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); + } + + /** + * Returns a {@link WindowingFn} that windows elements into periods measured by years. + * + *

For example, + * {@code CalendarWindows.years(1).withTimeZone(DateTimeZone.forId("America/Los_Angeles"))} + * will window elements into year-long windows that start at midnight on Jan 1, in the + * America/Los_Angeles time zone. + */ + public static YearsWindows years(int number) { + return new YearsWindows(number, 1, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); + } + + /** + * A {@link WindowingFn} that windows elements into periods measured by days. + * + *

By default, periods of multiple days are measured starting at the + * epoch. This can be overridden with {@link #withStartingDay}. + * + *

The time zone used to determine calendar boundaries is UTC, unless this + * is overridden with the {@link #withTimeZone} method. + */ + public static class DaysWindows extends PartitioningWindowingFn { + + public DaysWindows withStartingDay(int year, int month, int day) { + return new DaysWindows( + number, new DateTime(year, month, day, 0, 0, timeZone), timeZone); + } + + public DaysWindows withTimeZone(DateTimeZone timeZone) { + return new DaysWindows( + number, startDate.withZoneRetainFields(timeZone), timeZone); + } + + //////////////////////////////////////////////////////////////////////////// + + private int number; + private DateTime startDate; + private DateTimeZone timeZone; + + private DaysWindows(int number, DateTime startDate, DateTimeZone timeZone) { + this.number = number; + this.startDate = startDate; + this.timeZone = timeZone; + } + + @Override + public IntervalWindow assignWindow(Instant timestamp) { + DateTime datetime = new DateTime(timestamp, timeZone); + + int dayOffset = Days.daysBetween(startDate, datetime).getDays() / number * number; + + DateTime begin = startDate.plusDays(dayOffset); + DateTime end = begin.plusDays(number); + + return new IntervalWindow(begin.toInstant(), end.toInstant()); + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getCoder(); + } + + @Override + public boolean isCompatible(WindowingFn other) { + if (!(other instanceof DaysWindows)) { + return false; + } + DaysWindows that = (DaysWindows) other; + return number == that.number + && startDate == that.startDate + && timeZone == that.timeZone; + } + } + + /** + * A {@link WindowingFn} that windows elements into periods measured by months. + * + *

By default, periods of multiple months are measured starting at the + * epoch. This can be overridden with {@link #withStartingMonth}. + * + *

Months start on the first day of each calendar month, unless overridden by + * {@link #beginningOnDay}. + * + *

The time zone used to determine calendar boundaries is UTC, unless this + * is overridden with the {@link #withTimeZone} method. + */ + public static class MonthsWindows extends PartitioningWindowingFn { + + public MonthsWindows beginningOnDay(int dayOfMonth) { + return new MonthsWindows( + number, dayOfMonth, startDate, timeZone); + } + + public MonthsWindows withStartingMonth(int year, int month) { + return new MonthsWindows( + number, dayOfMonth, new DateTime(year, month, 1, 0, 0, timeZone), timeZone); + } + + public MonthsWindows withTimeZone(DateTimeZone timeZone) { + return new MonthsWindows( + number, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); + } + + //////////////////////////////////////////////////////////////////////////// + + private int number; + private int dayOfMonth; + private DateTime startDate; + private DateTimeZone timeZone; + + private MonthsWindows(int number, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) { + this.number = number; + this.dayOfMonth = dayOfMonth; + this.startDate = startDate; + this.timeZone = timeZone; + } + + @Override + public IntervalWindow assignWindow(Instant timestamp) { + DateTime datetime = new DateTime(timestamp, timeZone); + + int monthOffset = + Months.monthsBetween(startDate.withDayOfMonth(dayOfMonth), datetime).getMonths() + / number * number; + + DateTime begin = startDate.withDayOfMonth(dayOfMonth).plusMonths(monthOffset); + DateTime end = begin.plusMonths(number); + + return new IntervalWindow(begin.toInstant(), end.toInstant()); + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getCoder(); + } + + @Override + public boolean isCompatible(WindowingFn other) { + if (!(other instanceof MonthsWindows)) { + return false; + } + MonthsWindows that = (MonthsWindows) other; + return number == that.number + && dayOfMonth == dayOfMonth + && startDate == that.startDate + && timeZone == that.timeZone; + } + } + + /** + * A {@link WindowingFn} that windows elements into periods measured by years. + * + *

By default, periods of multiple years are measured starting at the + * epoch. This can be overridden with {@link #withStartingYear}. + * + *

Years start on the first day of each calendar year, unless overridden by + * {@link #beginningOnDay}. + * + *

The time zone used to determine calendar boundaries is UTC, unless this + * is overridden with the {@link #withTimeZone} method. + */ + public static class YearsWindows extends PartitioningWindowingFn { + + public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) { + return new YearsWindows( + number, monthOfYear, dayOfMonth, startDate, timeZone); + } + + public YearsWindows withStartingYear(int year) { + return new YearsWindows( + number, monthOfYear, dayOfMonth, new DateTime(year, 1, 1, 0, 0, timeZone), timeZone); + } + + public YearsWindows withTimeZone(DateTimeZone timeZone) { + return new YearsWindows( + number, monthOfYear, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); + } + + //////////////////////////////////////////////////////////////////////////// + + private int number; + private int monthOfYear; + private int dayOfMonth; + private DateTime startDate; + private DateTimeZone timeZone; + + private YearsWindows( + int number, int monthOfYear, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) { + this.number = number; + this.monthOfYear = monthOfYear; + this.dayOfMonth = dayOfMonth; + this.startDate = startDate; + this.timeZone = timeZone; + } + + @Override + public IntervalWindow assignWindow(Instant timestamp) { + DateTime datetime = new DateTime(timestamp, timeZone); + + DateTime offsetStart = startDate.withMonthOfYear(monthOfYear).withDayOfMonth(dayOfMonth); + + int yearOffset = + Years.yearsBetween(offsetStart, datetime).getYears() / number * number; + + DateTime begin = offsetStart.plusYears(yearOffset); + DateTime end = begin.plusYears(number); + + return new IntervalWindow(begin.toInstant(), end.toInstant()); + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getCoder(); + } + + @Override + public boolean isCompatible(WindowingFn other) { + if (!(other instanceof YearsWindows)) { + return false; + } + YearsWindows that = (YearsWindows) other; + return number == that.number + && monthOfYear == monthOfYear + && dayOfMonth == dayOfMonth + && startDate == that.startDate + && timeZone == that.timeZone; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java new file mode 100644 index 0000000000000..ea7a22c8fc41a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +/** + * A {@link WindowingFn} that windows values into fixed-size timestamp-based windows. + * + *

+ * <p> For example, in order to partition the data into 10 minute windows:
+ * <pre> {@code
+ * PCollection items = ...;
+ * PCollection windowedItems = items.apply(
+ *   Window.by(FixedWindows.of(Duration.standardMinutes(10))));
+ * } </pre>
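+ *
+ * <p> As an illustrative sketch (the offset value here is chosen arbitrarily),
+ * window boundaries can be shifted relative to the epoch with {@link #withOffset}:
+ * <pre> {@code
+ * // 10 minute windows whose boundaries fall at :02, :12, :22, ... past each hour
+ * PCollection offsetItems = items.apply(
+ *   Window.by(FixedWindows.of(Duration.standardMinutes(10))
+ *                         .withOffset(Duration.standardMinutes(2))));
+ * } </pre>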
+ */ +public class FixedWindows extends PartitioningWindowingFn { + + /** + * Size of this window. + */ + private final Duration size; + + /** + * Offset of this window. Windows start at time + * N * size + offset, where 0 is the epoch. + */ + private final Duration offset; + + /** + * Partitions the timestamp space into half-open intervals of the form + * [N * size, (N + 1) * size), where 0 is the epoch. + */ + public static FixedWindows of(Duration size) { + return new FixedWindows(size, Duration.ZERO); + } + + /** + * Partitions the timestamp space into half-open intervals of the form + * [N * size + offset, (N + 1) * size + offset), + * where 0 is the epoch. + * + * @throws IllegalAgumentException if offset is not in [0, size) + */ + public FixedWindows withOffset(Duration offset) { + return new FixedWindows(size, offset); + } + + private FixedWindows(Duration size, Duration offset) { + if (offset.isShorterThan(Duration.ZERO) || !offset.isShorterThan(size)) { + throw new IllegalArgumentException( + "FixedWindows WindowingStrategies must have 0 <= offset < size"); + } + this.size = size; + this.offset = offset; + } + + @Override + public IntervalWindow assignWindow(Instant timestamp) { + long start = timestamp.getMillis() + - timestamp.plus(size).minus(offset).getMillis() % size.getMillis(); + return new IntervalWindow(new Instant(start), size); + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getFixedSizeCoder(size); + } + + @Override + public boolean isCompatible(WindowingFn other) { + return (other instanceof FixedWindows) + && (size.equals(((FixedWindows) other).size)) + && (offset.equals(((FixedWindows) other).offset)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java new file mode 100644 index 0000000000000..bfcb9c7fa1595 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Instant; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Collection; + +/** + * Default {@link WindowingFn} where all data is in the same bucket. + */ +public class GlobalWindow + extends NonMergingWindowingFn { + @Override + public Collection assignWindows(AssignContext c) { + return Arrays.asList(Window.INSTANCE); + } + + @Override + public boolean isCompatible(WindowingFn o) { + return o instanceof GlobalWindow; + } + + @Override + public Coder windowCoder() { + return Window.Coder.INSTANCE; + } + + /** + * The default window into which all data is placed. 
+ */ + public static class Window extends BoundedWindow { + public static Window INSTANCE = new Window(); + + @Override + public Instant maxTimestamp() { + return new Instant(Long.MAX_VALUE); + } + + private Window() {} + + /** + * {@link Coder} for encoding and decoding {@code Window}s. + */ + public static class Coder extends AtomicCoder { + public static Coder INSTANCE = new Coder(); + + @Override + public void encode(Window window, OutputStream outStream, Context context) {} + + @Override + public Window decode(InputStream inStream, Context context) { + return Window.INSTANCE; + } + + @Override + public boolean isDeterministic() { + return true; + } + + private Coder() {} + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java new file mode 100644 index 0000000000000..8ac23501c97e5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.util.CloudObject; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * An implementation of {@link BoundedWindow} that represents an interval from + * {@link #start} (inclusive) to {@link #end} (exclusive). + */ +public class IntervalWindow extends BoundedWindow + implements Comparable { + /** + * Start of the interval, inclusive. + */ + private final Instant start; + + /** + * End of the interval, exclusive. + */ + private final Instant end; + + /** + * Creates a new IntervalWindow that represents the half-open time + * interval [start, end). + */ + public IntervalWindow(Instant start, Instant end) { + this.start = start; + this.end = end; + } + + public IntervalWindow(Instant start, Duration size) { + this.start = start; + this.end = start.plus(size); + } + + /** + * Returns the start of this window, inclusive. + */ + public Instant start() { + return start; + } + + /** + * Returns the end of this window, exclusive. + */ + public Instant end() { + return end; + } + + /** + * Returns the largest timestamp that can be included in this window. 
+ */ + @Override + public Instant maxTimestamp() { + // end not inclusive + return end.minus(1); + } + + /** + * Returns whether this window contains the given window. + */ + public boolean contains(IntervalWindow other) { + return !this.start.isAfter(other.start) && !this.end.isBefore(other.end); + } + + /** + * Returns whether this window is disjoint from the given window. + */ + public boolean isDisjoint(IntervalWindow other) { + return !this.end.isAfter(other.start) || !other.end.isAfter(this.start); + } + + /** + * Returns whether this window intersects the given window. + */ + public boolean intersects(IntervalWindow other) { + return !isDisjoint(other); + } + + /** + * Returns the minimal window that includes both this window and + * the given window. + */ + public IntervalWindow span(IntervalWindow other) { + return new IntervalWindow( + new Instant(Math.min(start.getMillis(), other.start.getMillis())), + new Instant(Math.max(end.getMillis(), other.end.getMillis()))); + } + + @Override + public boolean equals(Object o) { + return (o instanceof IntervalWindow) + && ((IntervalWindow) o).end.isEqual(end) + && ((IntervalWindow) o).start.isEqual(start); + } + + @Override + public int hashCode() { + // The end values are themselves likely to be arithmetic sequence, + // which is a poor distribution to use for a hashtable, so we + // add a highly non-linear transformation. + return (int) + (start.getMillis() + modInverse((int) (end.getMillis() << 1) + 1)); + } + + /** + * Compute the inverse of (odd) x mod 2^32. + */ + private int modInverse(int x) { + // Cube gives inverse mod 2^4, as x^4 == 1 (mod 2^4) for all odd x. + int inverse = x * x * x; + // Newton iteration doubles correct bits at each step. + inverse *= 2 - x * inverse; + inverse *= 2 - x * inverse; + inverse *= 2 - x * inverse; + return inverse; + } + + @Override + public String toString() { + return "[" + start + ".." + end + ")"; + } + + @Override + public int compareTo(IntervalWindow o) { + if (start.isEqual(o.start)) { + return end.compareTo(o.end); + } + return start.compareTo(o.start); + } + + /** + * Returns a Coder suitable for encoding IntervalWindows. + */ + public static Coder getCoder() { + return IntervalWindowCoder.of(); + } + + /** + * Returns a Coder for encoding interval windows of fixed size (which + * is more efficient than {@link #getCoder()} as it only needs to encode + * one endpoint). 
+ */ + public static Coder getFixedSizeCoder(final Duration size) { + return FixedSizeIntervalWindowCoder.of(size); + } + + private static class IntervalWindowCoder extends AtomicCoder { + private static final IntervalWindowCoder INSTANCE = + new IntervalWindowCoder(); + private static final Coder instantCoder = InstantCoder.of(); + + @JsonCreator + public static IntervalWindowCoder of() { + return INSTANCE; + } + + @Override + public void encode(IntervalWindow window, + OutputStream outStream, + Context context) + throws IOException, CoderException { + instantCoder.encode(window.start, outStream, context.nested()); + instantCoder.encode(window.end, outStream, context.nested()); + } + + @Override + public IntervalWindow decode(InputStream inStream, Context context) + throws IOException, CoderException { + Instant start = instantCoder.decode(inStream, context.nested()); + Instant end = instantCoder.decode(inStream, context.nested()); + return new IntervalWindow(start, end); + } + + @Override + public boolean isDeterministic() { return true; } + } + + private static class FixedSizeIntervalWindowCoder + extends AtomicCoder { + private static final Coder instantCoder = InstantCoder.of(); + + private final Duration size; + + @JsonCreator + public static FixedSizeIntervalWindowCoder of( + @JsonProperty("duration") String duration) { + return of(fromCloudDuration(duration)); + } + + public static FixedSizeIntervalWindowCoder of(Duration size) { + return new FixedSizeIntervalWindowCoder(size); + } + + private FixedSizeIntervalWindowCoder(Duration size) { + this.size = size; + } + + @Override + public void encode(IntervalWindow window, + OutputStream outStream, + Context context) + throws IOException, CoderException { + instantCoder.encode(window.start, outStream, context); + } + + @Override + public IntervalWindow decode(InputStream inStream, Context context) + throws IOException, CoderException { + Instant start = instantCoder.decode(inStream, context); + return new IntervalWindow(start, size); + } + + @Override + public boolean isDeterministic() { return true; } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addString(result, "duration", toCloudDuration(Duration.millis(size.getMillis()))); + return result; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java new file mode 100644 index 0000000000000..7ad7f29f6655a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import java.util.Collection; + +/** + * A {@link WindowingFn} that represents an invalid pipeline state. 
+ * + * @param window type + */ +public class InvalidWindowingFn extends WindowingFn { + private String cause; + private WindowingFn originalWindowingFn; + + public InvalidWindowingFn(String cause, WindowingFn originalWindowingFn) { + this.originalWindowingFn = originalWindowingFn; + this.cause = cause; + } + + /** + * Returns the reason that this {@code WindowingFn} is invalid. + */ + public String getCause() { + return cause; + } + + /** + * Returns the original windowingFn that this InvalidWindowingFn replaced. + */ + public WindowingFn getOriginalWindowingFn() { + return originalWindowingFn; + } + + @Override + public Collection assignWindows(AssignContext c) { + throw new UnsupportedOperationException(); + } + + @Override + public void mergeWindows(MergeContext c) { + throw new UnsupportedOperationException(); + } + + @Override + public Coder windowCoder() { + return originalWindowingFn.windowCoder(); + } + + /** + * {@code InvalidWindowingFn} objects with the same {@code originalWindowingFn} are compatible. + */ + @Override + public boolean isCompatible(WindowingFn other) { + return getClass() == other.getClass() + && getOriginalWindowingFn().isCompatible( + ((InvalidWindowingFn) other).getOriginalWindowingFn()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java new file mode 100644 index 0000000000000..4d4dd8492684d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * A {@link WindowingFn} that merges overlapping {@link IntervalWindow}s. + */ +public class MergeOverlappingIntervalWindows { + + /** + * Merge overlapping intervals. + */ + public static void mergeWindows(WindowingFn.MergeContext c) throws Exception { + // Merge any overlapping windows into a single window. + // Sort the list of existing windows so we only have to + // traverse the list once rather than considering all + // O(n^2) window pairs. 
+ List sortedWindows = new ArrayList<>(); + for (IntervalWindow window : c.windows()) { + sortedWindows.add(window); + } + Collections.sort(sortedWindows); + List merges = new ArrayList<>(); + MergeCandidate current = new MergeCandidate(); + for (IntervalWindow window : sortedWindows) { + if (current.intersects(window)) { + current.add(window); + } else { + merges.add(current); + current = new MergeCandidate(window); + } + } + merges.add(current); + for (MergeCandidate merge : merges) { + merge.apply(c); + } + } + + private static class MergeCandidate { + private IntervalWindow union; + private final List parts; + public MergeCandidate() { + parts = new ArrayList<>(); + } + public MergeCandidate(IntervalWindow window) { + union = window; + parts = new ArrayList<>(Arrays.asList(window)); + } + public boolean intersects(IntervalWindow window) { + return union == null || union.intersects(window); + } + public void add(IntervalWindow window) { + union = union == null ? window : union.span(window); + parts.add(window); + } + public void apply(WindowingFn.MergeContext c) throws Exception { + if (parts.size() > 1) { + c.merge(parts, union); + } + } + + @Override + public String toString() { + return "MergeCandidate[union=" + union + ", parts=" + parts + "]"; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java new file mode 100644 index 0000000000000..ffeea996d60dc --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +/** + * Abstract base class for {@link WindowingFn}s that do not merge windows. + * + * @param type of elements being windowed + * @param {@link BoundedWindow} subclass used to represent the windows used by this + * {@code WindowingFn} + */ +public abstract class NonMergingWindowingFn + extends WindowingFn { + + @Override + public final void mergeWindows(MergeContext c) { } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java new file mode 100644 index 0000000000000..6a65ba134f181 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import org.joda.time.Instant; + +import java.util.Arrays; +import java.util.Collection; + +/** + * A {@link WindowingFn} that places each value into exactly one window + * based on its timestamp and never merges windows. + * + * @param type of elements being windowed + * @param window type + */ +public abstract class PartitioningWindowingFn + extends NonMergingWindowingFn { + /** + * Returns the single window to which elements with this timestamp belong. + */ + public abstract W assignWindow(Instant timestamp); + + @Override + public final Collection assignWindows(AssignContext c) { + return Arrays.asList(assignWindow(c.timestamp())); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java new file mode 100644 index 0000000000000..47f8a08005830 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Duration; + +import java.util.Arrays; +import java.util.Collection; + +/** + * A WindowingFn windowing values into sessions separated by {@link #gapDuration}-long + * periods with no elements. + * + *

+ * <p> For example, in order to window data into sessions with at least
+ * 10 minute gaps between them:
+ * <pre> {@code
+ * PCollection pc = ...;
+ * PCollection windowed_pc = pc.apply(
+ *   Window.by(Sessions.withGapDuration(Duration.standardMinutes(10))));
+ * } </pre>
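+ *
+ * <p> Roughly, with the 10 minute gap above and hypothetical timestamps, elements
+ * at 12:00 and 12:07 would merge into a single session spanning [12:00, 12:17),
+ * while an element at 12:30 would start a separate session. A longer gap is just
+ * a different duration:
+ * <pre> {@code
+ * // sessions that close only after 30 minutes of inactivity
+ * PCollection sessioned_pc = pc.apply(
+ *   Window.by(Sessions.withGapDuration(Duration.standardMinutes(30))));
+ * } </pre>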
+ */ +public class Sessions extends WindowingFn { + + /** + * Duration of the gaps between sessions. + */ + private final Duration gapDuration; + + /** + * Creates a {@code Sessions} {@link WindowingFn} with the specified gap duration. + */ + public static Sessions withGapDuration(Duration gapDuration) { + return new Sessions(gapDuration); + } + + /** + * Creates a {@code Sessions} {@link WindowingFn} with the specified gap duration. + */ + private Sessions(Duration gapDuration) { + this.gapDuration = gapDuration; + } + + @Override + public Collection assignWindows(AssignContext c) { + // Assign each element into a window from its timestamp until gapDuration in the + // future. Overlapping windows (representing elements within gapDuration of + // each other) will be merged. + return Arrays.asList(new IntervalWindow(c.timestamp(), gapDuration)); + } + + @Override + public void mergeWindows(MergeContext c) throws Exception { + MergeOverlappingIntervalWindows.mergeWindows(c); + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getCoder(); + } + + @Override + public boolean isCompatible(WindowingFn other) { + return other instanceof Sessions; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java new file mode 100644 index 0000000000000..6643289071ef5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * A WindowingFn that windows values into possibly overlapping fixed-size + * timestamp-based windows. + * + *

For example, in order to window data into 10 minute windows that + * update every minute: + *

 {@code
+ * PCollection<String> items = ...;
+ * PCollection<String> windowedItems = items.apply(
+ *   Window.<String>into(SlidingWindows.of(Duration.standardMinutes(10))
+ *     .every(Duration.standardMinutes(1))));
+ * } 
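+ *
+ * For illustration, with the 10 minute size and 1 minute period above, an element
+ * timestamped 12:05:30 is assigned to the ten overlapping windows
+ * [11:56, 12:06), [11:57, 12:07), ..., [12:05, 12:15).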
+ */ +public class SlidingWindows extends NonMergingWindowingFn { + + /** + * Amount of time between generated windows. + */ + private final Duration period; + + /** + * Size of the generated windows. + */ + private final Duration size; + + /** + * Offset of the generated windows. + * Windows start at time N * start + offset, where 0 is the epoch. + */ + private final Duration offset; + + /** + * Assigns timestamps into half-open intervals of the form + * [N * period, N * period + size), where 0 is the epoch. + * + *

If {@link SlidingWindows#every} is not called, the period defaults + * to one millisecond. + */ + public static SlidingWindows of(Duration size) { + return new SlidingWindows(new Duration(1), size, Duration.ZERO); + } + + /** + * Returns a new {@code SlidingWindows} with the original size, that assigns + * timestamps into half-open intervals of the form + * [N * period, N * period + size), where 0 is the epoch. + */ + public SlidingWindows every(Duration period) { + return new SlidingWindows(period, size, offset); + } + + /** + * Assigns timestamps into half-open intervals of the form + * [N * period + offset, N * period + offset + size). + * + * @throws IllegalArgumentException if offset is not in [0, period) + */ + public SlidingWindows withOffset(Duration offset) { + return new SlidingWindows(period, size, offset); + } + + private SlidingWindows(Duration period, Duration size, Duration offset) { + if (offset.isShorterThan(Duration.ZERO) + || !offset.isShorterThan(period) + || !size.isLongerThan(Duration.ZERO)) { + throw new IllegalArgumentException( + "SlidingWindows WindowingStrategies must have 0 <= offset < period and 0 < size"); + } + this.period = period; + this.size = size; + this.offset = offset; + } + + @Override + public Coder windowCoder() { + return IntervalWindow.getFixedSizeCoder(size); + } + + @Override + public Collection assignWindows(AssignContext c) { + List windows = + new ArrayList<>((int) (size.getMillis() / period.getMillis())); + Instant timestamp = c.timestamp(); + long lastStart = timestamp.getMillis() + - timestamp.plus(period).minus(offset).getMillis() % period.getMillis(); + for (long start = lastStart; + start > timestamp.minus(size).getMillis(); + start -= period.getMillis()) { + windows.add(new IntervalWindow(new Instant(start), size)); + } + return windows; + } + + @Override + public boolean isCompatible(WindowingFn other) { + if (other instanceof SlidingWindows) { + SlidingWindows that = (SlidingWindows) other; + return period.equals(that.period) + && size.equals(that.size) + && offset.equals(that.offset); + } else { + return false; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java new file mode 100644 index 0000000000000..68796c908aba5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; +import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.DirectModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.DoFnRunner; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * {@code Window} logically divides up or groups the elements of a + * {@link PCollection} into finite windows according to a {@link WindowingFn}. + * The output of {@code Window} contains the same elements as input, but they + * have been logically assigned to windows. The next + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}s, including one + * within composite transforms, will group by the combination of keys and + * windows. + + *

See {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} + * for more information about how grouping with windows works. + * + *

Windowing a {@code PCollection} allows chunks of it to be processed + * individually, before the entire {@code PCollection} is available. This is + * especially important for {@code PCollection}s with unbounded size, + * since the full {@code PCollection} is never available at once + * because more data is continually arriving. + * For {@code PCollection}s with a bounded size (a.k.a. conventional batch mode), + * all data is implicitly in a single window by default, unless + * {@code Window} is applied. + * + *

For example, a simple form of windowing divides up the data into + * fixed-width time intervals, using {@link FixedWindows}. + * The following example demonstrates how to use {@code Window} in a pipeline + * that counts the number of occurrences of strings each minute: + * + *

 {@code
+ * PCollection<String> items = ...;
+ * PCollection<String> windowed_items = items.apply(
+ *   Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));
+ * PCollection<KV<String, Long>> windowed_counts = windowed_items.apply(
+ *   Count.create());
+ * } 
+ * + *

Let (data, timestamp) denote a data element along with its timestamp. + * Then, if the input to this pipeline consists of + * {("foo", 15s), ("bar", 30s), ("foo", 45s), ("foo", 1m30s)}, + * the output will be + * {(KV("foo", 2), 1m), (KV("bar", 1), 1m), (KV("foo", 1), 2m)} + * + * + *

Several predefined {@link WindowingFn}s are provided: + *

    + *
  • {@link FixedWindows} partitions the timestamps into fixed-width intervals. + *
  • {@link SlidingWindows} places data into overlapping fixed-width intervals. + *
  • {@link Sessions} groups data into sessions where each item in a window + * is separated from the next by no more than a specified gap. + *
+ * + * Additionally, custom {@link WindowingFn}s can be created, by creating new + * subclasses of {@link WindowingFn}. + */ +public class Window { + /** + * Creates a {@code Window} {@code PTransform} with the given name. + * + *

See the discussion of Naming in + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more explanation. + * + *

The resulting {@code PTransform} is incomplete, and its input/output + * type is not yet bound. Use {@link Window.Unbound#into} to specify the + * {@link WindowingFn} to use, which will also bind the input/output type of this + * {@code PTransform}. + */ + public static Unbound named(String name) { + return new Unbound().named(name); + } + + /** + * Creates a {@code Window} {@code PTransform} that uses the given + * {@link WindowingFn} to window the data. + * + *

The resulting {@code PTransform}'s types have been bound, with both the + * input and output being a {@code PCollection}, inferred from the types of + * the argument {@code WindowingFn}. It is ready to be applied, or further + * properties can be set on it first. + */ + public static Bound into(WindowingFn fn) { + return new Unbound().into(fn); + } + + /** + * An incomplete {@code Window} transform, with unbound input/output type. + * + *

Before being applied, {@link Window.Unbound#into} must be + * invoked to specify the {@link WindowingFn} to invoke, which will also + * bind the input/output type of this {@code PTransform}. + */ + public static class Unbound { + String name; + + Unbound() {} + + Unbound(String name) { + this.name = name; + } + + /** + * Returns a new {@code Window} transform that's like this + * transform but with the specified name. Does not modify this + * transform. The resulting transform is still incomplete. + * + *

See the discussion of Naming in + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more + * explanation. + */ + public Unbound named(String name) { + return new Unbound(name); + } + + /** + * Returns a new {@code Window} {@code PTransform} that's like this + * transform but which will use the given {@link WindowingFn}, and which has + * its input and output types bound. Does not modify this transform. The + * resulting {@code PTransform} is sufficiently specified to be applied, + * but more properties can still be specified. + */ + public Bound into(WindowingFn fn) { + return new Bound<>(name, fn); + } + } + + /** + * A {@code PTransform} that windows the elements of a {@code PCollection}, + * into finite windows according to a user-specified {@code WindowingFn}. + * + * @param The type of elements this {@code Window} is applied to + */ + public static class Bound extends PTransform, PCollection> { + WindowingFn fn; + + Bound(String name, WindowingFn fn) { + this.name = name; + this.fn = fn; + } + + /** + * Returns a new {@code Window} {@code PTransform} that's like this + * {@code PTransform} but with the specified name. Does not + * modify this {@code PTransform}. + * + *

See the discussion of Naming in + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more + * explanation. + */ + public Bound named(String name) { + return new Bound<>(name, fn); + } + + @Override + public PCollection apply(PCollection input) { + return PCollection.createPrimitiveOutputInternal(fn); + } + + @Override + protected Coder getDefaultOutputCoder() { + return getInput().getCoder(); + } + + @Override + protected String getKindString() { + return "Window.Into(" + StringUtils.approximateSimpleName(fn.getClass()) + ")"; + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Creates a {@code Window} {@code PTransform} that does not change assigned + * windows, but will cause windows to be merged again as part of the next + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}. + */ + public static Remerge remerge() { + return new Remerge(); + } + + /** + * {@code PTransform} that does not change assigned windows, but will cause + * windows to be merged again as part of the next + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}. + */ + public static class Remerge extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection input) { + WindowingFn windowingFn = getInput().getWindowingFn(); + WindowingFn outputWindowingFn = + (windowingFn instanceof InvalidWindowingFn) + ? ((InvalidWindowingFn) windowingFn).getOriginalWindowingFn() + : windowingFn; + + return input.apply(ParDo.named("Identity").of(new DoFn() { + @Override public void processElement(ProcessContext c) { + c.output(c.element()); + } + })).setWindowingFnInternal(outputWindowingFn); + } + } + + + ///////////////////////////////////////////////////////////////////////////// + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, + new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + evaluateHelper(transform, context); + } + }); + } + + private static void evaluateHelper( + Bound transform, + DirectPipelineRunner.EvaluationContext context) { + PCollection input = transform.getInput(); + + DirectModeExecutionContext executionContext = new DirectModeExecutionContext(); + + TupleTag outputTag = new TupleTag<>(); + DoFn addWindowsDoFn = new AssignWindowsDoFn<>(transform.fn); + DoFnRunner addWindowsRunner = + DoFnRunner.createWithListOutputs( + context.getPipelineOptions(), + addWindowsDoFn, + PTuple.empty(), + outputTag, + new ArrayList>(), + executionContext.getStepContext(context.getStepName(transform)), + context.getAddCounterMutator()); + + addWindowsRunner.startBundle(); + + // Process input elements. 
+ for (DirectPipelineRunner.ValueWithMetadata inputElem + : context.getPCollectionValuesWithMetadata(input)) { + executionContext.setKey(inputElem.getKey()); + addWindowsRunner.processElement(inputElem.getWindowedValue()); + } + + addWindowsRunner.finishBundle(); + + context.setPCollectionValuesWithMetadata( + transform.getOutput(), + executionContext.getOutput(outputTag)); + } + + + ///////////////////////////////////////////////////////////////////////////// + + static { + DataflowPipelineTranslator.registerTransformTranslator( + Bound.class, + new DataflowPipelineTranslator.TransformTranslator() { + @Override + public void translate( + Bound transform, + DataflowPipelineTranslator.TranslationContext context) { + translateHelper(transform, context); + } + }); + } + + private static void translateHelper( + Bound transform, + DataflowPipelineTranslator.TranslationContext context) { + context.addStep(transform, "Bucket"); + context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); + context.addOutput(PropertyNames.OUTPUT, transform.getOutput()); + + byte[] serializedBytes = serializeToByteArray(transform.fn); + String serializedJson = byteArrayToJsonString(serializedBytes); + assert Arrays.equals(serializedBytes, + jsonStringToByteArray(serializedJson)); + context.addInput(PropertyNames.SERIALIZED_FN, serializedJson); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java new file mode 100644 index 0000000000000..0f049372555bb --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Instant; + +import java.io.Serializable; +import java.util.Collection; + +/** + * The argument to the {@link Window} transform used to assign elements into + * windows and to determine how windows are merged. See {@link Window} for more + * information on how {@code WindowingFn}s are used and for a library of + * predefined {@code WindowingFn}s. + * + *

Users will generally want to use the predefined + * {@code WindowingFn}s, but it is also possible to create new + * subclasses. + * TODO: Describe how to properly create {@code WindowingFn}s. + * + * @param type of elements being windowed + * @param {@link BoundedWindow} subclass used to represent the + * windows used by this {@code WindowingFn} + */ +public abstract class WindowingFn + implements Serializable { + + /** + * Information available when running {@link #assignWindows}. + */ + public abstract class AssignContext { + /** + * Returns the current element. + */ + public abstract T element(); + + /** + * Returns the timestamp of the current element. + */ + public abstract Instant timestamp(); + + /** + * Returns the windows the current element was in, prior to this + * {@code AssignFn} being called. + */ + public abstract Collection windows(); + } + + /** + * Given a timestamp and element, returns the set of windows into which it + * should be placed. + */ + public abstract Collection assignWindows(AssignContext c) throws Exception; + + /** + * Information available when running {@link #mergeWindows}. + */ + public abstract class MergeContext { + /** + * Returns the current set of windows. + */ + public abstract Collection windows(); + + /** + * Signals to the framework that the windows in {@code toBeMerged} should + * be merged together to form {@code mergeResult}. + * + *

{@code toBeMerged} should be a subset of {@link #windows} + * and disjoint from the {@code toBeMerged} set of previous calls + * to {@code merge}. + * + *

{@code mergeResult} must either not be in {@link #windows} or be in + * {@code toBeMerged}. + * + * @throws IllegalArgumentException if any elements of toBeMerged are not + * in windows(), or have already been merged + */ + public abstract void merge(Collection toBeMerged, W mergeResult) + throws Exception; + } + + /** + * Does whatever merging of windows is necessary. + * + *

See {@link MergeOverlappingIntervalWindows#mergeWindows} for an + * example of how to override this method. + */ + public abstract void mergeWindows(MergeContext c) throws Exception; + + /** + * Returns whether this performs the same merging as the given + * {@code WindowingFn}. + */ + public abstract boolean isCompatible(WindowingFn other); + + /** + * Returns the {@link Coder} used for serializing the windows used + * by this windowingFn. + */ + public abstract Coder windowCoder(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java new file mode 100644 index 0000000000000..dda2488dac346 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.util.Arrays; +import java.util.Collection; + +/** + * Abstract class representing a set of active windows for a key. + */ +abstract class AbstractWindowSet { + /** + * Hook for determining how to keep track of active windows and when they + * should be marked as complete. + */ + interface ActiveWindowManager { + /** + * Notes that a window has been added to the active set. + * + *

The given window must not already be active. + */ + void addWindow(W window) throws Exception; + + /** + * Notes that a window has been explicitly removed from the active set. + * + *

The given window must currently be active. + * + *

Windows are implicitly removed from the active set when they are + * complete, and this method will not be called. This method is called when + * a window is merged into another and thus is no longer active. + */ + void removeWindow(W window) throws Exception; + } + + /** + * Wrapper around AbstractWindowSet that provides the MergeContext interface. + */ + static class WindowMergeContext + extends WindowingFn.MergeContext { + private final AbstractWindowSet windowSet; + + public WindowMergeContext( + AbstractWindowSet windowSet, + WindowingFn windowingFn) { + ((WindowingFn) windowingFn).super(); + this.windowSet = windowSet; + } + + @Override public Collection windows() { + return windowSet.windows(); + } + + @Override public void merge(Collection toBeMerged, W mergeResult) throws Exception { + windowSet.merge(toBeMerged, mergeResult); + } + } + + protected final K key; + protected final WindowingFn windowingFn; + protected final Coder inputCoder; + protected final DoFnProcessContext> context; + protected final ActiveWindowManager activeWindowManager; + + protected AbstractWindowSet( + K key, + WindowingFn windowingFn, + Coder inputCoder, + DoFnProcessContext> context, + ActiveWindowManager activeWindowManager) { + this.key = key; + this.windowingFn = windowingFn; + this.inputCoder = inputCoder; + this.context = context; + this.activeWindowManager = activeWindowManager; + } + + /** + * Returns the set of known windows. + */ + protected abstract Collection windows(); + + /** + * Returns the final value of the elements in the given window. + * + *

Illegal to call if the window does not exist in the set. + */ + protected abstract VO finalValue(W window) throws Exception; + + /** + * Adds the given value in the given window to the set. + * + *

If the window already exists, puts the element into that window. + * If not, adds the window to the set first, then puts the element + * in the window. + */ + protected abstract void put(W window, VI value) throws Exception; + + /** + * Removes the given window from the set. + * + *

Illegal to call if the window does not exist in the set. + * + *

{@code AbstractWindowSet} subclasses may throw + * {@link UnsupportedOperationException} if they do not support removing + * windows. + */ + protected abstract void remove(W window) throws Exception; + + /** + * Instructs this set to merge the windows in toBeMerged into mergeResult. + * + *

{@code toBeMerged} should be a subset of {@link #windows} + * and disjoint from the {@code toBeMerged} set of previous calls + * to {@code merge}. + * + *

{@code mergeResult} must either not be in {@link #windows} or be in + * {@code toBeMerged}. + * + *

{@code AbstractWindowSet} subclasses may throw + * {@link UnsupportedOperationException} if they do not support merging windows. + */ + protected abstract void merge(Collection toBeMerged, W mergeResult) throws Exception; + + /** + * Returns whether this window set contains the given window. + * + *

{@code AbstractWindowSet} subclasses may throw + * {@link UnsupportedOperationException} if they do not support querying for + * which windows are active. If this is the case, callers must ensure they + * do not call {@link #finalValue} on non-existent windows. + */ + protected abstract boolean contains(W window); + + /** + * Marks the window as complete, causing its elements to be emitted. + */ + public void markCompleted(W window) throws Exception { + VO value = finalValue(window); + remove(window); + context.outputWindowedValue( + KV.of(key, value), + window.maxTimestamp(), + Arrays.asList(window)); + } + + /** + * Hook for WindowSets to take action before they are deleted. + */ + protected void flush() throws Exception {} +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AggregatorImpl.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AggregatorImpl.java new file mode 100644 index 0000000000000..e71bf7f8a7f0c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AggregatorImpl.java @@ -0,0 +1,111 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MAX; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MIN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.Max; +import com.google.cloud.dataflow.sdk.transforms.Min; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * An implementation of the {@code Aggregator} interface. + * + * @param the type of input values + * @param the type of accumulator values + * @param the type of output value + */ +public class AggregatorImpl implements Aggregator { + + private final Counter counter; + + /* + * Constructs a new aggregator with the given name and aggregation logic + * specified in the CombineFn argument. The underlying counter is + * automatically added into the provided CounterSet. + * + *

If a counter with the same name already exists, it will be + * reused, as long as it has the same type. + */ + public AggregatorImpl(String name, + CombineFn combiner, + CounterSet.AddCounterMutator addCounterMutator) { + this((Counter) constructCounter(name, combiner), addCounterMutator); + } + + /* + * Constructs a new aggregator with the given name and aggregation logic + * specified in the SerializableFunction argument. The underlying counter is + * automatically added into the provided CounterSet. + * + *

If a counter with the same name already exists, it will be + * reused, as long as it has the same type. + */ + public AggregatorImpl(String name, + SerializableFunction, VO> combiner, + CounterSet.AddCounterMutator addCounterMutator) { + this((Counter) constructCounter(name, combiner), addCounterMutator); + } + + private AggregatorImpl(Counter counter, + CounterSet.AddCounterMutator addCounterMutator) { + try { + this.counter = addCounterMutator.addCounter(counter); + } catch (IllegalArgumentException ex) { + throw new IllegalArgumentException( + "aggregator's name collides with an existing aggregator " + + "or system-provided counter of an incompatible type"); + } + } + + private static Counter constructCounter(String name, Object combiner) { + if (combiner.getClass() == Sum.SumIntegerFn.class) { + return Counter.ints(name, SUM); + } else if (combiner.getClass() == Sum.SumLongFn.class) { + return Counter.longs(name, SUM); + } else if (combiner.getClass() == Sum.SumDoubleFn.class) { + return Counter.doubles(name, SUM); + } else if (combiner.getClass() == Min.MinIntegerFn.class) { + return Counter.ints(name, MIN); + } else if (combiner.getClass() == Min.MinLongFn.class) { + return Counter.longs(name, MIN); + } else if (combiner.getClass() == Min.MinDoubleFn.class) { + return Counter.doubles(name, MIN); + } else if (combiner.getClass() == Max.MaxIntegerFn.class) { + return Counter.ints(name, MAX); + } else if (combiner.getClass() == Max.MaxLongFn.class) { + return Counter.longs(name, MAX); + } else if (combiner.getClass() == Max.MaxDoubleFn.class) { + return Counter.doubles(name, MAX); + } else { + throw new IllegalArgumentException("unsupported combiner in Aggregator: " + + combiner.getClass().getName()); + } + } + + @Override + public void addValue(VI value) { + counter.addValue(value); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiErrorExtractor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiErrorExtractor.java new file mode 100644 index 0000000000000..ad181cee40b38 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiErrorExtractor.java @@ -0,0 +1,104 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.googleapis.json.GoogleJsonError; +import com.google.api.client.googleapis.json.GoogleJsonResponseException; +import com.google.api.client.http.HttpStatusCodes; +import com.google.common.annotations.VisibleForTesting; + +import java.io.IOException; + +/** + * Translates exceptions from API calls into higher-level meaning, while allowing injectability + * for testing how API errors are handled. + */ +public class ApiErrorExtractor { + + public static final int STATUS_CODE_CONFLICT = 409; + public static final int STATUS_CODE_RANGE_NOT_SATISFIABLE = 416; + + /** + * Determines if the given exception indicates 'item not found'. 
+ */ + public boolean itemNotFound(IOException e) { + if (e instanceof GoogleJsonResponseException) { + return (getHttpStatusCode((GoogleJsonResponseException) e)) == + HttpStatusCodes.STATUS_CODE_NOT_FOUND; + } + return false; + } + + /** + * Determines if the given GoogleJsonError indicates 'item not found'. + */ + public boolean itemNotFound(GoogleJsonError e) { + return e.getCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND; + } + + /** + * Checks if HTTP status code indicates the error specified. + */ + private boolean hasHttpCode(IOException e, int code) { + if (e instanceof GoogleJsonResponseException) { + return (getHttpStatusCode((GoogleJsonResponseException) e)) == code; + } + return false; + } + + /** + * Determines if the given exception indicates 'conflict' (already exists). + */ + public boolean alreadyExists(IOException e) { + return hasHttpCode(e, STATUS_CODE_CONFLICT); + } + + /** + * Determines if the given exception indicates 'range not satisfiable'. + */ + public boolean rangeNotSatisfiable(IOException e) { + return hasHttpCode(e, STATUS_CODE_RANGE_NOT_SATISFIABLE); + } + + /** + * Determines if the given exception indicates 'access denied'. + */ + public boolean accessDenied(GoogleJsonResponseException e) { + return getHttpStatusCode(e) == HttpStatusCodes.STATUS_CODE_FORBIDDEN; + } + + /** + * Determines if the given exception indicates 'access denied', recursively checking inner + * getCause() if outer exception isn't an instance of the correct class. + */ + public boolean accessDenied(IOException e) { + return (e.getCause() != null) && + (e.getCause() instanceof GoogleJsonResponseException) && + accessDenied((GoogleJsonResponseException) e.getCause()); + } + + /** + * Returns HTTP status code from the given exception. + * + * Note: GoogleJsonResponseException.getStatusCode() method is marked final therefore + * it cannot be mocked using Mockito. We use this helper so that we can override it in tests. + */ + @VisibleForTesting + int getHttpStatusCode(GoogleJsonResponseException e) { + return e.getStatusCode(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java new file mode 100644 index 0000000000000..f3b57a4508b0e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.lang.reflect.InvocationTargetException; + +/** Stores whether we are running within AppEngine or not. */ +public class AppEngineEnvironment { + /** + * True if running inside of AppEngine, false otherwise. + */ + @Deprecated + public static final boolean IS_APP_ENGINE = isAppEngine(); + + /** + * Attempts to detect whether we are inside of AppEngine. + *

+ * Purposely copied and left private from private code.google.common.util.concurrent.MoreExecutors#isAppEngine. + * + * @return true if we are inside of AppEngine, false otherwise. + */ + static boolean isAppEngine() { + if (System.getProperty("com.google.appengine.runtime.environment") == null) { + return false; + } + try { + // If the current environment is null, we're not inside AppEngine. + return Class.forName("com.google.apphosting.api.ApiProxy") + .getMethod("getCurrentEnvironment") + .invoke(null) != null; + } catch (ClassNotFoundException e) { + // If ApiProxy doesn't exist, we're not on AppEngine at all. + return false; + } catch (InvocationTargetException e) { + // If ApiProxy throws an exception, we're not in a proper AppEngine environment. + return false; + } catch (IllegalAccessException e) { + // If the method isn't accessible, we're not on a supported version of AppEngine; + return false; + } catch (NoSuchMethodException e) { + // If the method doesn't exist, we're not on a supported version of AppEngine; + return false; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java new file mode 100644 index 0000000000000..7649a8c637248 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; + +import org.joda.time.Instant; + +import java.util.Collection; + +/** + * {@link DoFn} that tags elements of a PCollection with windows, according + * to the provided {@link WindowingFn}. 
+ * @param Type of elements being windowed + * @param Window type + */ +public class AssignWindowsDoFn extends DoFn { + private WindowingFn fn; + + public AssignWindowsDoFn(WindowingFn fn) { + this.fn = fn; + } + + @Override + public void processElement(ProcessContext c) throws Exception { + final DoFnProcessContext context = (DoFnProcessContext) c; + Collection windows = + ((WindowingFn) fn).assignWindows( + ((WindowingFn) fn).new AssignContext() { + @Override + public T element() { + return context.element(); + } + + @Override + public Instant timestamp() { + return context.timestamp(); + } + + @Override + public Collection windows() { + return context.windows(); + } + }); + + context.outputWindowedValue(context.element(), context.timestamp(), windows); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java new file mode 100644 index 0000000000000..78e8e0538b824 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.BackOff; +import com.google.common.base.Preconditions; + +/** + * Implementation of {@link BackOff} that increases the back off period for each retry attempt + * using a randomization function that grows exponentially. + *

+ * Example: The initial interval is 0.5 seconds and the maximum number of retries is 10. + * For 10 tries the sequence will be (values in seconds): + *

+ * + *
+   retry#      retry_interval     randomized_interval
+   1             0.5                [0.25,   0.75]
+   2             0.75               [0.375,  1.125]
+   3             1.125              [0.562,  1.687]
+   4             1.687              [0.8435, 2.53]
+   5             2.53               [1.265,  3.795]
+   6             3.795              [1.897,  5.692]
+   7             5.692              [2.846,  8.538]
+   8             8.538              [4.269, 12.807]
+   9            12.807              [6.403, 19.210]
+   10           {@link BackOff#STOP}
+ * 
+ * + *

+ * Implementation is not thread-safe. + *
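+ *
+ * A rough usage sketch ({@code attemptRpc} is a hypothetical caller-supplied operation,
+ * not part of this SDK):
+ *  {@code
+ * // 10 attempts, 500 ms initial interval.
+ * AttemptBoundedExponentialBackOff backOff = new AttemptBoundedExponentialBackOff(10, 500);
+ * while (!attemptRpc()) {
+ *   long sleepMillis = backOff.nextBackOffMillis();
+ *   if (sleepMillis == BackOff.STOP) {
+ *     break;  // give up after the configured number of attempts
+ *   }
+ *   Thread.sleep(sleepMillis);  // InterruptedException handling omitted in this sketch
+ * }
+ * }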

+ */ +public class AttemptBoundedExponentialBackOff implements BackOff { + public static final double DEFAULT_MULTIPLIER = 1.5; + public static final double DEFAULT_RANDOMIZATION_FACTOR = 0.5; + private final int maximumNumberOfAttempts; + private final long initialIntervalMillis; + private int currentAttempt; + + public AttemptBoundedExponentialBackOff(int maximumNumberOfAttempts, long initialIntervalMillis) { + Preconditions.checkArgument(maximumNumberOfAttempts > 0, + "Maximum number of attempts must be greater than zero."); + Preconditions.checkArgument(initialIntervalMillis > 0, + "Initial interval must be greater than zero."); + this.maximumNumberOfAttempts = maximumNumberOfAttempts; + this.initialIntervalMillis = initialIntervalMillis; + reset(); + } + + @Override + public void reset() { + currentAttempt = 1; + } + + @Override + public long nextBackOffMillis() { + if (currentAttempt >= maximumNumberOfAttempts) { + return BackOff.STOP; + } + double currentIntervalMillis = initialIntervalMillis + * Math.pow(DEFAULT_MULTIPLIER, currentAttempt - 1); + double randomOffset = (Math.random() * 2 - 1) + * DEFAULT_RANDOMIZATION_FACTOR * currentIntervalMillis; + currentAttempt += 1; + return Math.round(currentIntervalMillis + randomOffset); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Base64Utils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Base64Utils.java new file mode 100644 index 0000000000000..0ea25102e1321 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Base64Utils.java @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +/** + * Utilities related to Base64 encoding. + */ +public class Base64Utils { + /** + * Returns an upper bound of the length of non-chunked Base64 encoded version + * of the string of the given length. + */ + public static int getBase64Length(int length) { + return 4 * ((length + 2) / 3); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java new file mode 100644 index 0000000000000..2d42407c94377 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.CodedTupleTagMap; + +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * {@link ExecutionContext} for use in batch mode. + */ +public class BatchModeExecutionContext extends ExecutionContext { + private Object key; + private Map> timers = new HashMap<>(); + + /** + * Create a new {@link ExecutionContext.StepContext}. + */ + @Override + public ExecutionContext.StepContext createStepContext(String stepName) { + return new StepContext(stepName); + } + + /** + * Sets the key of the work currently being processed. + */ + public void setKey(Object key) { + this.key = key; + } + + /** + * Returns the key of the work currently being processed. + * + *

If there is not a currently defined key, returns null. + */ + public Object getKey() { + return key; + } + + @Override + public void setTimer(String timer, Instant timestamp) { + Map keyTimers = timers.get(getKey()); + if (keyTimers == null) { + keyTimers = new HashMap<>(); + timers.put(getKey(), keyTimers); + } + keyTimers.put(timer, timestamp); + } + + @Override + public void deleteTimer(String timer) { + Map keyTimers = timers.get(getKey()); + if (keyTimers != null) { + keyTimers.remove(timer); + } + } + + public List> getAllTimers() { + List> result = new ArrayList<>(); + for (Map.Entry> keyTimers : timers.entrySet()) { + for (Map.Entry timer : keyTimers.getValue().entrySet()) { + result.add(TimerOrElement.timer(timer.getKey(), timer.getValue(), keyTimers.getKey())); + } + } + return result; + } + + /** + * {@link ExecutionContext.StepContext} used in batch mode. + */ + class StepContext extends ExecutionContext.StepContext { + private Map, Object>> state = new HashMap<>(); + private Map, List>> tagLists = new HashMap<>(); + + StepContext(String stepName) { + super(stepName); + } + + @Override + public void store(CodedTupleTag tag, T value) { + Map, Object> perKeyState = state.get(getKey()); + if (perKeyState == null) { + perKeyState = new HashMap<>(); + state.put(getKey(), perKeyState); + } + perKeyState.put(tag, value); + } + + @Override + public CodedTupleTagMap lookup(List> tags) { + Map, Object> perKeyState = state.get(getKey()); + Map, Object> map = new HashMap<>(); + if (perKeyState != null) { + for (CodedTupleTag tag : tags) { + map.put(tag, perKeyState.get(tag)); + } + } + return CodedTupleTagMap.of(map); + } + + @Override + public void writeToTagList(CodedTupleTag tag, T value, Instant timestamp) { + Map, List> perKeyTagLists = tagLists.get(getKey()); + if (perKeyTagLists == null) { + perKeyTagLists = new HashMap<>(); + tagLists.put(getKey(), perKeyTagLists); + } + List tagList = perKeyTagLists.get(tag); + if (tagList == null) { + tagList = new ArrayList<>(); + perKeyTagLists.put(tag, tagList); + } + tagList.add(value); + } + + @Override + public void deleteTagList(CodedTupleTag tag) { + Map, List> perKeyTagLists = tagLists.get(getKey()); + if (perKeyTagLists != null) { + perKeyTagLists.remove(tag); + } + } + + @Override + public Iterable readTagList(CodedTupleTag tag) { + Map, List> perKeyTagLists = tagLists.get(getKey()); + if (perKeyTagLists == null || perKeyTagLists.get(tag) == null) { + return new ArrayList(); + } + List result = new ArrayList(); + for (Object element : perKeyTagLists.get(tag)) { + result.add((T) element); + } + return result; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java new file mode 100644 index 0000000000000..c241ee2f25912 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.Table; +import com.google.api.services.bigquery.model.TableDataInsertAllRequest; +import com.google.api.services.bigquery.model.TableDataInsertAllResponse; +import com.google.api.services.bigquery.model.TableDataList; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * Inserts rows into BigQuery. + */ +public class BigQueryTableInserter { + private static final Logger LOG = LoggerFactory.getLogger(BigQueryTableInserter.class); + + // Approximate amount of table data to upload per InsertAll request. + private static final long UPLOAD_BATCH_SIZE = 64 * 1024; + + private final Bigquery client; + private final TableReference ref; + + /** + * Constructs a new row inserter. + * + * @param client a BigQuery client + * @param ref identifies the table to insert into + */ + public BigQueryTableInserter(Bigquery client, TableReference ref) { + this.client = client; + this.ref = ref; + } + + /** + * Insert all rows from the given iterator. + */ + public void insertAll(Iterator rowIterator) throws IOException { + insertAll(rowIterator, null); + } + + /** + * Insert all rows from the given iterator using specified insertIds if not null. + */ + public void insertAll(Iterator rowIterator, + @Nullable Iterator insertIdIterator) throws IOException { + // Upload in batches. + List rows = new LinkedList<>(); + int numInserted = 0; + int dataSize = 0; + while (rowIterator.hasNext()) { + TableRow row = rowIterator.next(); + TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows(); + if (insertIdIterator != null) { + if (insertIdIterator.hasNext()) { + out.setInsertId(insertIdIterator.next()); + } else { + throw new AssertionError("If insertIdIterator is not null it needs to have at least " + + "as many elements as rowIterator"); + } + } + out.setJson(row.getUnknownKeys()); + rows.add(out); + + dataSize += row.toString().length(); + if (dataSize >= UPLOAD_BATCH_SIZE || !rowIterator.hasNext()) { + TableDataInsertAllRequest content = new TableDataInsertAllRequest(); + content.setRows(rows); + + LOG.info("Number of rows in BigQuery insert: {}", rows.size()); + numInserted += rows.size(); + + Bigquery.Tabledata.InsertAll insert = client.tabledata() + .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), + content); + TableDataInsertAllResponse response = insert.execute(); + List errors = response + .getInsertErrors(); + if (errors != null && !errors.isEmpty()) { + throw new IOException("Insert failed: " + errors); + } + + dataSize = 0; + rows.clear(); + } + } + + LOG.info("Number of rows written to BigQuery: {}", numInserted); + } + + /** + * Retrieves or creates the table. + *

+ * The table is checked to conform to insertion requirements as specified + * by WriteDisposition and CreateDisposition. + *

+ * If table truncation is requested (WriteDisposition.WRITE_TRUNCATE), then + * this will re-create the table if necessary to ensure it is empty. + *

+ * If an empty table is required (WriteDisposition.WRITE_EMPTY), then this + * will fail if the table exists and is not empty. + *

+ * When constructing a table, a {@code TableSchema} must be available. If a + * schema is provided, then it will be used. If no schema is provided, but + * an existing table is being cleared (WRITE_TRUNCATE option above), then + * the existing schema will be re-used. If no schema is available, then an + * {@code IOException} is thrown. + */ + public Table getOrCreateTable( + WriteDisposition writeDisposition, + CreateDisposition createDisposition, + @Nullable TableSchema schema) throws IOException { + // Check if table already exists. + Bigquery.Tables.Get get = client.tables() + .get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); + Table table = null; + try { + table = get.execute(); + } catch (IOException e) { + ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); + if (!errorExtractor.itemNotFound(e) || + createDisposition != CreateDisposition.CREATE_IF_NEEDED) { + // Rethrow. + throw e; + } + } + + // If we want an empty table, and it isn't, then delete it first. + if (table != null) { + if (writeDisposition == WriteDisposition.WRITE_APPEND) { + return table; + } + + boolean empty = isEmpty(); + if (empty) { + if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) { + LOG.info("Empty table found, not removing {}", BigQueryIO.toTableSpec(ref)); + } + return table; + + } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) { + throw new IOException("WriteDisposition is WRITE_EMPTY, " + + "but table is not empty"); + } + + // Reuse the existing schema if none was provided. + if (schema == null) { + schema = table.getSchema(); + } + + // Delete table and fall through to re-creating it below. + LOG.info("Deleting table {}", BigQueryIO.toTableSpec(ref)); + Bigquery.Tables.Delete delete = client.tables() + .delete(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); + delete.execute(); + } + + if (schema == null) { + throw new IllegalArgumentException( + "Table schema required for new table."); + } + + // Create the table. + return tryCreateTable(schema); + } + + /** + * Checks if a table is empty. + */ + public boolean isEmpty() throws IOException { + Bigquery.Tabledata.List list = client.tabledata() + .list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); + list.setMaxResults(1L); + TableDataList dataList = list.execute(); + + return dataList.getRows() == null || dataList.getRows().isEmpty(); + } + + /** + * Tries to create the BigQuery table. + * If a table with the same name already exists in the dataset, the table + * creation fails, and the function returns null. In such a case, + * the existing table doesn't necessarily have the same schema as specified + * by the parameter. + * + * @param schema Schema of the new BigQuery table. + * @return The newly created BigQuery table information, or null if the table + * with the same name already exists. + * @throws IOException if other error than already existing table occurs. 
+ */ + @Nullable + public Table tryCreateTable(TableSchema schema) throws IOException { + LOG.info("Trying to create BigQuery table: {}", BigQueryIO.toTableSpec(ref)); + + Table content = new Table(); + content.setTableReference(ref); + content.setSchema(schema); + + try { + return client.tables() + .insert(ref.getProjectId(), ref.getDatasetId(), content) + .execute(); + } catch (IOException e) { + if (new ApiErrorExtractor().alreadyExists(e)) { + LOG.info("The BigQuery table already exists."); + return null; + } + throw e; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java new file mode 100644 index 0000000000000..a6ea658ae3f42 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Data; +import com.google.api.client.util.Preconditions; +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.Table; +import com.google.api.services.bigquery.model.TableCell; +import com.google.api.services.bigquery.model.TableDataList; +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Objects; + +/** + * Iterates over all rows in a table. + */ +public class BigQueryTableRowIterator implements Iterator, Closeable { + + private final Bigquery client; + private final TableReference ref; + private TableSchema schema; + private String pageToken; + private Iterator rowIterator; + // Set true when the final page is seen from the service. + private boolean lastPage = false; + + public BigQueryTableRowIterator(Bigquery client, TableReference ref) { + this.client = client; + this.ref = ref; + } + + @Override + public boolean hasNext() { + try { + if (!isOpen()) { + open(); + } + + if (!rowIterator.hasNext() && !lastPage) { + readNext(); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + return rowIterator.hasNext(); + } + + /** + * Adjusts a field returned from the API to + * match the type that will be seen when run on the + * backend service. The end result is: + * + *

+ * <ul>
+ *   <li> Nulls are {@code null}.
+ *   <li> Repeated fields are lists.
+ *   <li> Record columns are {@link TableRow}s.
+ *   <li> {@code BOOLEAN} columns are JSON booleans, hence Java {@link Boolean}s.
+ *   <li> {@code FLOAT} columns are JSON floats, hence Java {@link Double}s.
+ *   <li> Every other atomic type is a {@link String}.
+ * </ul>
+ *
+ * <p>
Note that currently integers are encoded as strings to match + * the behavior of the backend service. + */ + private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) { + // In the input from the BQ API, atomic types all come in as + // strings, while on the Dataflow service they have more precise + // types. + + if (Data.isNull(v)) { + return null; + } + + if (Objects.equals(fieldSchema.getMode(), "REPEATED")) { + TableFieldSchema elementSchema = fieldSchema.clone().setMode("REQUIRED"); + List rawValues = (List) v; + List values = new ArrayList(rawValues.size()); + for (Object element : rawValues) { + values.add(getTypedCellValue(elementSchema, element)); + } + return values; + } + + if (fieldSchema.getType().equals("RECORD")) { + return getTypedTableRow(fieldSchema.getFields(), (TableRow) v); + } + + if (fieldSchema.getType().equals("FLOAT")) { + return Double.parseDouble((String) v); + } + + if (fieldSchema.getType().equals("BOOLEAN")) { + return Boolean.parseBoolean((String) v); + } + + return v; + } + + private TableRow getTypedTableRow(List fields, TableRow rawRow) { + List cells = rawRow.getF(); + Preconditions.checkState(cells.size() == fields.size()); + + Iterator cellIt = cells.iterator(); + Iterator fieldIt = fields.iterator(); + + TableRow row = new TableRow(); + while (cellIt.hasNext()) { + TableCell cell = cellIt.next(); + TableFieldSchema fieldSchema = fieldIt.next(); + row.set(fieldSchema.getName(), getTypedCellValue(fieldSchema, cell.getV())); + } + return row; + } + + @Override + public TableRow next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + // Embed schema information into the raw row, so that values have an + // associated key. This matches how rows are read when using the + // DataflowPipelineRunner. + return getTypedTableRow(schema.getFields(), rowIterator.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + private void readNext() throws IOException { + Bigquery.Tabledata.List list = client.tabledata() + .list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); + if (pageToken != null) { + list.setPageToken(pageToken); + } + + TableDataList result = list.execute(); + pageToken = result.getPageToken(); + rowIterator = result.getRows() != null ? result.getRows().iterator() : + Collections.emptyIterator(); + + // The server may return a page token indefinitely on a zero-length table. + if (pageToken == null || + result.getTotalRows() != null && result.getTotalRows() == 0) { + lastPage = true; + } + } + + @Override + public void close() throws IOException { + // Prevent any further requests. + lastPage = true; + } + + private boolean isOpen() { + return schema != null; + } + + /** + * Opens the table for read. + * @throws IOException on failure + */ + private void open() throws IOException { + // Get table schema. + Bigquery.Tables.Get get = client.tables() + .get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId()); + Table table = get.execute(); + schema = table.getSchema(); + + // Read the first page of results. + readNext(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java new file mode 100644 index 0000000000000..4801d6d64c3c3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.WindowUtils.bufferTag; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.MapCoder; +import com.google.cloud.dataflow.sdk.coders.SetCoder; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * A WindowSet allowing windows to be merged and deleted. + */ +class BufferingWindowSet + extends AbstractWindowSet, W> { + /** + * Tag for storing the merge tree, the data structure that keeps + * track of which windows have been merged together. + */ + private final CodedTupleTag>> mergeTreeTag = + CodedTupleTag.of( + "mergeTree", + MapCoder.of( + windowingFn.windowCoder(), + SetCoder.of(windowingFn.windowCoder()))); + + /** + * A map of live windows to windows that were merged into them. + * + *

The keys of the map correspond to the set of (merged) windows and the values + * are the no-longer-present windows that were merged into the keys. A given + * window can appear in both the key and value of a single entry, but other at + * most once across all keys and values. + */ + private final Map> mergeTree; + + /** + * Used to determine if writing the mergeTree (which is relatively stable) + * is necessary. + */ + private final Map> originalMergeTree; + + protected BufferingWindowSet( + K key, + WindowingFn windowingFn, + Coder inputCoder, + DoFnProcessContext>> context, + ActiveWindowManager activeWindowManager) throws Exception { + super(key, windowingFn, inputCoder, context, activeWindowManager); + + mergeTree = emptyIfNull( + context.context.stepContext.lookup(Arrays.asList(mergeTreeTag)) + .get(mergeTreeTag)); + + originalMergeTree = deepCopy(mergeTree); + } + + @Override + public void put(W window, V value) throws Exception { + context.context.stepContext.writeToTagList( + bufferTag(window, windowingFn.windowCoder(), inputCoder), + value, + context.timestamp()); + if (!mergeTree.containsKey(window)) { + mergeTree.put(window, new HashSet()); + activeWindowManager.addWindow(window); + } + } + + @Override + public void remove(W window) throws Exception { + mergeTree.remove(window); + activeWindowManager.removeWindow(window); + } + + @Override + public void merge(Collection otherWindows, W newWindow) throws Exception { + Set subWindows = mergeTree.get(newWindow); + if (subWindows == null) { + subWindows = new HashSet<>(); + } + for (W other : otherWindows) { + if (!mergeTree.containsKey(other)) { + throw new IllegalArgumentException("Tried to merge a non-existent window: " + other); + } + subWindows.addAll(mergeTree.get(other)); + subWindows.add(other); + remove(other); + } + mergeTree.put(newWindow, subWindows); + activeWindowManager.addWindow(newWindow); + } + + @Override + public Collection windows() { + return Collections.unmodifiableSet(mergeTree.keySet()); + } + + @Override + public boolean contains(W window) { + return mergeTree.containsKey(window); + } + + @Override + protected Iterable finalValue(W window) throws Exception { + if (!contains(window)) { + throw new IllegalStateException("finalValue called for non-existent window"); + } + + List toEmit = new ArrayList<>(); + // This is the set of windows that we're currently emitting. + Set curWindows = new HashSet<>(); + curWindows.add(window); + curWindows.addAll(mergeTree.get(window)); + + // This is the set of unflushed windows (for preservation detection). 
+ Set otherWindows = new HashSet<>(); + for (Map.Entry> entry : mergeTree.entrySet()) { + if (!entry.getKey().equals(window)) { + otherWindows.add(entry.getKey()); + otherWindows.addAll(entry.getValue()); + } + } + + for (W curWindow : curWindows) { + Iterable items = context.context.stepContext.readTagList(bufferTag( + curWindow, windowingFn.windowCoder(), inputCoder)); + for (V item : items) { + toEmit.add(item); + } + context.context.stepContext.deleteTagList(bufferTag( + curWindow, windowingFn.windowCoder(), inputCoder)); + } + + return toEmit; + } + + @Override + public void flush() throws Exception { + if (!mergeTree.equals(originalMergeTree)) { + context.context.stepContext.store(mergeTreeTag, mergeTree); + } + } + + private static Map> emptyIfNull(Map> input) { + if (input == null) { + return new HashMap<>(); + } else { + for (Map.Entry> entry : input.entrySet()) { + if (entry.getValue() == null) { + entry.setValue(new HashSet()); + } + } + return input; + } + } + + private Map> deepCopy(Map> mergeTree) { + Map> newMergeTree = new HashMap<>(); + for (Map.Entry> entry : mergeTree.entrySet()) { + newMergeTree.put(entry.getKey(), new HashSet(entry.getValue())); + } + return newMergeTree; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java new file mode 100644 index 0000000000000..f96ba486f24da --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java @@ -0,0 +1,104 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.dataflow.model.MetricStructuredName; +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Utilities for working with CloudCounters. 
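+ *
+ * <p>A minimal usage sketch (illustrative only; {@code counterSet} is a
+ * hypothetical, already-populated {@link CounterSet}):
+ * <pre>{@code
+ * // Extract delta updates for every counter currently in the set.
+ * List<MetricUpdate> updates = CloudCounterUtils.extractCounters(counterSet, true);
+ * }</pre>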
+ */ +public class CloudCounterUtils { + private static final Logger LOG = LoggerFactory.getLogger(CloudCounterUtils.class); + + public static List extractCounters( + CounterSet counters, boolean delta) { + synchronized (counters) { + List cloudCounters = new ArrayList<>(counters.size()); + for (Counter counter : counters) { + try { + MetricUpdate cloudCounter = extractCounter(counter, delta); + if (cloudCounter != null) { + cloudCounters.add(cloudCounter); + } + } catch (IllegalArgumentException exn) { + LOG.warn("Error extracting counter value: ", exn); + } + } + return cloudCounters; + } + } + + public static MetricUpdate extractCounter(Counter counter, boolean delta) { + // TODO: Omit no-op counter updates, for counters whose + // values haven't changed since the last time we sent them. + synchronized (counter) { + MetricStructuredName name = new MetricStructuredName(); + name.setName(counter.getName()); + MetricUpdate metricUpdate = new MetricUpdate() + .setName(name) + .setKind(counter.getKind().name()) + .setCumulative(!delta); + switch (counter.getKind()) { + case SUM: + case MAX: + case MIN: + case AND: + case OR: + metricUpdate.setScalar(CloudObject.forKnownType(counter.getAggregate(delta))); + break; + case MEAN: { + long countUpdate = counter.getCount(delta); + if (countUpdate <= 0) { + return null; + } + metricUpdate.setMeanSum(CloudObject.forKnownType(counter.getAggregate(delta))); + metricUpdate.setMeanCount(CloudObject.forKnownType(countUpdate)); + break; + } + case SET: { + Set values = counter.getSet(delta); + if (values.isEmpty()) { + return null; + } + Set encodedSet = new HashSet(values.size()); + for (Object value : values) { + encodedSet.add(CloudObject.forKnownType(value)); + } + metricUpdate.setSet(encodedSet); + break; + } + default: + throw new IllegalArgumentException("unexpected kind of counter"); + } + if (delta) { + counter.resetDelta(); + } + return metricUpdate; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java new file mode 100644 index 0000000000000..ad57b99536313 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.Nullable; + +/** A utility for manipulating well-known cloud types. 
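+ *
+ * <p>A minimal sketch of the expected behavior (illustrative only):
+ * <pre>{@code
+ * Long n = CloudKnownType.INTEGER.parse("42", Long.class);    // 42L
+ * Double d = CloudKnownType.FLOAT.parse("2.5", Double.class); // 2.5
+ * }</pre>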
*/ +enum CloudKnownType { + TEXT("http://schema.org/Text", String.class) { + @Override + public T parse(Object value, Class clazz) { + return clazz.cast(value); + } + }, + BOOLEAN("http://schema.org/Boolean", Boolean.class) { + @Override + public T parse(Object value, Class clazz) { + return clazz.cast(value); + } + }, + INTEGER("http://schema.org/Integer", Long.class, Integer.class) { + @Override + public T parse(Object value, Class clazz) { + Object result = null; + if (value.getClass() == clazz) { + result = value; + } else if (clazz == Long.class) { + if (value instanceof Integer) { + result = ((Integer) value).longValue(); + } else if (value instanceof String) { + result = Long.valueOf((String) value); + } + } else if (clazz == Integer.class) { + if (value instanceof Long) { + result = ((Long) value).intValue(); + } else if (value instanceof String) { + result = Integer.valueOf((String) value); + } + } + return clazz.cast(result); + } + }, + FLOAT("http://schema.org/Float", Double.class, Float.class) { + @Override + public T parse(Object value, Class clazz) { + Object result = null; + if (value.getClass() == clazz) { + result = value; + } else if (clazz == Double.class) { + if (value instanceof Float) { + result = ((Float) value).doubleValue(); + } else if (value instanceof String) { + result = Double.valueOf((String) value); + } + } else if (clazz == Float.class) { + if (value instanceof Double) { + result = ((Double) value).floatValue(); + } else if (value instanceof String) { + result = Float.valueOf((String) value); + } + } + return clazz.cast(result); + } + }; + + private final String uri; + private final Class[] classes; + + private CloudKnownType(String uri, Class... classes) { + this.uri = uri; + this.classes = classes; + } + + public String getUri() { + return uri; + } + + public abstract T parse(Object value, Class clazz); + + public Class defaultClass() { + return classes[0]; + } + + private static final Map typesByUri = + Collections.unmodifiableMap(buildTypesByUri()); + + private static Map buildTypesByUri() { + Map result = new HashMap<>(); + for (CloudKnownType ty : CloudKnownType.values()) { + result.put(ty.getUri(), ty); + } + return result; + } + + @Nullable + public static CloudKnownType forUri(@Nullable String uri) { + if (uri == null) { + return null; + } + return typesByUri.get(uri); + } + + private static final Map, CloudKnownType> typesByClass = + Collections.unmodifiableMap(buildTypesByClass()); + + private static Map, CloudKnownType> buildTypesByClass() { + Map, CloudKnownType> result = new HashMap<>(); + for (CloudKnownType ty : CloudKnownType.values()) { + for (Class clazz : ty.classes) { + result.put(clazz, ty); + } + } + return result; + } + + @Nullable + public static CloudKnownType forClass(Class clazz) { + return typesByClass.get(clazz); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtils.java new file mode 100644 index 0000000000000..da99e5b3c3851 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtils.java @@ -0,0 +1,73 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.dataflow.model.MetricStructuredName; +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.util.common.Metric; +import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Utilities for working with Dataflow API Metrics. + */ +public class CloudMetricUtils { + // Do not instantiate. + private CloudMetricUtils() {} + + /** + * Returns a List of {@link MetricUpdate}s representing the given Metrics. + */ + public static List extractCloudMetrics( + Collection> metrics, + String workerId) { + List cloudMetrics = new ArrayList<>(metrics.size()); + for (Metric metric : metrics) { + cloudMetrics.add(extractCloudMetric(metric, workerId)); + } + return cloudMetrics; + } + + /** + * Returns a {@link MetricUpdate} representing the given Metric. + */ + public static MetricUpdate extractCloudMetric(Metric metric, String workerId) { + if (metric instanceof DoubleMetric) { + return extractCloudMetric( + metric, + ((DoubleMetric) metric).getValue(), + workerId); + } else { + throw new IllegalArgumentException("unexpected kind of Metric"); + } + } + + private static MetricUpdate extractCloudMetric( + Metric metric, Double value, String workerId) { + MetricStructuredName name = new MetricStructuredName(); + name.setName(metric.getName()); + Map context = new HashMap<>(); + context.put("workerId", workerId); + name.setContext(context); + return new MetricUpdate().setName(name).setScalar(CloudObject.forFloat(value)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java new file mode 100644 index 0000000000000..973fe5ab7707e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.api.client.util.Preconditions.checkNotNull; + +import com.google.api.client.json.GenericJson; +import com.google.api.client.util.Key; + +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A representation of an arbitrary Java object to be instantiated by Dataflow + * workers. + *

+ * Typically, an object to be written by the SDK to the Dataflow service will + * implement a method (typically called {@code asCloudObject()}) which returns a + * {@code CloudObject} to represent the object in the protocol. Once the + * {@code CloudObject} is constructed, the method should explicitly add + * additional properties to be presented during deserialization, representing + * child objects by building additional {@code CloudObject}s. + */ +public final class CloudObject extends GenericJson { + /** + * Constructs a {@code CloudObject} by copying the supplied serialized object spec, + * which must represent an SDK object serialized for transport via the + * Dataflow API. + *
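+ * <p>A minimal sketch (illustrative; the class name and map contents are
+ * hypothetical):
+ * <pre>{@code
+ * Map<String, Object> spec = new HashMap<>();
+ * spec.put(PropertyNames.OBJECT_TYPE_NAME, "com.example.MyCoder");
+ * CloudObject cloudObject = CloudObject.fromSpec(spec);
+ * }</pre>
+ *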

+ * The most common use of this method is during deserialization on the worker, + * where it's used as a binding type during instance construction. + * + * @param spec supplies the serialized form of the object as a nested map + * @throws RuntimeException if the supplied map does not represent an SDK object + */ + public static CloudObject fromSpec(Map spec) { + CloudObject result = new CloudObject(); + result.putAll(spec); + if (result.className == null) { + throw new RuntimeException("Unable to create an SDK object from " + spec + + ": Object class not specified (missing \"" + + PropertyNames.OBJECT_TYPE_NAME + "\" field)"); + } + return result; + } + + /** + * Constructs a {@code CloudObject} to be used for serializing an instance of + * the supplied class for transport via the Dataflow API. The instance + * parameters to be serialized must be supplied explicitly after the + * {@code CloudObject} is created, by using {@link CloudObject#put}. + * + * @param cls the class to use when deserializing the object on the worker + */ + public static CloudObject forClass(Class cls) { + CloudObject result = new CloudObject(); + result.className = checkNotNull(cls).getName(); + return result; + } + + /** + * Constructs a {@code CloudObject} to be used for serializing data to be + * deserialized using the supplied class name the supplied class name for + * transport via the Dataflow API. The instance parameters to be serialized + * must be supplied explicitly after the {@code CloudObject} is created, by + * using {@link CloudObject#put}. + * + * @param className the class to use when deserializing the object on the worker + */ + public static CloudObject forClassName(String className) { + CloudObject result = new CloudObject(); + result.className = checkNotNull(className); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. + * @param value the scalar value to represent. + */ + public static CloudObject forString(String value) { + CloudObject result = forClassName(CloudKnownType.TEXT.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. + * @param value the scalar value to represent. + */ + public static CloudObject forBoolean(Boolean value) { + CloudObject result = forClassName(CloudKnownType.BOOLEAN.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. + * @param value the scalar value to represent. + */ + public static CloudObject forInteger(Long value) { + CloudObject result = forClassName(CloudKnownType.INTEGER.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. + * @param value the scalar value to represent. + */ + public static CloudObject forInteger(Integer value) { + CloudObject result = forClassName(CloudKnownType.INTEGER.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. + * @param value the scalar value to represent. + */ + public static CloudObject forFloat(Float value) { + CloudObject result = forClassName(CloudKnownType.FLOAT.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value. 
+ * @param value the scalar value to represent. + */ + public static CloudObject forFloat(Double value) { + CloudObject result = forClassName(CloudKnownType.FLOAT.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + /** + * Constructs a {@code CloudObject} representing the given value of a + * well-known cloud object type. + * @param value the scalar value to represent. + * @throw RuntimeException if the value does not have a {@link CloudKnownType} + * mapping + */ + public static CloudObject forKnownType(Object value) { + @Nullable CloudKnownType ty = CloudKnownType.forClass(value.getClass()); + if (ty == null) { + throw new RuntimeException("Unable to represent value via the Dataflow API: " + value); + } + CloudObject result = forClassName(ty.getUri()); + result.put(PropertyNames.SCALAR_FIELD_NAME, value); + return result; + } + + @Key(PropertyNames.OBJECT_TYPE_NAME) + private String className; + + private CloudObject() {} + + /** + * Gets the name of the Java class which this CloudObject represents. + */ + public String getClassName() { + return className; + } + + @Override + public CloudObject clone() { + return (CloudObject) super.clone(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java new file mode 100644 index 0000000000000..7d97948af437e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.runners.worker.SourceFactory; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Utilities for working with Source Dataflow API definitions and {@link Source} + * objects. + */ +public class CloudSourceUtils { + /** + * Returns a copy of the source with {@code baseSpecs} flattened into {@code spec}. + * On conflict for a parameter name, values in {@code spec} override values in {@code baseSpecs}, + * and later values in {@code baseSpecs} override earlier ones. + */ + public static com.google.api.services.dataflow.model.Source + flattenBaseSpecs(com.google.api.services.dataflow.model.Source source) { + if (source.getBaseSpecs() == null) { + return source; + } + Map params = new HashMap<>(); + for (Map baseSpec : source.getBaseSpecs()) { + params.putAll(baseSpec); + } + params.putAll(source.getSpec()); + + com.google.api.services.dataflow.model.Source result = source.clone(); + result.setSpec(params); + result.setBaseSpecs(null); + return result; + } + + /** Reads all elements from the given {@link Source}. 
*/ + public static List readElemsFromSource(Source source) { + List elems = new ArrayList<>(); + try (Source.SourceIterator it = source.iterator()) { + while (it.hasNext()) { + elems.add(it.next()); + } + } catch (IOException e) { + throw new RuntimeException("Failed to read from source: " + source, e); + } + return elems; + } + + /** + * Creates a {@link Source} from the given Dataflow Source API definition and + * reads all elements from it. + */ + public static List readElemsFromSource( + com.google.api.services.dataflow.model.Source source) { + try { + return readElemsFromSource(SourceFactory.create(null, source, null)); + } catch (Exception e) { + throw new RuntimeException("Failed to read from source: " + source.toString(), e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java new file mode 100644 index 0000000000000..c77f35a45da2a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.Structs.addList; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoderBase; +import com.google.cloud.dataflow.sdk.coders.MapCoder; +import com.google.cloud.dataflow.sdk.coders.MapCoderBase; + +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonTypeInfo.As; +import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; +import com.fasterxml.jackson.databind.JavaType; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; +import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.fasterxml.jackson.databind.type.TypeFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.lang.reflect.TypeVariable; + +/** + * Utilities for working with Coders. + */ +public final class CoderUtils { + private CoderUtils() {} // Non-instantiable + + /** + * Coder class-name alias for a key-value type. + */ + public static final String KIND_PAIR = "kind:pair"; + + /** + * Coder class-name alias for a stream type. + */ + public static final String KIND_STREAM = "kind:stream"; + + /** + * Encodes the given value using the specified Coder, and returns + * the encoded bytes. 
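+ *
+ * <p>A minimal round-trip sketch (illustrative; assumes {@code StringUtf8Coder}):
+ * <pre>{@code
+ * byte[] bytes = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "hello");
+ * String decoded = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), bytes);
+ * }</pre>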
+ * + * @throws CoderException if there are errors during encoding + */ + public static byte[] encodeToByteArray(Coder coder, T value) + throws CoderException { + try { + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + coder.encode(value, os, Coder.Context.OUTER); + return os.toByteArray(); + } + } catch (IOException exn) { + throw new RuntimeException("unexpected IOException", exn); + } + } + + /** + * Decodes the given bytes using the specified Coder, and returns + * the resulting decoded value. + * + * @throws CoderException if there are errors during decoding + */ + public static T decodeFromByteArray(Coder coder, byte[] encodedValue) + throws CoderException { + try { + try (ByteArrayInputStream is = new ByteArrayInputStream(encodedValue)) { + T result = coder.decode(is, Coder.Context.OUTER); + if (is.available() != 0) { + throw new CoderException( + is.available() + " unexpected extra bytes after decoding " + + result); + } + return result; + } + } catch (IOException exn) { + throw new RuntimeException("unexpected IOException", exn); + } + } + + public static CloudObject makeCloudEncoding( + String type, + CloudObject... componentSpecs) { + CloudObject encoding = CloudObject.forClassName(type); + if (componentSpecs.length > 0) { + addList(encoding, PropertyNames.COMPONENT_ENCODINGS, componentSpecs); + } + return encoding; + } + + /** + * A {@link com.fasterxml.jackson.databind.module.Module} which adds the type + * resolver needed for Coder definitions created by the Dataflow service. + */ + static final class Jackson2Module extends SimpleModule { + /** + * The Coder custom type resolver. + *

+ * This resolver resolves coders. If the Coder ID is a particular + * well-known identifier supplied by the Dataflow service, it's replaced + * with the corresponding class. All other Coder instances are resolved + * by class name, using the package com.google.cloud.dataflow.sdk.coders + * if there are no "."s in the ID. + */ + private static final class Resolver extends TypeIdResolverBase { + public Resolver() { + super(TypeFactory.defaultInstance().constructType(Coder.class), + TypeFactory.defaultInstance()); + } + + @Override + public JavaType typeFromId(String id) { + Class clazz = getClassForId(id); + if (clazz == KvCoder.class) { + clazz = KvCoderBase.class; + } + if (clazz == MapCoder.class) { + clazz = MapCoderBase.class; + } + TypeVariable[] tvs = clazz.getTypeParameters(); + JavaType[] types = new JavaType[tvs.length]; + for (int lupe = 0; lupe < tvs.length; lupe++) { + types[lupe] = TypeFactory.unknownType(); + } + return _typeFactory.constructSimpleType(clazz, types); + } + + private Class getClassForId(String id) { + try { + if (id.contains(".")) { + return Class.forName(id); + } + + if (id.equals(KIND_STREAM)) { + return IterableCoder.class; + } else if (id.equals(KIND_PAIR)) { + return KvCoder.class; + } + + // Otherwise, see if the ID is the name of a class in + // com.google.cloud.dataflow.sdk.coders. We do this via creating + // the class object so that class loaders have a chance to get + // involved -- and since we need the class object anyway. + return Class.forName("com.google.cloud.dataflow.sdk.coders." + id); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Unable to convert coder ID " + id + " to class", e); + } + } + + @Override + public String idFromValueAndType(Object o, Class clazz) { + return clazz.getName(); + } + + @Override + public String idFromValue(Object o) { + return o.getClass().getName(); + } + + @Override + public JsonTypeInfo.Id getMechanism() { + return JsonTypeInfo.Id.CUSTOM; + } + } + + /** + * The mixin class defining how Coders are handled by the deserialization + * {@link ObjectMapper}. + *

+ * This is done via a mixin so that this resolver is only used + * during deserialization requested by the Dataflow SDK. + */ + @JsonTypeIdResolver(Resolver.class) + @JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME) + private static final class Mixin {} + + public Jackson2Module() { + super("DataflowCoders"); + setMixInAnnotation(Coder.class, Mixin.class); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java new file mode 100644 index 0000000000000..2a24a76fde9f3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.extensions.java6.auth.oauth2.AbstractPromptReceiver; +import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp; +import com.google.api.client.googleapis.auth.oauth2.GoogleAuthorizationCodeFlow; +import com.google.api.client.googleapis.auth.oauth2.GoogleClientSecrets; +import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; +import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants; +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.json.JsonFactory; +import com.google.api.client.json.jackson2.JacksonFactory; +import com.google.api.client.util.Preconditions; +import com.google.api.client.util.Strings; +import com.google.api.client.util.store.FileDataStoreFactory; +import com.google.cloud.dataflow.sdk.options.GcpOptions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.security.GeneralSecurityException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * Provides support for loading credentials. + */ +public class Credentials { + + private static final Logger LOG = LoggerFactory.getLogger(Credentials.class); + + /** OAuth 2.0 scopes used by a local worker (not on GCE). + * The scope cloud-platform provides access to all Cloud Platform resources. + * cloud-platform isn't sufficient yet for talking to datastore so we request + * those resources separately. + * + * Note that trusted scope relationships don't apply to OAuth tokens, so for + * services we access directly (GCS) as opposed to through the backend + * (BigQuery, GCE), we need to explicitly request that scope. 
+ */ + private static final List WORKER_SCOPES = Arrays.asList( + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/devstorage.full_control", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/datastore"); + + private static final List USER_SCOPES = Arrays.asList( + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/devstorage.full_control", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/datastore"); + + private static class PromptReceiver extends AbstractPromptReceiver { + @Override + public String getRedirectUri() { + return GoogleOAuthConstants.OOB_REDIRECT_URI; + } + } + + /** + * Initializes OAuth2 credential for a worker, using the + * + * application default credentials, or from a local key file when running outside of GCE. + */ + public static Credential getWorkerCredential(GcpOptions options) + throws IOException { + String keyFile = options.getServiceAccountKeyfile(); + String accountName = options.getServiceAccountName(); + + if (keyFile != null && accountName != null) { + try { + return getCredentialFromFile(keyFile, accountName, WORKER_SCOPES); + } catch (GeneralSecurityException e) { + LOG.warn("Unable to obtain credentials from file {}", keyFile); + // Fall through.. + } + } + + return GoogleCredential.getApplicationDefault().createScoped(WORKER_SCOPES); + } + + /** + * Initializes OAuth2 credential for an interactive user program. + * + * This can use 4 different mechanisms for obtaining a credential: + *

+ * <ol>
+ *   <li>
+ *     It can fetch the application default credentials.
+ *   </li>
+ *   <li>
+ *     It can run the gcloud tool in a subprocess to obtain a credential.
+ *     This is the preferred mechanism. The property "gcloud_path" can be
+ *     used to specify where we search for gcloud data.
+ *   </li>
+ *   <li>
+ *     The user can specify a client secrets file and go through the OAuth2
+ *     webflow. The credential will then be cached in the user's home
+ *     directory for reuse. Provide the property "secrets_file" to use this
+ *     mechanism.
+ *   </li>
+ *   <li>
+ *     The user can specify a file containing a service account.
+ *     Provide the properties "service_account_keyfile" and
+ *     "service_account_name" to use this mechanism.
+ *   </li>
+ * </ol>
+ *
+ * The default mechanism is to use the + * + * application default credentials falling back to gcloud. The other options can be + * used by providing the corresponding properties. + */ + public static Credential getUserCredential(GcpOptions options) + throws IOException, GeneralSecurityException { + String keyFile = options.getServiceAccountKeyfile(); + String accountName = options.getServiceAccountName(); + + if (keyFile != null && accountName != null) { + try { + return getCredentialFromFile(keyFile, accountName, USER_SCOPES); + } catch (GeneralSecurityException e) { + throw new IOException("Unable to obtain credentials from file", e); + } + } + + if (options.getSecretsFile() != null) { + return getCredentialFromClientSecrets(options, USER_SCOPES); + } + + try { + return GoogleCredential.getApplicationDefault().createScoped(USER_SCOPES); + } catch (IOException e) { + LOG.info("Failed to get application default credentials, falling back to gcloud."); + } + + String gcloudPath = options.getGCloudPath(); + return getCredentialFromGCloud(gcloudPath); + } + + /** + * Loads OAuth2 credential from a local file. + */ + private static Credential getCredentialFromFile( + String keyFile, String accountId, Collection scopes) + throws IOException, GeneralSecurityException { + GoogleCredential credential = new GoogleCredential.Builder() + .setTransport(Transport.getTransport()) + .setJsonFactory(Transport.getJsonFactory()) + .setServiceAccountId(accountId) + .setServiceAccountScopes(scopes) + .setServiceAccountPrivateKeyFromP12File(new File(keyFile)) + .build(); + + LOG.info("Created credential from file {}", keyFile); + return credential; + } + + /** + * Loads OAuth2 credential from GCloud utility. + */ + private static Credential getCredentialFromGCloud(String gcloudPath) + throws IOException, GeneralSecurityException { + GCloudCredential credential; + HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport(); + if (Strings.isNullOrEmpty(gcloudPath)) { + credential = new GCloudCredential(transport); + } else { + credential = new GCloudCredential(gcloudPath, transport); + } + + try { + credential.refreshToken(); + } catch (IOException e) { + throw new RuntimeException("Could not obtain credential using gcloud", e); + } + + LOG.info("Got credential from GCloud"); + return credential; + } + + /** + * Loads OAuth2 credential from client secrets, which may require an + * interactive authorization prompt. + */ + private static Credential getCredentialFromClientSecrets( + GcpOptions options, Collection scopes) + throws IOException, GeneralSecurityException { + String clientSecretsFile = options.getSecretsFile(); + + Preconditions.checkArgument(clientSecretsFile != null); + HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport(); + + JsonFactory jsonFactory = JacksonFactory.getDefaultInstance(); + GoogleClientSecrets clientSecrets; + + try { + clientSecrets = GoogleClientSecrets.load(jsonFactory, + new FileReader(clientSecretsFile)); + } catch (IOException e) { + throw new RuntimeException( + "Could not read the client secrets from file: " + clientSecretsFile, + e); + } + + FileDataStoreFactory dataStoreFactory = + new FileDataStoreFactory(new java.io.File(options.getCredentialDir())); + + GoogleAuthorizationCodeFlow flow = new GoogleAuthorizationCodeFlow.Builder( + httpTransport, jsonFactory, clientSecrets, scopes) + .setDataStoreFactory(dataStoreFactory) + .build(); + + // The credentialId identifies the credential if we're using a persistent + // credential store. 
+ Credential credential = + new AuthorizationCodeInstalledApp(flow, new PromptReceiver()) + .authorize(options.getCredentialId()); + + LOG.info("Got credential from client secret"); + return credential; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java new file mode 100644 index 0000000000000..ab7e0de6a8e0f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.json.GenericJson; +import com.google.api.client.util.Key; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + +/** + * Utilities for working with the Dataflow distribution. + */ +public final class DataflowReleaseInfo extends GenericJson { + private static final Logger LOG = LoggerFactory.getLogger(DataflowReleaseInfo.class); + + private static final String DATAFLOW_PROPERTIES_PATH = + "/com/google/cloud/dataflow/sdk/sdk.properties"; + + private static class LazyInit { + private static final DataflowReleaseInfo INSTANCE = + new DataflowReleaseInfo(DATAFLOW_PROPERTIES_PATH); + } + + /** + * Returns an instance of DataflowReleaseInfo. + */ + public static DataflowReleaseInfo getReleaseInfo() { + return LazyInit.INSTANCE; + } + + @Key private String name = "Google Cloud Dataflow Java SDK"; + @Key private String version = "Unknown"; + + /** Provides the SDK name. */ + public String getName() { + return name; + } + + /** Provides the SDK version. */ + public String getVersion() { + return version; + } + + private DataflowReleaseInfo(String resourcePath) { + Properties properties = new Properties(); + + InputStream in = DataflowReleaseInfo.class.getResourceAsStream( + DATAFLOW_PROPERTIES_PATH); + if (in == null) { + LOG.warn("Dataflow properties resource not found: {}", resourcePath); + return; + } + + try { + properties.load(in); + } catch (IOException e) { + LOG.warn("Error loading Dataflow properties resource: ", e); + } + + for (String name : properties.stringPropertyNames()) { + if (name.equals("name")) { + // We don't allow the properties to override the SDK name. + continue; + } + put(name, properties.getProperty(name)); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java new file mode 100644 index 0000000000000..a157ceefa57ca --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; + +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * {@link ExecutionContext} for use in direct mode. + */ +public class DirectModeExecutionContext extends BatchModeExecutionContext { + List output = new ArrayList<>(); + Map, List> sideOutputs = new HashMap<>(); + + @Override + public ExecutionContext.StepContext createStepContext(String stepName) { + return new StepContext(stepName); + } + + @Override + public void noteOutput(WindowedValue outputElem) { + output.add(ValueWithMetadata.of(outputElem) + .withKey(getKey())); + } + + @Override + public void noteSideOutput(TupleTag tag, WindowedValue outputElem) { + List output = sideOutputs.get(tag); + if (output == null) { + output = new ArrayList<>(); + sideOutputs.put(tag, output); + } + output.add(ValueWithMetadata.of(outputElem) + .withKey(getKey())); + } + + public List> getOutput(TupleTag tag) { + return (List) output; + } + + public List> getSideOutput(TupleTag tag) { + if (sideOutputs.containsKey(tag)) { + return (List) sideOutputs.get(tag); + } else { + return new ArrayList<>(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java new file mode 100644 index 0000000000000..80d8f34edd04d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.DoFnRunner.OutputManager; +import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.joda.time.Instant; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A concrete implementation of {@link DoFn.Context} used for running + * a {@link DoFn}. + * + * @param the type of the DoFn's (main) input elements + * @param the type of the DoFn's (main) output elements + * @param the type of object which receives outputs + */ +class DoFnContext extends DoFn.Context { + private static final int MAX_SIDE_OUTPUTS = 1000; + + final PipelineOptions options; + final DoFn fn; + final PTuple sideInputs; + final OutputManager outputManager; + final Map outputMap; + final TupleTag mainOutputTag; + final StepContext stepContext; + final CounterSet.AddCounterMutator addCounterMutator; + + public DoFnContext(PipelineOptions options, + DoFn fn, + PTuple sideInputs, + OutputManager outputManager, + TupleTag mainOutputTag, + List> sideOutputTags, + StepContext stepContext, + CounterSet.AddCounterMutator addCounterMutator) { + fn.super(); + this.options = options; + this.fn = fn; + this.sideInputs = sideInputs; + this.outputManager = outputManager; + this.mainOutputTag = mainOutputTag; + this.outputMap = new HashMap<>(); + outputMap.put(mainOutputTag, outputManager.initialize(mainOutputTag)); + for (TupleTag sideOutputTag : sideOutputTags) { + outputMap.put(sideOutputTag, outputManager.initialize(sideOutputTag)); + } + this.stepContext = stepContext; + this.addCounterMutator = addCounterMutator; + } + + public R getReceiver(TupleTag tag) { + R receiver = outputMap.get(tag); + if (receiver == null) { + throw new IllegalArgumentException( + "calling getReceiver() with unknown tag " + tag); + } + return receiver; + } + + ////////////////////////////////////////////////////////////////////////////// + + @Override + public PipelineOptions getPipelineOptions() { + return options; + } + + @Override + public T sideInput(PCollectionView view) { + TupleTag tag = view.getTagInternal(); + if (!sideInputs.has(tag)) { + throw new IllegalArgumentException( + "calling sideInput() with unknown view; " + + "did you forget to pass the view in " + + "ParDo.withSideInputs()?"); + } + return view.fromIterableInternal((Iterable>) sideInputs.get(tag)); + } + + void outputWindowedValue( + O output, + Instant timestamp, + Collection windows) { + WindowedValue windowedElem = WindowedValue.of(output, timestamp, windows); + outputManager.output(outputMap.get(mainOutputTag), windowedElem); + if (stepContext != null) { + stepContext.noteOutput(windowedElem); + } + } + + protected void sideOutputWindowedValue(TupleTag tag, + T output, + Instant timestamp, + Collection windows) { + R receiver = outputMap.get(tag); + 
if (receiver == null) { + // This tag wasn't declared nor was it seen before during this execution. + // Thus, this must be a new, undeclared and unconsumed output. + + // To prevent likely user errors, enforce the limit on the number of side + // outputs. + if (outputMap.size() >= MAX_SIDE_OUTPUTS) { + throw new IllegalArgumentException( + "the number of side outputs has exceeded a limit of " + + MAX_SIDE_OUTPUTS); + } + + // Register the new TupleTag with outputManager and add an entry for it in + // the outputMap. + receiver = outputManager.initialize(tag); + outputMap.put(tag, receiver); + } + + WindowedValue windowedElem = WindowedValue.of(output, timestamp, windows); + outputManager.output(receiver, windowedElem); + if (stepContext != null) { + stepContext.noteSideOutput(tag, windowedElem); + } + } + + // Following implementations of output, outputWithTimestamp, and sideOutput + // are only accessible in DoFn.startBundle and DoFn.finishBundle, and will be shadowed by + // ProcessContext's versions in DoFn.processElement. + // TODO: it seems wrong to use Long.MIN_VALUE, since it will violate all our rules about + // DoFns preserving watermarks. + @Override + public void output(O output) { + outputWindowedValue(output, + new Instant(Long.MIN_VALUE), + Arrays.asList(GlobalWindow.Window.INSTANCE)); + } + + @Override + public void outputWithTimestamp(O output, Instant timestamp) { + outputWindowedValue(output, timestamp, Arrays.asList(GlobalWindow.Window.INSTANCE)); + } + + @Override + public void sideOutput(TupleTag tag, T output) { + sideOutputWindowedValue(tag, + output, + new Instant(Long.MIN_VALUE), + Arrays.asList(GlobalWindow.Window.INSTANCE)); + } + + private String generateInternalAggregatorName(String userName) { + return "user-" + stepContext.getStepName() + "-" + userName; + } + + @Override + public Aggregator createAggregator( + String name, Combine.CombineFn combiner) { + return new AggregatorImpl<>(generateInternalAggregatorName(name), combiner, addCounterMutator); + } + + @Override + public Aggregator createAggregator( + String name, SerializableFunction, AO> combiner) { + return new AggregatorImpl, AO>( + generateInternalAggregatorName(name), combiner, addCounterMutator); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java new file mode 100644 index 0000000000000..d393e6f0b8b66 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn.KeyedState; +import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresKeyedState; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.joda.time.Instant; + +import java.util.Collection; + +/** + * A concrete implementation of {@link DoFn.ProcessContext} used for running + * a {@link DoFn} over a single element. + * + * @param the type of the DoFn's (main) input elements + * @param the type of the DoFn's (main) output elements + */ +class DoFnProcessContext extends DoFn.ProcessContext { + + final DoFn fn; + final DoFnContext context; + final WindowedValue windowedValue; + + public DoFnProcessContext(DoFn fn, + DoFnContext context, + WindowedValue windowedValue) { + fn.super(); + this.fn = fn; + this.context = context; + this.windowedValue = windowedValue; + } + + @Override + public PipelineOptions getPipelineOptions() { + return context.getPipelineOptions(); + } + + @Override + public I element() { + return windowedValue.getValue(); + } + + @Override + public KeyedState keyedState() { + if (!(fn instanceof RequiresKeyedState) + || (element() != null && !(element() instanceof KV))) { + throw new UnsupportedOperationException( + "Keyed state is only available in the context of a keyed DoFn marked as requiring state"); + } + + return context.stepContext; + } + + @Override + public T sideInput(PCollectionView view) { + return context.sideInput(view); + } + + @Override + public void output(O output) { + context.outputWindowedValue(output, windowedValue.getTimestamp(), windowedValue.getWindows()); + } + + @Override + public void outputWithTimestamp(O output, Instant timestamp) { + Instant originalTimestamp = windowedValue.getTimestamp(); + + if (originalTimestamp != null) { + Preconditions.checkArgument( + !timestamp.isBefore(originalTimestamp.minus(fn.getAllowedTimestampSkew()))); + } + context.outputWindowedValue(output, timestamp, windowedValue.getWindows()); + } + + void outputWindowedValue( + O output, + Instant timestamp, + Collection windows) { + context.outputWindowedValue(output, timestamp, windows); + } + + @Override + public void sideOutput(TupleTag tag, T output) { + context.sideOutputWindowedValue(tag, + output, + windowedValue.getTimestamp(), + windowedValue.getWindows()); + } + + @Override + public Aggregator createAggregator( + String name, Combine.CombineFn combiner) { + return context.createAggregator(name, combiner); + } + + @Override + public Aggregator createAggregator( + String name, SerializableFunction, AO> combiner) { + return context.createAggregator(name, combiner); + } + + @Override + public Instant timestamp() { + return windowedValue.getTimestamp(); + } + + @Override + public Collection windows() { + return windowedValue.getWindows(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java new file mode 
100644 index 0000000000000..975af472a4b6d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.util.ArrayList; +import java.util.List; + +/** + * Runs a DoFn by constructing the appropriate contexts and passing them in. + * + * @param the type of the DoFn's (main) input elements + * @param the type of the DoFn's (main) output elements + * @param the type of object which receives outputs + */ +public class DoFnRunner { + + /** Information about how to create output receivers and output to them. */ + public interface OutputManager { + + /** Returns the receiver to use for a given tag. */ + public R initialize(TupleTag tag); + + /** Outputs a single element to the provided receiver. */ + public void output(R receiver, WindowedValue output); + + } + + /** The DoFn being run. */ + public final DoFn fn; + + /** The context used for running the DoFn. */ + public final DoFnContext context; + + private DoFnRunner(PipelineOptions options, + DoFn fn, + PTuple sideInputs, + OutputManager outputManager, + TupleTag mainOutputTag, + List> sideOutputTags, + StepContext stepContext, + CounterSet.AddCounterMutator addCounterMutator) { + this.fn = fn; + this.context = new DoFnContext<>(options, fn, sideInputs, outputManager, + mainOutputTag, sideOutputTags, stepContext, + addCounterMutator); + } + + public static DoFnRunner create( + PipelineOptions options, + DoFn fn, + PTuple sideInputs, + OutputManager outputManager, + TupleTag mainOutputTag, + List> sideOutputTags, + StepContext stepContext, + CounterSet.AddCounterMutator addCounterMutator) { + return new DoFnRunner<>( + options, fn, sideInputs, outputManager, + mainOutputTag, sideOutputTags, stepContext, addCounterMutator); + } + + public static DoFnRunner createWithListOutputs( + PipelineOptions options, + DoFn fn, + PTuple sideInputs, + TupleTag mainOutputTag, + List> sideOutputTags, + StepContext stepContext, + CounterSet.AddCounterMutator addCounterMutator) { + return create( + options, fn, sideInputs, + new OutputManager() { + @Override + public List initialize(TupleTag tag) { + return new ArrayList<>(); + } + @Override + public void output(List list, WindowedValue output) { + list.add(output); + } + }, + mainOutputTag, sideOutputTags, stepContext, addCounterMutator); + } + + /** Calls {@link DoFn#startBundle}. */ + public void startBundle() { + // This can contain user code. Wrap it in case it throws an exception. + try { + fn.startBundle(context); + } catch (Throwable t) { + // Exception in user code. 
+ throw new UserCodeException(t); + } + } + + /** + * Calls {@link DoFn#processElement} with a ProcessContext containing + * the current element. + */ + public void processElement(WindowedValue elem) { + DoFnProcessContext processContext = new DoFnProcessContext(fn, context, elem); + + // This can contain user code. Wrap it in case it throws an exception. + try { + fn.processElement(processContext); + } catch (Throwable t) { + // Exception in user code. + throw new UserCodeException(t); + } + } + + /** Calls {@link DoFn#finishBundle}. */ + public void finishBundle() { + // This can contain user code. Wrap it in case it throws an exception. + try { + fn.finishBundle(context); + } catch (Throwable t) { + // Exception in user code. + throw new UserCodeException(t); + } + } + + /** Returns the receiver who gets outputs with the provided tag. */ + public R getReceiver(TupleTag tag) { + return context.getReceiver(tag); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java new file mode 100644 index 0000000000000..12d0745b67b6c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.CodedTupleTagMap; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Context about the current execution. This is guaranteed to exist during processing, + * but does not necessarily persist between different batches of work. + */ +public abstract class ExecutionContext { + private Map cachedStepContexts = new HashMap<>(); + + /** + * Returns the {@link StepContext} associated with the given step. + */ + public StepContext getStepContext(String stepName) { + StepContext context = cachedStepContexts.get(stepName); + if (context == null) { + context = createStepContext(stepName); + cachedStepContexts.put(stepName, context); + } + return context; + } + + /** + * Returns a collection view of all of the {@link StepContext}s. + */ + public Collection getAllStepContexts() { + return cachedStepContexts.values(); + } + + /** + * Implementations should override this to create the specific type + * of {@link StepContext} they neeed. + */ + public abstract StepContext createStepContext(String stepName); + + /** + * Writes out a timer to be fired when the watermark reaches the given + * timestamp. Timers are identified by their name, and can be moved + * by calling {@code setTimer} again, or deleted with + * {@link ExecutionContext#deleteTimer}. 
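As a rough illustration of how a worker harness might drive the DoFnRunner defined above, the following hedged sketch uses only the methods shown (createWithListOutputs, startBundle, processElement, finishBundle, getReceiver); the options, stepContext, and addCounterMutator values are assumed to be supplied by the surrounding worker code, and PTuple.empty() is assumed to exist in the values package:

    // Illustrative driver: run a simple DoFn over one element and collect its output.
    DoFn<String, Integer> lengthFn = new DoFn<String, Integer>() {
      @Override
      public void processElement(ProcessContext c) {
        c.output(c.element().length());
      }
    };
    TupleTag<Integer> mainTag = new TupleTag<>();

    DoFnRunner<String, Integer, List> runner = DoFnRunner.createWithListOutputs(
        options, lengthFn, PTuple.empty(), mainTag,
        Collections.<TupleTag<?>>emptyList(), stepContext, addCounterMutator);

    runner.startBundle();
    runner.processElement(WindowedValue.of(
        "greetings", new Instant(0), Arrays.asList(GlobalWindow.Window.INSTANCE)));
    runner.finishBundle();

    // createWithListOutputs wires List receivers, so each output tag maps to a
    // List of WindowedValue elements.
    List outputs = runner.getReceiver(mainTag);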
+ */ + public abstract void setTimer(String timer, Instant timestamp); + + /** + * Deletes the given timer. + */ + public abstract void deleteTimer(String timer); + + /** + * Hook for subclasses to implement that will be called whenever + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#output} + * is called. + */ + public void noteOutput(WindowedValue output) {} + + /** + * Hook for subclasses to implement that will be called whenever + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#sideOutput} + * is called. + */ + public void noteSideOutput(TupleTag tag, WindowedValue output) {} + + /** + * Per-step, per-key context used for retrieving state. + */ + public abstract class StepContext implements DoFn.KeyedState { + private final String stepName; + + public StepContext(String stepName) { + this.stepName = stepName; + } + + public String getStepName() { + return stepName; + } + + public ExecutionContext getExecutionContext() { + return ExecutionContext.this; + } + + public void noteOutput(WindowedValue output) { + ExecutionContext.this.noteOutput(output); + } + + public void noteSideOutput(TupleTag tag, WindowedValue output) { + ExecutionContext.this.noteSideOutput(tag, output); + } + + /** + * Stores the provided value in per-{@link com.google.cloud.dataflow.sdk.transforms.DoFn}, + * per-key state. This state is in the form of a map from tags to arbitrary + * encodable values. + * + * @throws IOException if encoding the given value fails + */ + public abstract void store(CodedTupleTag tag, T value) throws IOException; + + /** + * Loads the values from the per-{@link com.google.cloud.dataflow.sdk.transforms.DoFn}, + * per-key state corresponding to the given tags. + * + * @throws IOException if decoding any of the requested values fails + */ + public abstract CodedTupleTagMap lookup(List> tags) + throws IOException; + + /** + * Loads the value from the per-{@link com.google.cloud.dataflow.sdk.transforms.DoFn}, + * per-key state corresponding to the given tag. + * + * @throws IOException if decoding the value fails + */ + public T lookup(CodedTupleTag tag) throws IOException { + return lookup(Arrays.asList(tag)).get(tag); + } + + /** + * Writes the provided value to the list of values in stored state corresponding to the + * provided tag. + * + * @throws IOException if encoding the given value fails + */ + public abstract void writeToTagList(CodedTupleTag tag, T value, Instant timestamp) + throws IOException; + + /** + * Deletes the list corresponding to the given tag. + */ + public abstract void deleteTagList(CodedTupleTag tag); + + /** + * Reads the elements of the list in stored state corresponding to the provided tag. + * + * @throws IOException if decoding any of the requested values fails + */ + public abstract Iterable readTagList(CodedTupleTag tag) + throws IOException; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java new file mode 100644 index 0000000000000..71f66ed2f6db5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
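The per-key state API on StepContext above can be sketched roughly as follows. This is illustrative only: the tag name is invented, CodedTupleTag.of and BigEndianLongCoder.of are assumed factories from the values and coders packages, stepContext stands for a concrete StepContext implementation, and the surrounding method is assumed to declare IOException:

    // Illustrative: maintain a per-key counter in step state.
    CodedTupleTag<Long> countTag =
        CodedTupleTag.of("processed-count", BigEndianLongCoder.of());

    Long previous = stepContext.lookup(countTag);   // may be null if nothing was stored yet
    long updated = (previous == null ? 0L : previous) + 1;
    stepContext.store(countTag, updated);           // throws IOException if encoding fails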
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileFilter; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.PathMatcher; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; + +/** + * Implements IOChannelFactory for local files. + */ +public class FileIOChannelFactory implements IOChannelFactory { + private static final Logger LOG = LoggerFactory.getLogger(FileIOChannelFactory.class); + + // This implementation only allows for wildcards in the file name. + // The directory portion must exist as-is. + @Override + public Collection match(String spec) throws IOException { + File file = new File(spec); + + File parent = file.getParentFile(); + if (!parent.exists()) { + throw new IOException("Unable to find parent directory of " + spec); + } + + final PathMatcher matcher = + FileSystems.getDefault().getPathMatcher("glob:" + spec); + File[] files = parent.listFiles(new FileFilter() { + @Override + public boolean accept(File pathname) { + return matcher.matches(pathname.toPath()); + } + }); + + List result = new LinkedList<>(); + for (File match : files) { + result.add(match.getPath()); + } + + return result; + } + + @Override + public ReadableByteChannel open(String spec) throws IOException { + LOG.debug("opening file {}", spec); + FileInputStream inputStream = new FileInputStream(spec); + return inputStream.getChannel(); + } + + @Override + public WritableByteChannel create(String spec, String mimeType) + throws IOException { + LOG.debug("creating file {}", spec); + return Channels.newChannel( + new BufferedOutputStream(new FileOutputStream(spec))); + } + + @Override + public long getSizeBytes(String spec) throws IOException { + return Files.size(FileSystems.getDefault().getPath(spec)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java new file mode 100644 index 0000000000000..a3a3fd2eb5bf1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
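A caller might expand a local glob and read the matches roughly like this; the path is made up, and, per the comment above, only the file name portion may contain wildcards:

    // Illustrative: list matching local files, check their sizes, and open each one.
    FileIOChannelFactory factory = new FileIOChannelFactory();
    for (String filename : factory.match("/tmp/dataflow/output-*.txt")) {
      long sizeBytes = factory.getSizeBytes(filename);
      try (InputStream in = Channels.newInputStream(factory.open(filename))) {
        // consume the stream ...
      }
    }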
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.BearerToken; +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.auth.oauth2.TokenResponse; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.util.IOUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * A credential object which uses the GCloud command line tool to get + * an access token. + */ +public class GCloudCredential extends Credential { + private static final String DEFAULT_GCLOUD_BINARY = "gcloud"; + private final String binary; + + public GCloudCredential(HttpTransport transport) { + this(DEFAULT_GCLOUD_BINARY, transport); + } + + /** + * Path to the GCloud binary. + */ + public GCloudCredential(String binary, HttpTransport transport) { + super(new Builder(BearerToken.authorizationHeaderAccessMethod()) + .setTransport(transport)); + + this.binary = binary; + } + + private String readStream(InputStream stream) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + IOUtils.copy(stream, baos); + return baos.toString("UTF-8"); + } + + @Override + protected TokenResponse executeRefreshToken() throws IOException { + TokenResponse response = new TokenResponse(); + + ProcessBuilder builder = new ProcessBuilder(); + // ProcessBuilder will search the path automatically for the binary + // GCLOUD_BINARY. + builder.command(Arrays.asList(binary, "auth", "print-access-token")); + Process process = builder.start(); + + try { + process.waitFor(); + } catch (InterruptedException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud; timed out waiting " + + "for gcloud."); + } + + if (process.exitValue() != 0) { + String output; + try { + output = readStream(process.getErrorStream()); + } catch (IOException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud."); + } + + throw new RuntimeException( + "Could not obtain an access token using gcloud. Result of " + + "invoking gcloud was:\n" + output); + } + + String output; + try { + output = readStream(process.getInputStream()); + } catch (IOException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud. We encountered an " + + "an error trying to read stdout.", e); + } + String[] lines = output.split("\n"); + + if (lines.length != 1) { + throw new RuntimeException( + "Could not obtain an access token using gcloud. Result of " + + "invoking gcloud was:\n" + output); + } + + // Access token should be good for 5 minutes. + Long expiresInSeconds = 5L * 60; + response.setExpiresInSeconds(expiresInSeconds); + response.setAccessToken(output.trim()); + + return response; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java new file mode 100644 index 0000000000000..9ff133261e601 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
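A hedged usage sketch of the credential above; refreshToken() and getAccessToken() come from the google-api-client Credential base class, and NetHttpTransport is assumed to be available on the classpath:

    // Illustrative: obtain an access token by shelling out to the gcloud tool.
    GCloudCredential credential = new GCloudCredential(new NetHttpTransport());
    if (credential.refreshToken()) {            // runs "gcloud auth print-access-token"
      String token = credential.getAccessToken();
      // Per executeRefreshToken above, the token is treated as valid for about five minutes.
    }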
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; + +import java.io.IOException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; + +/** + * Implements IOChannelFactory for GCS. + */ +public class GcsIOChannelFactory implements IOChannelFactory { + + private final GcsOptions options; + + public GcsIOChannelFactory(GcsOptions options) { + this.options = options; + } + + @Override + public Collection match(String spec) throws IOException { + GcsPath path = GcsPath.fromUri(spec); + GcsUtil util = options.getGcsUtil(); + List matched = util.expand(path); + + List specs = new LinkedList<>(); + for (GcsPath match : matched) { + specs.add(match.toString()); + } + + return specs; + } + + @Override + public ReadableByteChannel open(String spec) throws IOException { + GcsPath path = GcsPath.fromUri(spec); + GcsUtil util = options.getGcsUtil(); + return util.open(path); + } + + @Override + public WritableByteChannel create(String spec, String mimeType) + throws IOException { + GcsPath path = GcsPath.fromUri(spec); + GcsUtil util = options.getGcsUtil(); + return util.create(path, mimeType); + } + + @Override + public long getSizeBytes(String spec) throws IOException { + GcsPath path = GcsPath.fromUri(spec); + GcsUtil util = options.getGcsUtil(); + return util.fileSize(path); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java new file mode 100644 index 0000000000000..c3edd2ac2c338 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java @@ -0,0 +1,277 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Preconditions; +import com.google.api.services.storage.Storage; +import com.google.api.services.storage.model.Objects; +import com.google.api.services.storage.model.StorageObject; +import com.google.cloud.dataflow.sdk.options.DefaultValueFactory; +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.util.gcsio.GoogleCloudStorageReadChannel; +import com.google.cloud.dataflow.sdk.util.gcsio.GoogleCloudStorageWriteChannel; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.channels.SeekableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Provides operations on GCS. + * + * TODO: re-implement as a FileSystemProvider? + */ +public class GcsUtil { + /** + * This is a {@link DefaultValueFactory} able to create a {@link GcsUtil} using + * any transport flags specified on the {@link PipelineOptions}. + */ + public static class GcsUtilFactory implements DefaultValueFactory { + /** + * Returns an instance of {@link GcsUtil} based on the + * {@link PipelineOptions}. + *

+ * If no instance has previously been created, one is created and the value + * stored in {@code options}. + */ + @Override + public GcsUtil create(PipelineOptions options) { + GcsOptions gcsOptions = options.as(GcsOptions.class); + LOG.debug("Creating new GcsUtil"); + return new GcsUtil(Transport.newStorageClient(gcsOptions).build(), + gcsOptions.getExecutorService()); + } + } + + private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class); + + /** Maximum number of items to retrieve per Objects.List request. */ + private static final long MAX_LIST_ITEMS_PER_CALL = 1024; + + /** Matches a glob containing a wildcard, capturing the portion before the first wildcard. */ + private static final Pattern GLOB_PREFIX = Pattern.compile("(?[^*?]*)[*?].*"); + + private static final String WILDCARD = "[\\[\\]*?]"; + private static final String NON_WILDCARD = "[^\\[\\]*?]"; + private static final String NON_DELIMITER = "[^/]"; + private static final String OPTIONAL_WILDCARD_AND_SUFFIX = "(" + WILDCARD + NON_DELIMITER + "*)?"; + + /** + * A {@link Pattern} that matches globs in which every wildcard is interpreted as such, + * assuming a delimiter of {@code '/'}. + * + *

Most importantly, if a {@code '*'} or {@code '?'} occurs before the + * final delimiter it will not be interpreted as a wildcard. + */ + public static final Pattern GCS_READ_PATTERN = Pattern.compile( + NON_WILDCARD + "*" + OPTIONAL_WILDCARD_AND_SUFFIX); + + ///////////////////////////////////////////////////////////////////////////// + + /** Client for the GCS API */ + private final Storage storage; + + // Helper delegate for turning IOExceptions from API calls into higher-level semantics. + private final ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); + + // Exposed for testing. + final ExecutorService executorService; + + private GcsUtil(Storage storageClient, ExecutorService executorService) { + storage = storageClient; + this.executorService = executorService; + } + + /** + * Expands a pattern into matched paths. The input path may contain + * globs (in the last component only!), which are expanded in the result. + * + * TODO: add support for full path matching. + */ + public List expand(GcsPath path) throws IOException { + if (!GCS_READ_PATTERN.matcher(path.getObject()).matches()) { + throw new IllegalArgumentException( + "Unsupported wildcard usage in \"" + path + "\": " + + " all wildcards must occur after the final '/' delimiter."); + } + + Matcher m = GLOB_PREFIX.matcher(path.getObject()); + if (!m.matches()) { + return Arrays.asList(path); + } + + String prefix = m.group("PREFIX"); + Pattern p = Pattern.compile(globToRegexp(path.getObject())); + LOG.info("matching files in bucket {}, prefix {} against pattern {}", + path.getBucket(), prefix, p.toString()); + + Storage.Objects.List listObject = storage.objects().list(path.getBucket()); + listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL); + listObject.setDelimiter("/"); + listObject.setPrefix(prefix); + + String pageToken = null; + List results = new LinkedList<>(); + do { + if (pageToken != null) { + listObject.setPageToken(pageToken); + } + + Objects objects = listObject.execute(); + Preconditions.checkNotNull(objects); + + if (objects.getItems() == null) { + break; + } + + // Filter + for (StorageObject o : objects.getItems()) { + String name = o.getName(); + // Skip directories, which end with a slash. + if (p.matcher(name).matches() && !name.endsWith("/")) { + LOG.debug("Matched object: {}", name); + results.add(GcsPath.fromObject(o)); + } + } + + pageToken = objects.getNextPageToken(); + } while (pageToken != null); + + return results; + } + + /** + * Returns the file size from GCS, or -1 if the file does not exist. + */ + public long fileSize(GcsPath path) throws IOException { + try { + Storage.Objects.Get getObject = + storage.objects().get(path.getBucket(), path.getObject()); + + StorageObject object = getObject.execute(); + return object.getSize().longValue(); + } catch (IOException e) { + if (errorExtractor.itemNotFound(e)) { + return -1; + } + + // Re-throw any other error. + throw e; + } + } + + /** + * Opens an object in GCS. + * + * Returns a SeekableByteChannel which provides access to data in the bucket. + * + * @param path the GCS filename to read from + * @return a SeekableByteChannel which can read the object data + * @throws IOException + */ + public SeekableByteChannel open(GcsPath path) + throws IOException { + return new GoogleCloudStorageReadChannel(storage, path.getBucket(), + path.getObject(), errorExtractor); + } + + /** + * Creates an object in GCS. + * + * Returns a WritableByteChannel which can be used to write data to the + * object. 
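Putting the pieces above together, expansion might be used roughly as follows; the bucket and object names are invented, and getGcsUtil() is the GcsOptions accessor backed by the GcsUtilFactory defined earlier:

    // Illustrative: expand a GCS glob and check the size of each match.
    GcsUtil gcsUtil = pipelineOptions.as(GcsOptions.class).getGcsUtil();
    List<GcsPath> paths = gcsUtil.expand(GcsPath.fromUri("gs://example-bucket/logs/part-*"));
    for (GcsPath path : paths) {
      long size = gcsUtil.fileSize(path);   // -1 if the object does not exist
    }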
+ * + * @param path the GCS file to write to + * @param type the type of object, eg "text/plain". + * @return a Callable object which encloses the operation. + * @throws IOException + */ + public WritableByteChannel create(GcsPath path, + String type) throws IOException { + return new GoogleCloudStorageWriteChannel( + executorService, + storage, + path.getBucket(), + path.getObject(), + type); + } + + /** + * Expands glob expressions to regular expressions. + * + * @param globExp the glob expression to expand + * @return a string with the regular expression this glob expands to + */ + static String globToRegexp(String globExp) { + StringBuilder dst = new StringBuilder(); + char[] src = globExp.toCharArray(); + int i = 0; + while (i < src.length) { + char c = src[i++]; + switch (c) { + case '*': + dst.append("[^/]*"); + break; + case '?': + dst.append("[^/]"); + break; + case '.': + case '+': + case '{': + case '}': + case '(': + case ')': + case '|': + case '^': + case '$': + // These need to be escaped in regular expressions + dst.append('\\').append(c); + break; + case '\\': + i = doubleSlashes(dst, src, i); + break; + default: + dst.append(c); + break; + } + } + return dst.toString(); + } + + private static int doubleSlashes(StringBuilder dst, char[] src, int i) { + // Emit the next character without special interpretation + dst.append('\\'); + if ((i - 1) != src.length) { + dst.append(src[i]); + i++; + } else { + // A backslash at the very end is treated like an escaped backslash + dst.append('\\'); + } + return i; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java new file mode 100644 index 0000000000000..62ae4875f9651 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
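As a worked illustration of the glob translation in globToRegexp above (the method is package-private, so this is the kind of expectation one would encode in a test in the same package; the file names are invented):

    // '*' becomes "[^/]*", '?' becomes "[^/]", and '.' is escaped, so wildcards
    // never cross a '/' delimiter.
    String regexp = GcsUtil.globToRegexp("part-*-of-?.txt");   // "part-[^/]*-of-[^/]\\.txt"
    Pattern p = Pattern.compile(regexp);
    p.matcher("part-00000-of-5.txt").matches();   // true
    p.matcher("part-a/b-of-5.txt").matches();     // false: '*' does not match '/'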
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.common.PeekingReiterator; +import com.google.cloud.dataflow.sdk.util.common.Reiterable; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.ListMultimap; + +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.PriorityQueue; + +/** + * DoFn that merges windows and groups elements in those windows, optionally + * combining values. + * + * @param key type + * @param input value element type + * @param window type + */ +public class GroupAlsoByWindowsDoFn + extends DoFn>>, KV>> { + // TODO: Add back RequiresKeyed state once that is supported. + + protected WindowingFn windowingFn; + protected Coder inputCoder; + + public GroupAlsoByWindowsDoFn( + WindowingFn windowingFn, + Coder inputCoder) { + this.windowingFn = windowingFn; + this.inputCoder = inputCoder; + } + + @Override + public void processElement(ProcessContext processContext) throws Exception { + DoFnProcessContext>>, KV>> context = + (DoFnProcessContext>>, KV>>) processContext; + + if (windowingFn instanceof NonMergingWindowingFn) { + processElementViaIterators(context); + } else { + processElementViaWindowSet(context); + } + } + + private void processElementViaWindowSet( + DoFnProcessContext>>, KV>> context) + throws Exception { + + K key = context.element().getKey(); + BatchActiveWindowManager activeWindowManager = new BatchActiveWindowManager<>(); + AbstractWindowSet, W> windowSet = + new BufferingWindowSet(key, windowingFn, inputCoder, context, activeWindowManager); + + for (WindowedValue e : context.element().getValue()) { + for (BoundedWindow window : e.getWindows()) { + windowSet.put((W) window, e.getValue()); + } + ((WindowingFn) windowingFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + + maybeOutputWindows(activeWindowManager, windowSet, windowingFn, e.getTimestamp()); + } + + maybeOutputWindows(activeWindowManager, windowSet, windowingFn, null); + + windowSet.flush(); + } + + /** + * Outputs any windows that are complete, with their corresponding elemeents. + * If there are potentially complete windows, try merging windows first. + */ + private void maybeOutputWindows( + BatchActiveWindowManager activeWindowManager, + AbstractWindowSet windowSet, + WindowingFn windowingFn, + Instant nextTimestamp) throws Exception { + if (activeWindowManager.hasMoreWindows() + && (nextTimestamp == null + || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { + // There is at least one window ready to emit. Merge now in case that window should be merged + // into a not yet completed one. 
+ ((WindowingFn) windowingFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + } + + while (activeWindowManager.hasMoreWindows() + && (nextTimestamp == null + || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { + W window = activeWindowManager.getWindow(); + if (windowSet.contains(window)) { + windowSet.markCompleted(window); + } + } + } + + private void processElementViaIterators( + DoFnProcessContext>>, KV>> context) + throws Exception { + K key = context.element().getKey(); + Iterable> value = context.element().getValue(); + PeekingReiterator> iterator; + + if (value instanceof Collection) { + iterator = new PeekingReiterator<>(new ListReiterator>( + new ArrayList>((Collection>) value), 0)); + } else if (value instanceof Reiterable) { + iterator = new PeekingReiterator(((Reiterable>) value).iterator()); + } else { + throw new IllegalArgumentException( + "Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable"); + } + + // This ListMultimap is a map of window maxTimestamps to the list of active + // windows with that maxTimestamp. + ListMultimap windows = ArrayListMultimap.create(); + + while (iterator.hasNext()) { + WindowedValue e = iterator.peek(); + for (BoundedWindow window : e.getWindows()) { + // If this window is not already in the active set, emit a new WindowReiterable + // corresponding to this window, starting at this element in the input Reiterable. + if (!windows.containsEntry(window.maxTimestamp(), window)) { + // Iterating through the WindowReiterable may advance iterator as an optimization + // for as long as it detects that there are no new windows. + windows.put(window.maxTimestamp(), window); + context.outputWindowedValue( + KV.of(key, (Iterable) new WindowReiterable(iterator, window)), + window.maxTimestamp(), + Arrays.asList((W) window)); + } + } + // Copy the iterator in case the next DoFn cached its version of the iterator instead + // of immediately iterating through it. + // And, only advance the iterator if the consuming operation hasn't done so. + iterator = iterator.copy(); + if (iterator.hasNext() && iterator.peek() == e) { + iterator.next(); + } + + // Remove all windows with maxTimestamp behind the current timestamp. + Iterator windowIterator = windows.keys().iterator(); + while (windowIterator.hasNext() + && windowIterator.next().isBefore(e.getTimestamp())) { + windowIterator.remove(); + } + } + } + + /** + * {@link Reiterable} representing a view of all elements in a base + * {@link Reiterator} that are in a given window. + */ + private static class WindowReiterable implements Reiterable { + private PeekingReiterator> baseIterator; + private BoundedWindow window; + + public WindowReiterable( + PeekingReiterator> baseIterator, BoundedWindow window) { + this.baseIterator = baseIterator; + this.window = window; + } + + @Override + public Reiterator iterator() { + // We don't copy the baseIterator when creating the first WindowReiterator + // so that the WindowReiterator can advance the baseIterator. We have to + // make a copy afterwards so that future calls to iterator() will start + // at the right spot. 
+ Reiterator result = new WindowReiterator(baseIterator, window); + baseIterator = baseIterator.copy(); + return result; + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append("WR{"); + for (V v : this) { + result.append(v.toString()).append(','); + } + result.append("}"); + return result.toString(); + } + } + + /** + * The {@link Reiterator} used by {@link WindowReiterable}. + */ + private static class WindowReiterator implements Reiterator { + private PeekingReiterator> iterator; + private BoundedWindow window; + + public WindowReiterator(PeekingReiterator> iterator, BoundedWindow window) { + this.iterator = iterator; + this.window = window; + } + + @Override + public Reiterator copy() { + return new WindowReiterator(iterator.copy(), window); + } + + @Override + public boolean hasNext() { + skipToValidElement(); + return (iterator.hasNext() && iterator.peek().getWindows().contains(window)); + } + + @Override + public V next() { + skipToValidElement(); + WindowedValue next = iterator.next(); + if (!next.getWindows().contains(window)) { + throw new NoSuchElementException("No next item in window"); + } + return next.getValue(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Moves the underlying iterator forward until it either points to the next + * element in the correct window, or is past the end of the window. + */ + private void skipToValidElement() { + while (iterator.hasNext()) { + WindowedValue peek = iterator.peek(); + if (!peek.getTimestamp().isBefore(window.maxTimestamp())) { + // We are past the end of this window, so there can't be any more + // elements in this iterator. + break; + } + if (!(peek.getWindows().size() == 1 && peek.getWindows().contains(window))) { + // We have reached new windows; we need to copy the iterator so we don't + // keep advancing the outer loop in processElement. + iterator = iterator.copy(); + } + if (!peek.getWindows().contains(window)) { + // The next element is not in the right window: skip it. + iterator.next(); + } else { + // The next element is in the right window. + break; + } + } + } + } + + /** + * {@link Reiterator} that wraps a {@link List}. + */ + private static class ListReiterator implements Reiterator { + private List list; + private int index; + + public ListReiterator(List list, int index) { + this.list = list; + this.index = index; + } + + @Override + public T next() { + return list.get(index++); + } + + @Override + public boolean hasNext() { + return index < list.size(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public Reiterator copy() { + return new ListReiterator(list, index); + } + } + + private static class BatchActiveWindowManager + implements AbstractWindowSet.ActiveWindowManager { + // Sort the windows by their end timestamps so that we can efficiently + // ask for the next window that will be completed. + PriorityQueue windows = new PriorityQueue<>(11, new Comparator() { + @Override + public int compare(W w1, W w2) { + return w1.maxTimestamp().compareTo(w2.maxTimestamp()); + } + }); + + @Override + public void addWindow(W window) { + windows.add(window); + } + + @Override + public void removeWindow(W window) { + windows.remove(window); + } + + /** + * Returns whether there are more windows. 
+ */ + public boolean hasMoreWindows() { + return windows.peek() != null; + } + + /** + * Returns the timestamp of the next window + */ + public Instant nextTimestamp() { + return windows.peek().maxTimestamp(); + } + + /** + * Returns and removes the next window. + */ + public W getWindow() { + return windows.poll(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java new file mode 100644 index 0000000000000..683ca76efa5d5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.io.IOException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.util.Collection; + +/** + * Defines a factory for working with read and write channels. + * + * Channels provide an abstract API for IO operations. + * + * See FACTORY_MAP = + Collections.synchronizedMap(new HashMap()); + + // Pattern which matches shard placeholders within a shard template. + private static final Pattern SHARD_FORMAT_RE = Pattern.compile("(S+|N+)"); + + /** + * Associates a scheme with an {@link IOChannelFactory}. + * + * The given factory is used to construct read and write channels when + * a URI is provided with the given scheme. + * + * For example, when reading from "gs://bucket/path", the scheme "gs" is + * used to lookup the appropriate factory. + */ + public static void setIOFactory(String scheme, IOChannelFactory factory) { + FACTORY_MAP.put(scheme, factory); + } + + /** + * Registers standard factories globally. This requires {@link PipelineOptions} + * to provide e.g. credentials for GCS. + */ + public static void registerStandardIOFactories(PipelineOptions options) { + setIOFactory("gs", new GcsIOChannelFactory(options.as(GcsOptions.class))); + } + + /** + * Creates a write channel for the given filename. + */ + public static WritableByteChannel create(String filename, String mimeType) + throws IOException { + return getFactory(filename).create(filename, mimeType); + } + + /** + * Creates a write channel for the given file components. + * + *

If numShards is specified, then a ShardingWritableByteChannel is + * returned. + * + *

Shard numbers are 0 based, meaning they start with 0 and end at the + * number of shards - 1. + */ + public static WritableByteChannel create(String prefix, String shardTemplate, + String suffix, int numShards, String mimeType) throws IOException { + if (numShards == 1) { + return create(constructName(prefix, shardTemplate, suffix, 0, 1), + mimeType); + } + + ShardingWritableByteChannel shardingChannel = + new ShardingWritableByteChannel(); + + Set outputNames = new HashSet<>(); + for (int i = 0; i < numShards; i++) { + String outputName = + constructName(prefix, shardTemplate, suffix, i, numShards); + if (!outputNames.add(outputName)) { + throw new IllegalArgumentException( + "Shard name collision detected for: " + outputName); + } + WritableByteChannel channel = create(outputName, mimeType); + shardingChannel.addChannel(channel); + } + + return shardingChannel; + } + + /** + * Constructs a fully qualified name from components. + * + *

The name is built from a prefix, shard template (with shard numbers + * applied), and a suffix. All components are required, but may be empty + * strings. + * + *

Within a shard template, repeating sequences of the letters "S" or "N" + * are replaced with the shard number, or number of shards respectively. The + * numbers are formatted with leading zeros to match the length of the + * repeated sequence of letters. + * + *

For example, if prefix = "output", shardTemplate = "-SSS-of-NNN", and + * suffix = ".txt", with shardNum = 1 and numShards = 100, the following is + * produced: "output-001-of-100.txt". + */ + public static String constructName(String prefix, + String shardTemplate, String suffix, int shardNum, int numShards) { + // Matcher API works with StringBuffer, rather than StringBuilder. + StringBuffer sb = new StringBuffer(); + sb.append(prefix); + + Matcher m = SHARD_FORMAT_RE.matcher(shardTemplate); + while (m.find()) { + boolean isShardNum = (m.group(1).charAt(0) == 'S'); + + char[] zeros = new char[m.end() - m.start()]; + Arrays.fill(zeros, '0'); + DecimalFormat df = new DecimalFormat(String.valueOf(zeros)); + String formatted = df.format(isShardNum + ? shardNum + : numShards); + m.appendReplacement(sb, formatted); + } + m.appendTail(sb); + + sb.append(suffix); + return sb.toString(); + } + + private static final Pattern URI_SCHEME_PATTERN = Pattern.compile( + "(?[a-zA-Z][-a-zA-Z0-9+.]*)://.*"); + + /** + * Returns the IOChannelFactory associated with an input specification. + */ + public static IOChannelFactory getFactory(String spec) throws IOException { + // The spec is almost, but not quite, a URI. In particular, + // the reserved characters '[', ']', and '?' have meanings that differ + // from their use in the URI spec. ('*' is not reserved). + // Here, we just need the scheme, which is so circumscribed as to be + // very easy to extract with a regex. + Matcher matcher = URI_SCHEME_PATTERN.matcher(spec); + + if (!matcher.matches()) { + return new FileIOChannelFactory(); + } + + String scheme = matcher.group("scheme"); + IOChannelFactory ioFactory = FACTORY_MAP.get(scheme); + if (ioFactory != null) { + return ioFactory; + } + + throw new IOException("Unable to find handler for " + spec); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java new file mode 100644 index 0000000000000..8712855a86220 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Preconditions; +import com.google.common.reflect.TypeToken; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.LinkedList; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * Utility for creating objects dynamically. + * + * @param type type of object returned by this instance builder + */ +public class InstanceBuilder { + + /** + * Create an InstanceBuilder for the given type. + *
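Circling back to IOChannelUtils above, a hedged sketch of shard naming and sharded channel creation; the output prefix is invented, and MimeTypes.TEXT is the constant defined later in this patch:

    // Illustrative: shard name construction mirrors the javadoc example above.
    String name = IOChannelUtils.constructName("output", "-SSS-of-NNN", ".txt", 1, 100);
    // name is "output-001-of-100.txt"

    // Asking for 3 shards returns a ShardingWritableByteChannel that wraps
    // /tmp/output-000-of-003.txt, /tmp/output-001-of-003.txt and /tmp/output-002-of-003.txt.
    WritableByteChannel channel =
        IOChannelUtils.create("/tmp/output", "-SSS-of-NNN", ".txt", 3, MimeTypes.TEXT);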

+ * The specified type is the type returned by {@link #build}, which is + * typically the common base type or interface of the instance being + * constructed. + */ + public static InstanceBuilder ofType(Class type) { + return new InstanceBuilder<>(type); + } + + /** + * Create an InstanceBuilder for the given type. + *

+ * The specified type is the type returned by {@link #build}, which is + * typically the common base type or interface for the instance to be + * constructed. + *

+ * The TypeToken argument allows specification of generic types. For example, + * a {@code List} return type can be specified as + * {@code ofType(new TypeToken>(){})}. + */ + public static InstanceBuilder ofType(TypeToken token) { + @SuppressWarnings("unchecked") + Class type = (Class) token.getRawType(); + return new InstanceBuilder<>(type); + } + + /** + * Sets the class name to be constructed. + *

+ * If the name is a simple name (ie {@link Class#getSimpleName()}), then + * the package of the return type is added as a prefix. + *

+ * The default class is the return type, specified in {@link #ofType}. + *

+ * Modifies and returns the {@code InstanceBuilder} for chaining. + * + * @throws ClassNotFoundException if no class can be found by the given name + */ + public InstanceBuilder fromClassName(String name) + throws ClassNotFoundException { + Preconditions.checkArgument(factoryClass == null, + "Class name may only be specified once"); + if (name.indexOf('.') == -1) { + name = type.getPackage().getName() + "." + name; + } + + try { + factoryClass = Class.forName(name); + } catch (ClassNotFoundException e) { + throw new ClassNotFoundException( + String.format("Could not find class: %s", name), e); + } + return this; + } + + /** + * Sets the factory class to use for instance construction. + *

+ * Modifies and returns the {@code InstanceBuilder} for chaining. + */ + public InstanceBuilder fromClass(Class factoryClass) { + this.factoryClass = factoryClass; + return this; + } + + /** + * Sets the name of the factory method used to construct the instance. + *

+ * The default, if no factory method was specified, is to look for a class + * constructor. + *

+ * Modifies and returns the {@code InstanceBuilder} for chaining. + */ + public InstanceBuilder fromFactoryMethod(String methodName) { + Preconditions.checkArgument(this.methodName == null, + "Factory method name may only be specified once"); + this.methodName = methodName; + return this; + } + + /** + * Adds an argument to be passed to the factory method. + *

+ * The argument type is used to lookup the factory method. This type may be + * a supertype of the argument value's class. + *

+ * Modifies and returns the {@code InstanceBuilder} for chaining. + */ + public InstanceBuilder withArg(Class argType, A value) { + parameterTypes.add(argType); + arguments.add(value); + return this; + } + + /** + * Creates the instance by calling the factory method with the given + * arguments. + *

+ *

Defaults:
 + *   • factory class: defaults to the output type class, overridden + * via {@link #fromClassName(String)}.
 + *   • factory method: defaults to using a constructor on the factory + * class, overridden via {@link #fromFactoryMethod(String)}. +
+ * + * @throws RuntimeException if the method does not exist, on type mismatch, + * or if the method cannot be made accessible. + */ + public T build() { + if (factoryClass == null) { + factoryClass = type; + } + + Class[] types = parameterTypes + .toArray(new Class[parameterTypes.size()]); + + // TODO: cache results, to speed repeated type lookups? + if (methodName != null) { + return buildFromMethod(types); + } else { + return buildFromConstructor(types); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Type of object to construct. + */ + private final Class type; + + /** + * Types of parameters for Method lookup. + * + * @see Class#getDeclaredMethod(String, Class[]) + */ + private final List> parameterTypes = new LinkedList<>(); + + /** + * Arguments to factory method {@link Method#invoke(Object, Object...)}. + */ + private final List arguments = new LinkedList<>(); + + /** + * Name of factory method, or null to invoke the constructor. + */ + @Nullable private String methodName; + + /** + * Factory class, or null to instantiate {@code type}. + */ + @Nullable private Class factoryClass; + + private InstanceBuilder(Class type) { + this.type = type; + } + + private T buildFromMethod(Class[] types) { + Preconditions.checkState(factoryClass != null); + Preconditions.checkState(methodName != null); + + try { + Method method = factoryClass.getDeclaredMethod(methodName, types); + + Preconditions.checkState(Modifier.isStatic(method.getModifiers()), + "Factory method must be a static method for " + + factoryClass.getName() + "#" + method.getName() + ); + + Preconditions.checkState(type.isAssignableFrom(method.getReturnType()), + "Return type for " + factoryClass.getName() + "#" + method.getName() + + " must be assignable to " + type.getSimpleName()); + + if (!method.isAccessible()) { + method.setAccessible(true); + } + + Object[] args = arguments.toArray(new Object[arguments.size()]); + return type.cast(method.invoke(null, args)); + + } catch (NoSuchMethodException e) { + throw new RuntimeException("Unable to find factory method " + + factoryClass.getName() + "#" + methodName); + + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException("Failed to construct instance from " + + "factory method " + factoryClass.getName() + "#" + methodName, e); + } + } + + private T buildFromConstructor(Class[] types) { + Preconditions.checkState(factoryClass != null); + + try { + Constructor constructor = factoryClass.getDeclaredConstructor(types); + + Preconditions.checkState(type.isAssignableFrom(factoryClass), + "Instance type " + factoryClass.getName() + + " must be assignable to " + type.getSimpleName()); + + if (!constructor.isAccessible()) { + constructor.setAccessible(true); + } + + Object[] args = arguments.toArray(new Object[arguments.size()]); + return type.cast(constructor.newInstance(args)); + + } catch (NoSuchMethodException e) { + throw new RuntimeException("Unable to find constructor for " + + factoryClass.getName()); + + } catch (InvocationTargetException | + InstantiationException | + IllegalAccessException e) { + throw new RuntimeException("Failed to construct instance from " + + "constructor " + factoryClass.getName(), e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java new file mode 100644 index 0000000000000..3318a150662ac --- /dev/null +++ 
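A hedged usage sketch of the builder above; the runner class and factory method names are invented, and fromClassName declares a checked ClassNotFoundException that the surrounding code is assumed to handle:

    // Illustrative: construct an object reflectively, roughly equivalent to
    // com.example.MyRunner.fromOptions(options).
    MyRunner runner = InstanceBuilder.ofType(MyRunner.class)
        .fromClassName("com.example.MyRunner")      // hypothetical class
        .fromFactoryMethod("fromOptions")
        .withArg(PipelineOptions.class, options)
        .build();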
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +/** Constants representing various mime types. */ +public class MimeTypes { + public static final String TEXT = "text/plain"; + public static final String BINARY = "application/octet-stream"; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java new file mode 100644 index 0000000000000..89df25c391112 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; + +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.Dataflow.V1b3.Projects.Jobs.Messages; +import com.google.api.services.dataflow.model.JobMessage; +import com.google.api.services.dataflow.model.ListJobMessagesResponse; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A helper class for monitoring jobs submitted to the service. + */ +public final class MonitoringUtil { + private String projectId; + private Messages messagesClient; + + /** Named constants for common values for the job state. 
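+ * <p>For example (an illustrative sketch of the lookup defined below; the unrecognized
+ * state name is hypothetical):
+ * <pre> {@code
+ * JobState done = JobState.toState("JOB_STATE_DONE");       // DONE
+ * boolean finished = done.isTerminal();                     // true
+ * JobState other = JobState.toState("SOME_FUTURE_STATE");   // UNKNOWN
+ * } </pre>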
*/ + public static enum JobState { + UNKNOWN ("JOB_STATE_UNKNOWN", false), + STOPPED ("JOB_STATE_STOPPED", false), + RUNNING ("JOB_STATE_RUNNING", false), + DONE ("JOB_STATE_DONE", true), + FAILED ("JOB_STATE_FAILED", true), + CANCELLED("JOB_STATE_CANCELLED", true); + + private final String stateName; + private final boolean terminal; + + private JobState(String stateName, boolean terminal) { + this.stateName = stateName; + this.terminal = terminal; + } + + public final String getStateName() { + return stateName; + } + + public final boolean isTerminal() { + return terminal; + } + + private static final Map statesByName = + Collections.unmodifiableMap(buildStatesByName()); + + private static Map buildStatesByName() { + Map result = new HashMap<>(); + for (JobState state : JobState.values()) { + result.put(state.getStateName(), state); + } + return result; + } + + public static JobState toState(String stateName) { + @Nullable JobState state = statesByName.get(stateName); + if (state == null) { + state = UNKNOWN; + } + return state; + } + } + + /** + * An interface which can be used for defining callbacks to receive a list + * of JobMessages containing monitoring information. + */ + public interface JobMessagesHandler { + /** Process the rows. */ + void process(List messages); + } + + /** A handler which prints monitoring messages to a stream. */ + public static class PrintHandler implements JobMessagesHandler { + private PrintStream out; + + /** + * Construct the handler. + * + * @param stream The stream to write the messages to. + */ + public PrintHandler(PrintStream stream) { + out = stream; + } + + @Override + public void process(List messages) { + for (JobMessage message : messages) { + StringBuilder sb = new StringBuilder(); + if (message.getMessageText() != null && !message.getMessageText().isEmpty()) { + if (message.getMessageImportance() != null) { + if (message.getMessageImportance().equals("ERROR")) { + sb.append("Error: "); + } else if (message.getMessageImportance().equals("WARNING")) { + sb.append("Warning: "); + } + } + // TODO: Allow filtering out overly detailed messages. + sb.append(message.getMessageText()); + } + if (sb.length() > 0) { + @Nullable Instant time = fromCloudTime(message.getTime()); + if (time == null) { + out.print("UNKNOWN TIMESTAMP: "); + } else { + out.print(time + ": "); + } + out.println(sb.toString()); + } + } + out.flush(); + } + } + + /** Construct a helper for monitoring. */ + public MonitoringUtil(String projectId, Dataflow dataflow) { + this(projectId, dataflow.v1b3().projects().jobs().messages()); + } + + // @VisibleForTesting + MonitoringUtil(String projectId, Messages messagesClient) { + this.projectId = projectId; + this.messagesClient = messagesClient; + } + + /** + * Comparator for sorting rows in increasing order based on timestamp. + */ + public static class TimeStampComparator implements Comparator { + @Override + public int compare(JobMessage o1, JobMessage o2) { + @Nullable Instant t1 = fromCloudTime(o1.getTime()); + if (t1 == null) { + return -1; + } + @Nullable Instant t2 = fromCloudTime(o2.getTime()); + if (t2 == null) { + return 1; + } + return t1.compareTo(t2); + } + } + + /** + * Return job messages sorted in ascending order by timestamp. + * @param jobId The id of the job to get the messages for. + * @param startTimestampMs Return only those messages with a + * timestamp greater than this value. 
+ * @return collection of messages + * @throws IOException + */ + public ArrayList getJobMessages( + String jobId, long startTimestampMs) throws IOException { + Instant startTimestamp = new Instant(startTimestampMs); + ArrayList allMessages = new ArrayList<>(); + String pageToken = null; + while (true) { + Messages.List listRequest = messagesClient.list(projectId, jobId); + if (pageToken != null) { + listRequest.setPageToken(pageToken); + } + ListJobMessagesResponse response = listRequest.execute(); + + if (response == null || response.getJobMessages() == null) { + return allMessages; + } + + for (JobMessage m : response.getJobMessages()) { + @Nullable Instant timestamp = fromCloudTime(m.getTime()); + if (timestamp == null) { + continue; + } + if (timestamp.isAfter(startTimestamp)) { + allMessages.add(m); + } + } + + if (response.getNextPageToken() == null) { + break; + } else { + pageToken = response.getNextPageToken(); + } + } + + Collections.sort(allMessages, new TimeStampComparator()); + return allMessages; + } + + public static String getJobMonitoringPageURL(String projectName, String jobId) { + try { + // Project name is allowed in place of the project id: the user will be redirected to a URL + // that has the project name replaced with project id. + return String.format( + "https://console.developers.google.com/project/%s/dataflow/job/%s", + URLEncoder.encode(projectName, "UTF-8"), + URLEncoder.encode(jobId, "UTF-8")); + } catch (UnsupportedEncodingException e) { + // Should never happen. + throw new AssertionError("UTF-8 encoding is not supported by the environment", e); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java new file mode 100644 index 0000000000000..eade03d252041 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.api.client.util.Preconditions.checkNotNull; + +import com.google.api.client.json.GenericJson; +import com.google.api.client.util.Key; + +/** + * A representation used by {@link com.google.api.services.dataflow.model.Step}s + * to reference the output of other {@code Step}s. 
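+ * <p>For example (an illustrative sketch; the step and output names are hypothetical):
+ * <pre> {@code
+ * OutputReference ref = new OutputReference("s5", "out");
+ * // With the @Key annotations below, the JSON form is roughly:
+ * //   {"@type": "OutputReference", "step_name": "s5", "output_name": "out"}
+ * } </pre>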
+ */ +public final class OutputReference extends GenericJson { + @Key("@type") + public final String type = "OutputReference"; + + @Key("step_name") + private final String stepName; + + @Key("output_name") + private final String outputName; + + public OutputReference(String stepName, String outputName) { + this.stepName = checkNotNull(stepName); + this.outputName = checkNotNull(outputName); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java new file mode 100644 index 0000000000000..98fe4606807a5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A {@code PTuple} is an immutable tuple of + * heterogeneously-typed values, "keyed" by {@link TupleTag}s. + * + *

+ * <p> PTuples can be created and accessed as follows:
+ * <pre> {@code
+ * String v1 = ...;
+ * Integer v2 = ...;
+ * Iterable<Integer> v3 = ...;
+ *
+ * // Create TupleTags for each of the values to put in the
+ * // PTuple (the type of the TupleTag enables tracking the
+ * // static type of each of the values in the PTuple):
+ * TupleTag<String> tag1 = new TupleTag<>();
+ * TupleTag<Integer> tag2 = new TupleTag<>();
+ * TupleTag<Iterable<Integer>> tag3 = new TupleTag<>();
+ *
+ * // Create a PTuple with three values:
+ * PTuple povs =
+ *     PTuple.of(tag1, v1)
+ *         .and(tag2, v2)
+ *         .and(tag3, v3);
+ *
+ * // Create an empty PTuple:
+ * PTuple povs2 = PTuple.empty();
+ *
+ * // Get values out of a PTuple, using the same tags
+ * // that were used to put them in:
+ * Integer vX = povs.get(tag2);
+ * String vY = povs.get(tag1);
+ * Iterable<Integer> vZ = povs.get(tag3);
+ *
+ * // Get a map of all values in a PTuple:
+ * Map<TupleTag<?>, ?> allVs = povs.getAll();
+ * } </pre>
+ */ +public class PTuple { + /** + * Returns an empty PTuple. + * + *

Longer PTuples can be created by calling + * {@link #and} on the result. + */ + public static PTuple empty() { + return new PTuple(); + } + + /** + * Returns a singleton PTuple containing the given + * value keyed by the given TupleTag. + * + *

Longer PTuples can be created by calling + * {@link #and} on the result. + */ + public static PTuple of(TupleTag tag, V value) { + return empty().and(tag, value); + } + + /** + * Returns a new PTuple that has all the values and + * tags of this PTuple plus the given value and tag. + * + *

The given TupleTag should not already be mapped to a + * value in this PTuple. + */ + public PTuple and(TupleTag tag, V value) { + Map, Object> newMap = new LinkedHashMap, Object>(); + newMap.putAll(valueMap); + newMap.put(tag, value); + return new PTuple(newMap); + } + + /** + * Returns whether this PTuple contains a value with + * the given tag. + */ + public boolean has(TupleTag tag) { + return valueMap.containsKey(tag); + } + + /** + * Returns the value with the given tag in this + * PTuple. Throws IllegalArgumentException if there is no + * such value, i.e., {@code !has(tag)}. + */ + public V get(TupleTag tag) { + if (!has(tag)) { + throw new IllegalArgumentException( + "TupleTag not found in this PTuple"); + } + @SuppressWarnings("unchecked") + V value = (V) valueMap.get(tag); + return value; + } + + /** + * Returns an immutable Map from TupleTag to corresponding + * value, for all the members of this PTuple. + */ + public Map, ?> getAll() { + return valueMap; + } + + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + private final Map, ?> valueMap; + + private PTuple() { + this(new LinkedHashMap()); + } + + private PTuple(Map, ?> valueMap) { + this.valueMap = Collections.unmodifiableMap(valueMap); + } + + /** + * Returns a PTuple with each of the given tags mapping + * to the corresponding value. + * + *

For internal use only. + */ + public static PTuple ofInternal(Map, ?> valueMap) { + return new PTuple(valueMap); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java new file mode 100644 index 0000000000000..c108ceb4f1571 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.BackOff; +import com.google.api.client.util.BackOffUtils; +import com.google.api.client.util.Sleeper; +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.common.collect.TreeTraverser; +import com.google.common.hash.Funnels; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.common.io.ByteStreams; +import com.google.common.io.CountingOutputStream; +import com.google.common.io.Files; + +import com.fasterxml.jackson.core.Base64Variants; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +/** Helper routines for packages. */ +public class PackageUtil { + private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class); + /** + * The initial interval to use between package staging attempts. + */ + private static final long INITIAL_BACKOFF_INTERVAL_MS = 5000L; + /** + * The maximum number of attempts when staging a file. + */ + private static final int MAX_ATTEMPTS = 5; + + /** + * Creates a DataflowPackage containing information about how a classpath element should be + * staged. + * + * @param classpathElement The local path for the classpath element. + * @param stagingDirectory The base location in GCS for staged classpath elements. + * @param overridePackageName If non-null, use the given value as the package name + * instead of generating one automatically. + * @return The package. + */ + public static DataflowPackage createPackage(String classpathElement, + GcsPath stagingDirectory, String overridePackageName) { + try { + File file = new File(classpathElement); + String contentHash = computeContentHash(file); + + // Drop the directory prefixes, and form the filename + hash + extension. + String uniqueName = getUniqueContentName(file, contentHash); + + GcsPath stagingPath = stagingDirectory.resolve(uniqueName); + + DataflowPackage target = new DataflowPackage(); + target.setName(overridePackageName != null ? 
overridePackageName : uniqueName); + target.setLocation(stagingPath.toResourceName()); + return target; + } catch (IOException e) { + throw new RuntimeException("Package setup failure for " + classpathElement, e); + } + } + + /** + * Transfers the classpath elements to GCS. + * + * @param gcsUtil GCS utility. + * @param classpathElements The elements to stage onto GCS. + * @param gcsStaging The path on GCS to stage the classpath elements to. + * @return A list of cloud workflow packages, each representing a classpath element. + */ + public static List stageClasspathElementsToGcs( + GcsUtil gcsUtil, + Collection classpathElements, + GcsPath gcsStaging) { + return stageClasspathElementsToGcs(gcsUtil, classpathElements, gcsStaging, Sleeper.DEFAULT); + } + + // Visible for testing. + static List stageClasspathElementsToGcs( + GcsUtil gcsUtil, + Collection classpathElements, + GcsPath gcsStaging, + Sleeper retrySleeper) { + ArrayList packages = new ArrayList<>(); + + if (gcsStaging == null) { + throw new IllegalArgumentException( + "Can't stage classpath elements on GCS because no GCS location has been provided"); + } + + for (String classpathElement : classpathElements) { + String packageName = null; + if (classpathElement.contains("=")) { + String[] components = classpathElement.split("=", 2); + packageName = components[0]; + classpathElement = components[1]; + } + + DataflowPackage workflowPackage = createPackage( + classpathElement, gcsStaging, packageName); + + packages.add(workflowPackage); + GcsPath target = GcsPath.fromResourceName(workflowPackage.getLocation()); + + // TODO: Should we attempt to detect the Mime type rather than + // always using MimeTypes.BINARY? + try { + long remoteLength = gcsUtil.fileSize(target); + if (remoteLength >= 0 && remoteLength == getClasspathElementLength(classpathElement)) { + LOG.info("Skipping classpath element already on gcs: {} at {}", classpathElement, target); + continue; + } + + // Upload file, retrying on failure. + BackOff backoff = new AttemptBoundedExponentialBackOff( + MAX_ATTEMPTS, + INITIAL_BACKOFF_INTERVAL_MS); + while (true) { + try { + LOG.info("Uploading classpath element {} to {}", classpathElement, target); + try (WritableByteChannel writer = gcsUtil.create(target, MimeTypes.BINARY)) { + copyContent(classpathElement, writer); + } + break; + } catch (IOException e) { + if (BackOffUtils.next(retrySleeper, backoff)) { + LOG.warn("Upload attempt failed, will retry staging of classpath: {}", + classpathElement, e); + } else { + // Rethrow last error, to be included as a cause in the catch below. + LOG.error("Upload failed, will NOT retry staging of classpath: {}", + classpathElement, e); + throw e; + } + } + } + } catch (Exception e) { + throw new RuntimeException("Could not stage classpath element: " + classpathElement, e); + } + } + + return packages; + } + + /** + * If classpathElement is a file, then the files length is returned, otherwise the length + * of the copied stream is returned. + * + * @param classpathElement The local path for the classpath element. + * @return The length of the classpathElement. 
+ */ + private static long getClasspathElementLength(String classpathElement) throws IOException { + File file = new File(classpathElement); + if (file.isFile()) { + return file.length(); + } + + CountingOutputStream countingOutputStream = + new CountingOutputStream(ByteStreams.nullOutputStream()); + try (WritableByteChannel channel = Channels.newChannel(countingOutputStream)) { + copyContent(classpathElement, channel); + } + return countingOutputStream.getCount(); + } + + /** + * Returns a unique name for a file with a given content hash. + *

+ * Directory paths are removed. Example: + *

+   * dir="a/b/c/d", contentHash="f000" => d-f000.zip
+   * file="a/b/c/d.txt", contentHash="f000" => d-f000.txt
+   * file="a/b/c/d", contentHash="f000" => d-f000
+   * 
+ */ + static String getUniqueContentName(File classpathElement, String contentHash) { + String fileName = Files.getNameWithoutExtension(classpathElement.getAbsolutePath()); + String fileExtension = Files.getFileExtension(classpathElement.getAbsolutePath()); + if (classpathElement.isDirectory()) { + return fileName + "-" + contentHash + ".zip"; + } else if (fileExtension.isEmpty()) { + return fileName + "-" + contentHash; + } + return fileName + "-" + contentHash + "." + fileExtension; + } + + /** + * Computes a message digest of the file/directory contents, returning a base64 string which is + * suitable for use in URLs. + */ + private static String computeContentHash(File classpathElement) throws IOException { + TreeTraverser files = Files.fileTreeTraverser(); + Hasher hasher = Hashing.md5().newHasher(); + for (File currentFile : files.preOrderTraversal(classpathElement)) { + String relativePath = relativize(currentFile, classpathElement); + hasher.putString(relativePath, StandardCharsets.UTF_8); + if (currentFile.isDirectory()) { + hasher.putLong(-1L); + continue; + } + hasher.putLong(currentFile.length()); + Files.asByteSource(currentFile).copyTo(Funnels.asOutputStream(hasher)); + } + return Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes()); + } + + /** + * Copies the contents of the classpathElement to the output channel. + *

+ * If the classpathElement is a directory, a Zip stream is constructed on the fly;
+ * otherwise the file contents are copied as-is.

+ * The output channel is not closed. + */ + private static void copyContent(String classpathElement, WritableByteChannel outputChannel) + throws IOException { + final File classpathElementFile = new File(classpathElement); + if (!classpathElementFile.isDirectory()) { + Files.asByteSource(classpathElementFile).copyTo(Channels.newOutputStream(outputChannel)); + return; + } + + ZipOutputStream zos = new ZipOutputStream(Channels.newOutputStream(outputChannel)); + zipDirectoryRecursive(classpathElementFile, classpathElementFile, zos); + zos.finish(); + } + + /** + * Private helper function for zipping files. This one goes recursively through the input + * directory and all of its subdirectories and adds the single zip entries. + * + * @param file the file or directory to be added to the zip file. + * @param root each file uses the root directory to generate its relative path within the zip. + * @param zos the zipstream to write to. + * @throws IOException the zipping failed, e.g. because the output was not writable. + */ + private static void zipDirectoryRecursive(File file, File root, ZipOutputStream zos) + throws IOException { + final String entryName = relativize(file, root); + if (file.isDirectory()) { + // We are hitting a directory. Start the recursion. + // Add the empty entry if it is a subdirectory and the subdirectory has no children. + // Don't add it otherwise, as this is incompatible with certain implementations of unzip. + if (file.list().length == 0 && !file.equals(root)) { + ZipEntry entry = new ZipEntry(entryName + "/"); + zos.putNextEntry(entry); + } else { + // loop through the directory content, and zip the files + for (File currentFile : file.listFiles()) { + zipDirectoryRecursive(currentFile, root, zos); + } + } + } else { + // Put the next zip-entry into the zipoutputstream. + ZipEntry entry = new ZipEntry(entryName); + zos.putNextEntry(entry); + Files.asByteSource(file).copyTo(zos); + } + } + + /** + * Constructs a relative path between file and root. + *

+ * This function will attempt to use {@link java.nio.file.Path#relativize} and + * will fallback to using {@link java.net.URI#relativize} in AppEngine. + * + * @param file The file for which the relative path is being constructed for. + * @param root The root from which the relative path should be constructed. + * @return The relative path between the file and root. + */ + private static String relativize(File file, File root) { + if (AppEngineEnvironment.IS_APP_ENGINE) { + // AppEngine doesn't allow for java.nio.file.Path to be used so we rely on + // using URIs, but URIs are broken for UNC paths which AppEngine doesn't + // use. See for more details: http://wiki.eclipse.org/Eclipse/UNC_Paths + return root.toURI().relativize(file.toURI()).getPath(); + } + return root.toPath().relativize(file.toPath()).toString(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java new file mode 100644 index 0000000000000..96b2ece5cf987 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.WindowUtils.bufferTag; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.util.Collection; + +/** + * A WindowSet where each value is placed in exactly one window, + * and windows are never merged, deleted, or flushed early, and the + * WindowSet itself is never exposed to user code, allowing + * a much simpler (and cheaper) implementation. + * + * This WindowSet only works with {@link StreamingGroupAlsoByWindowsDoFn}. + */ +class PartitionBufferingWindowSet + extends AbstractWindowSet, W> { + PartitionBufferingWindowSet( + K key, + WindowingFn windowingFn, + Coder inputCoder, + DoFnProcessContext>> context, + ActiveWindowManager activeWindowManager) { + super(key, windowingFn, inputCoder, context, activeWindowManager); + } + + @Override + public void put(W window, V value) throws Exception { + context.context.stepContext.writeToTagList( + bufferTag(window, windowingFn.windowCoder(), inputCoder), value, context.timestamp()); + // Adds the window even if it is already present, relying on the streaming backend to + // de-deduplicate. 
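+    // Registering the window below arranges, through the execution context, for a
+    // timer at the window's maximum timestamp, at which point the buffered values
+    // for the window are flushed.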
+ activeWindowManager.addWindow(window); + } + + @Override + public void remove(W window) throws Exception { + CodedTupleTag tag = bufferTag(window, windowingFn.windowCoder(), inputCoder); + context.context.stepContext.deleteTagList(tag); + } + + @Override + public void merge(Collection otherWindows, W newWindow) { + throw new UnsupportedOperationException(); + } + + @Override + public Collection windows() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean contains(W window) { + throw new UnsupportedOperationException(); + } + + @Override + protected Iterable finalValue(W window) throws Exception { + CodedTupleTag tag = bufferTag(window, windowingFn.windowCoder(), inputCoder); + Iterable result = context.context.stepContext.readTagList(tag); + if (result == null) { + throw new IllegalStateException("finalValue called for non-existent window"); + } + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java new file mode 100644 index 0000000000000..85a81cdeff9c9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +/** + * Constant property names used by the SDK in CloudWorkflow specifications. 
+ */ +public class PropertyNames { + public static final String APPEND_TRAILING_NEWLINES = "append_trailing_newlines"; + public static final String BIGQUERY_CREATE_DISPOSITION = "create_disposition"; + public static final String BIGQUERY_DATASET = "dataset"; + public static final String BIGQUERY_PROJECT = "project"; + public static final String BIGQUERY_SCHEMA = "schema"; + public static final String BIGQUERY_TABLE = "table"; + public static final String BIGQUERY_WRITE_DISPOSITION = "write_disposition"; + public static final String CO_GBK_RESULT_SCHEMA = "co_gbk_result_schema"; + public static final String COMBINE_FN = "combine_fn"; + public static final String COMPONENT_ENCODINGS = "component_encodings"; + public static final String CUSTOM_SOURCE_FORMAT = "custom_source"; + public static final String CUSTOM_SOURCE_STEP_INPUT = "custom_source_step_input"; + public static final String CUSTOM_SOURCE_SPEC = "spec"; + public static final String CUSTOM_SOURCE_METADATA = "metadata"; + public static final String CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING = "does_not_need_splitting"; + public static final String CUSTOM_SOURCE_PRODUCES_SORTED_KEYS = "produces_sorted_keys"; + public static final String CUSTOM_SOURCE_IS_INFINITE = "is_infinite"; + public static final String CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES = "estimated_size_bytes"; + public static final String ELEMENT = "element"; + public static final String ELEMENTS = "elements"; + public static final String ENCODING = "encoding"; + public static final String END_INDEX = "end_index"; + public static final String END_OFFSET = "end_offset"; + public static final String END_SHUFFLE_POSITION = "end_shuffle_position"; + public static final String ENVIRONMENT_VERSION_JOB_TYPE_KEY = "job_type"; + public static final String ENVIRONMENT_VERSION_MAJOR_KEY = "major"; + public static final String FILENAME = "filename"; + public static final String FILENAME_PREFIX = "filename_prefix"; + public static final String FILENAME_SUFFIX = "filename_suffix"; + public static final String FILEPATTERN = "filepattern"; + public static final String FOOTER = "footer"; + public static final String FORMAT = "format"; + public static final String HEADER = "header"; + public static final String INPUTS = "inputs"; + public static final String INPUT_CODER = "input_coder"; + public static final String IS_GENERATED = "is_generated"; + public static final String IS_PAIR_LIKE = "is_pair_like"; + public static final String IS_STREAM_LIKE = "is_stream_like"; + public static final String IS_WRAPPER = "is_wrapper"; + public static final String NON_PARALLEL_INPUTS = "non_parallel_inputs"; + public static final String NUM_SHARDS = "num_shards"; + public static final String OBJECT_TYPE_NAME = "@type"; + public static final String OUTPUT = "output"; + public static final String OUTPUT_INFO = "output_info"; + public static final String OUTPUT_NAME = "output_name"; + public static final String PARALLEL_INPUT = "parallel_input"; + public static final String PHASE = "phase"; + public static final String PUBSUB_SUBSCRIPTION = "pubsub_subscription"; + public static final String PUBSUB_TOPIC = "pubsub_topic"; + public static final String SCALAR_FIELD_NAME = "value"; + public static final String SERIALIZED_FN = "serialized_fn"; + public static final String SHARD_NAME_TEMPLATE = "shard_template"; + public static final String SHUFFLE_KIND = "shuffle_kind"; + public static final String SHUFFLE_READER_CONFIG = "shuffle_reader_config"; + public static final String SHUFFLE_WRITER_CONFIG = 
"shuffle_writer_config"; + public static final String START_INDEX = "start_index"; + public static final String START_OFFSET = "start_offset"; + public static final String START_SHUFFLE_POSITION = "start_shuffle_position"; + public static final String STRIP_TRAILING_NEWLINES = "strip_trailing_newlines"; + public static final String TUPLE_TAGS = "tuple_tags"; + public static final String USER_FN = "user_fn"; + public static final String USER_NAME = "user_name"; + public static final String USES_KEYED_STATE = "uses_keyed_state"; + public static final String VALUE = "value"; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java new file mode 100644 index 0000000000000..34d40f1470793 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.http.HttpBackOffIOExceptionHandler; +import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler; +import com.google.api.client.http.HttpRequest; +import com.google.api.client.http.HttpRequestInitializer; +import com.google.api.client.http.HttpResponse; +import com.google.api.client.http.HttpUnsuccessfulResponseHandler; +import com.google.api.client.util.BackOff; +import com.google.api.client.util.ExponentialBackOff; +import com.google.api.client.util.NanoClock; +import com.google.api.client.util.Sleeper; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import javax.annotation.Nullable; + +/** + * Implements a request initializer which adds retry handlers to all + * HttpRequests. + * + * This allows chaining through to another HttpRequestInitializer, since + * clients have exactly one HttpRequestInitializer, and Credential is also + * a required HttpRequestInitializer. + */ +public class RetryHttpRequestInitializer implements HttpRequestInitializer { + + private static final Logger LOG = LoggerFactory.getLogger(RetryHttpRequestInitializer.class); + + /** + * Http response codes that should be silently ignored. + */ + private static final Set IGNORED_RESPONSE_CODES = new HashSet<>( + Arrays.asList(307 /* Redirect, handled by Apiary client */, + 308 /* Resume Incomplete, handled by Apiary client */)); + + /** + * Http response timeout to use for hanging gets. 
+ */ + private static final int HANGING_GET_TIMEOUT_SEC = 80; + + private static class LoggingHttpBackOffIOExceptionHandler + extends HttpBackOffIOExceptionHandler { + public LoggingHttpBackOffIOExceptionHandler(BackOff backOff) { + super(backOff); + } + + @Override + public boolean handleIOException(HttpRequest request, boolean supportsRetry) + throws IOException { + boolean willRetry = super.handleIOException(request, supportsRetry); + if (willRetry) { + LOG.info("Request failed with IOException, will retry: {}", request.getUrl()); + } else { + LOG.info("Request failed with IOException, will NOT retry: {}", request.getUrl()); + } + return willRetry; + } + } + + private static class LoggingHttpBackoffUnsuccessfulResponseHandler + implements HttpUnsuccessfulResponseHandler { + private final HttpBackOffUnsuccessfulResponseHandler handler; + + public LoggingHttpBackoffUnsuccessfulResponseHandler(BackOff backoff, + Sleeper sleeper) { + handler = new HttpBackOffUnsuccessfulResponseHandler(backoff); + handler.setSleeper(sleeper); + handler.setBackOffRequired( + new HttpBackOffUnsuccessfulResponseHandler.BackOffRequired() { + @Override + public boolean isRequired(HttpResponse response) { + int statusCode = response.getStatusCode(); + return (statusCode / 100 == 5) || // 5xx: server error + statusCode == 429; // 429: Too many requests + } + }); + } + + @Override + public boolean handleResponse(HttpRequest request, HttpResponse response, + boolean supportsRetry) throws IOException { + boolean retry = handler.handleResponse(request, response, supportsRetry); + if (retry) { + LOG.info("Request failed with code {} will retry: {}", + response.getStatusCode(), request.getUrl()); + + } else if (!IGNORED_RESPONSE_CODES.contains(response.getStatusCode())) { + LOG.info("Request failed with code {}, will NOT retry: {}", + response.getStatusCode(), request.getUrl()); + } + + return retry; + } + } + + private final HttpRequestInitializer chained; + + private final NanoClock nanoClock; // used for testing + + private final Sleeper sleeper; // used for testing + + /** + * @param chained a downstream HttpRequestInitializer, which will also be + * applied to HttpRequest initialization. May be null. + */ + public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) { + this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT); + } + + public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, + NanoClock nanoClock, Sleeper sleeper) { + this.chained = chained; + this.nanoClock = nanoClock; + this.sleeper = sleeper; + } + + @Override + public void initialize(HttpRequest request) throws IOException { + if (chained != null) { + chained.initialize(request); + } + + // Set a timeout for hanging-gets. + // TODO: Do this exclusively for work requests. + request.setReadTimeout(HANGING_GET_TIMEOUT_SEC * 1000); + + // Back off on retryable http errors. + request.setUnsuccessfulResponseHandler( + // A back-off multiplier of 2 raises the maximum request retrying time + // to approximately 5 minutes (keeping other back-off parameters to + // their default values). + new LoggingHttpBackoffUnsuccessfulResponseHandler( + new ExponentialBackOff.Builder().setNanoClock(nanoClock) + .setMultiplier(2).build(), + sleeper)); + + // Retry immediately on IOExceptions. 
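+    // BackOff.ZERO_BACKOFF retries with no delay between attempts; the overall
+    // number of attempts is still limited by the HttpRequest's retry settings.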
+ LoggingHttpBackOffIOExceptionHandler loggingBackoffHandler = + new LoggingHttpBackOffIOExceptionHandler(BackOff.ZERO_BACKOFF); + request.setIOExceptionHandler(loggingBackoffHandler); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java new file mode 100644 index 0000000000000..9ee09c8608ab7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.Arrays; + +/** + * Utilities for working with Serializables. + */ +public class SerializableUtils { + /** + * Serializes the argument into an array of bytes, and returns it. + * + * @throws IllegalArgumentException if there are errors when serializing + */ + public static byte[] serializeToByteArray(Serializable value) { + try { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + try (ObjectOutputStream oos = new ObjectOutputStream(buffer)) { + oos.writeObject(value); + } + return buffer.toByteArray(); + } catch (IOException exn) { + throw new IllegalArgumentException( + "unable to serialize " + value, + exn); + } + } + + /** + * Deserializes an object from the given array of bytes, e.g., as + * serialized using {@link #serializeToByteArray}, and returns it. + * + * @throws IllegalArgumentException if there are errors when + * deserializing, using the provided description to identify what + * was being deserialized + */ + public static Object deserializeFromByteArray(byte[] encodedValue, + String description) { + try { + try (ObjectInputStream ois = new ObjectInputStream( + new ByteArrayInputStream(encodedValue))) { + return ois.readObject(); + } + } catch (IOException | ClassNotFoundException exn) { + throw new IllegalArgumentException( + "unable to deserialize " + description, + exn); + } + } + + public static T ensureSerializable(T value) { + @SuppressWarnings("unchecked") + T copy = (T) deserializeFromByteArray(serializeToByteArray(value), + value.toString()); + return copy; + } + + /** + * Serializes a Coder and verifies that it can be correctly deserialized. + *

+ * Throws a RuntimeException if serialized Coder cannot be deserialized, or + * if the deserialized instance is not equal to the original. + *
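+ * <p>Illustrative usage (a sketch):
+ * <pre> {@code
+ * Coder<String> coder = ...;
+ * // Round-trips the coder through its cloud encoding and checks equality:
+ * CloudObject encoding = SerializableUtils.ensureSerializable(coder);
+ * } </pre>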

+ * @return the serialized Coder, as a {@link CloudObject} + */ + public static CloudObject ensureSerializable(Coder coder) { + CloudObject cloudObject = coder.asCloudObject(); + + Coder decoded; + try { + decoded = Serializer.deserialize(cloudObject, Coder.class); + } catch (RuntimeException e) { + throw new RuntimeException( + String.format("Unable to deserialize Coder: %s. " + + "Check that a suitable constructor is defined. " + + "See Coder for details.", coder), e + ); + } + Preconditions.checkState(coder.equals(decoded), + String.format("Coder not equal to original after serialization, " + + "indicating that the Coder may not implement serialization " + + "correctly. Before: %s, after: %s, cloud encoding: %s", + coder, decoded, cloudObject)); + + return cloudObject; + } + + /** + * Serializes an arbitrary T with the given Coder and verifies + * that it can be correctly deserialized. + */ + public static T ensureSerializableByCoder( + Coder coder, T value, String errorContext) { + byte[] encodedValue; + try { + encodedValue = encodeToByteArray(coder, value); + } catch (CoderException exn) { + // TODO: Put in better element printing: + // truncate if too long. + throw new IllegalArgumentException( + errorContext + ": unable to encode value " + + value + " using " + coder, + exn); + } + try { + return decodeFromByteArray(coder, encodedValue); + } catch (CoderException exn) { + // TODO: Put in better encoded byte array printing: + // use printable chars with escapes instead of codes, and + // truncate if too long. + throw new IllegalArgumentException( + errorContext + ": unable to decode " + Arrays.toString(encodedValue) + + ", encoding of value " + value + ", using " + coder, + exn); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java new file mode 100644 index 0000000000000..42071ec467ee3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * Utility for converting objects between Java and Cloud representations. + */ +public final class Serializer { + // Delay initialization of statics until the first call to Serializer. + private static class SingletonHelper { + static final ObjectMapper OBJECT_MAPPER = createObjectMapper(); + static final ObjectMapper TREE_MAPPER = createTreeMapper(); + + /** + * Creates the object mapper which will be used for serializing Google API + * client maps into Jackson trees. 
+ */ + private static ObjectMapper createTreeMapper() { + return new ObjectMapper(); + } + + /** + * Creates the object mapper which will be used for deserializing Jackson + * trees into objects. + */ + private static ObjectMapper createObjectMapper() { + ObjectMapper m = new ObjectMapper(); + // Ignore properties which are not used by the object. + m.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); + + // For parameters of type Object, use the @type property to determine the + // class to instantiate. + // + // TODO: It would be ideal to do this for all non-final classes. The + // problem with using DefaultTyping.NON_FINAL is that it insists on having + // type information in the JSON for classes with useful default + // implementations, such as List. Ideally, we'd combine these defaults + // with available type information if that information's present. + m.enableDefaultTypingAsProperty( + ObjectMapper.DefaultTyping.JAVA_LANG_OBJECT, + PropertyNames.OBJECT_TYPE_NAME); + + m.registerModule(new CoderUtils.Jackson2Module()); + + return m; + } + } + + /** + * Registers a module to use during object deserialization. + */ + public static void registerModule(Module module) { + SingletonHelper.OBJECT_MAPPER.registerModule(module); + } + + /** + * Deserializes an object from a Dataflow structured encoding (represented in + * Java as a map). + *

+ * The standard Dataflow SDK object serialization protocol is based on JSON. + * Data is typically encoded as a JSON object whose fields represent the + * object's data. + *

+ * The actual deserialization is performed by Jackson, which can deserialize + * public fields, use JavaBean setters, or use injection annotations to + * indicate how to construct the object. The {@link ObjectMapper} used is + * configured to use the "@type" field as the name of the class to instantiate + * (supporting polymorphic types), and may be further configured by + * annotations or via {@link #registerModule}. + *
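+ * <p>For example (an illustrative sketch mirroring the call made in
+ * {@code SerializableUtils.ensureSerializable(Coder)}):
+ * <pre> {@code
+ * Map<String, Object> encoding = ...;   // e.g. a CloudObject from Coder.asCloudObject()
+ * Coder<?> coder = Serializer.deserialize(encoding, Coder.class);  // "@type" picks the concrete Coder
+ * } </pre>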

+ * @see + * Jackson Data-Binding + * @see + * Jackson-Annotations + * @param serialized the object in untyped decoded form (i.e. a nested {@link Map}) + * @param clazz the expected object class + */ + public static T deserialize(Map serialized, Class clazz) { + try { + return SingletonHelper.OBJECT_MAPPER.treeToValue( + SingletonHelper.TREE_MAPPER.valueToTree( + deserializeCloudKnownTypes(serialized)), + clazz); + } catch (JsonProcessingException e) { + throw new RuntimeException( + "Unable to deserialize class " + clazz, e); + } + } + + /** + * Recursively walks the supplied map, looking for well-known cloud type + * information (keyed as {@link PropertyNames#OBJECT_TYPE_NAME}, matching a + * URI value from the {@link CloudKnownType} enum. Upon finding this type + * information, it converts it into the correspondingly typed Java value. + */ + private static Object deserializeCloudKnownTypes(Object src) { + if (src instanceof Map) { + Map srcMap = (Map) src; + @Nullable Object value = srcMap.get(PropertyNames.SCALAR_FIELD_NAME); + @Nullable CloudKnownType type = + CloudKnownType.forUri((String) srcMap.get(PropertyNames.OBJECT_TYPE_NAME)); + if (type != null && value != null) { + // It's a value of a well-known cloud type; let the known type handler + // handle the translation. + Object result = type.parse(value, type.defaultClass()); + return result; + } + // Otherwise, it's just an ordinary map. + Map dest = new HashMap<>(srcMap.size()); + for (Map.Entry entry : srcMap.entrySet()) { + dest.put(entry.getKey(), deserializeCloudKnownTypes(entry.getValue())); + } + return dest; + } + if (src instanceof List) { + List srcList = (List) src; + List dest = new ArrayList<>(srcList.size()); + for (Object obj : srcList) { + dest.add(deserializeCloudKnownTypes(obj)); + } + return dest; + } + // Neither a Map nor a List; no translation needed. + return src; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java new file mode 100644 index 0000000000000..4a3322b345355 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; + +/** + * Implements a WritableByteChannel which may contain multiple output shards. + * + *

This provides {@link #writeToShard}, which takes a shard number for + * writing to a particular shard. + * + *
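+ * <p>Illustrative usage (a sketch; {@code shard0Channel}, {@code shard1Channel} and
+ * {@code buffer} are assumed to exist):
+ * <pre> {@code
+ * ShardingWritableByteChannel channel = new ShardingWritableByteChannel();
+ * channel.addChannel(shard0Channel);
+ * channel.addChannel(shard1Channel);
+ * channel.writeToShard(1, buffer);                                       // shard 1 only
+ * channel.writeToShard(ShardingWritableByteChannel.ALL_SHARDS, buffer);  // every shard
+ * channel.close();
+ * } </pre>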

The channel is considered open if all downstream channels are open, and + * closes all downstream channels when closed. + */ +public class ShardingWritableByteChannel implements WritableByteChannel { + + /** + * Special shard number which causes a write to all shards. + */ + public static final int ALL_SHARDS = -2; + + + private final ArrayList writers = new ArrayList<>(); + + /** + * Returns the number of output shards. + */ + public int getNumShards() { + return writers.size(); + } + + /** + * Adds another shard output channel. + */ + public void addChannel(WritableByteChannel writer) { + writers.add(writer); + } + + /** + * Returns the WritableByteChannel associated with the given shard number. + */ + public WritableByteChannel getChannel(int shardNum) { + return writers.get(shardNum); + } + + /** + * Writes the buffer to the given shard. + * + *

This does not change the current output shard. + * + * @return The total number of bytes written. If the shard number is + * {@link #ALL_SHARDS}, then the total is the sum of each individual shard + * write. + */ + public int writeToShard(int shardNum, ByteBuffer src) throws IOException { + if (shardNum >= 0) { + return writers.get(shardNum).write(src); + } + + switch (shardNum) { + case ALL_SHARDS: + int size = 0; + for (WritableByteChannel writer : writers) { + size += writer.write(src); + } + return size; + + default: + throw new IllegalArgumentException("Illegal shard number: " + shardNum); + } + } + + /** + * Writes a buffer to all shards. + * + *

Same as calling {@code writeToShard(ALL_SHARDS, buf)}. + */ + @Override + public int write(ByteBuffer src) throws IOException { + return writeToShard(ALL_SHARDS, src); + } + + @Override + public boolean isOpen() { + for (WritableByteChannel writer : writers) { + if (!writer.isOpen()) { + return false; + } + } + + return true; + } + + @Override + public void close() throws IOException { + for (WritableByteChannel writer : writers) { + writer.close(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java new file mode 100644 index 0000000000000..dcfd58aee92d2 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.PartitioningWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.values.KV; + +import java.io.IOException; + +/** + * DoFn that merges windows and groups elements in those windows. 
+ * + * @param key type + * @param input value element type + * @param output value element type + * @param window type + */ +public class StreamingGroupAlsoByWindowsDoFn + extends DoFn>, KV> implements DoFn.RequiresKeyedState { + + protected WindowingFn windowingFn; + protected Coder inputCoder; + + protected StreamingGroupAlsoByWindowsDoFn( + WindowingFn windowingFn, + Coder inputCoder) { + this.windowingFn = windowingFn; + this.inputCoder = inputCoder; + } + + public static + StreamingGroupAlsoByWindowsDoFn create( + WindowingFn windowingFn, + Coder inputCoder) { + return new StreamingGroupAlsoByWindowsDoFn<>(windowingFn, inputCoder); + } + + private AbstractWindowSet createWindowSet( + K key, + DoFnProcessContext> context, + AbstractWindowSet.ActiveWindowManager activeWindowManager) throws Exception { + if (windowingFn instanceof PartitioningWindowingFn) { + return new PartitionBufferingWindowSet( + key, windowingFn, inputCoder, context, activeWindowManager); + } else { + return new BufferingWindowSet(key, windowingFn, inputCoder, context, activeWindowManager); + } + } + + @Override + public void processElement(ProcessContext processContext) throws Exception { + DoFnProcessContext>, KV> context = + (DoFnProcessContext>, KV>) processContext; + if (!context.element().isTimer()) { + KV element = context.element().element(); + K key = element.getKey(); + VI value = element.getValue(); + AbstractWindowSet windowSet = createWindowSet( + key, context, new StreamingActiveWindowManager<>(context, windowingFn.windowCoder())); + + for (BoundedWindow window : context.windows()) { + windowSet.put((W) window, value); + } + + windowSet.flush(); + } else { + TimerOrElement timer = context.element(); + AbstractWindowSet windowSet = createWindowSet( + (K) timer.key(), context, new StreamingActiveWindowManager<>( + context, windowingFn.windowCoder())); + + // Attempt to merge windows before emitting; that may remove the current window under + // consideration. + ((WindowingFn) windowingFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + + W window = WindowUtils.windowFromString(timer.tag(), windowingFn.windowCoder()); + boolean windowExists; + try { + windowExists = windowSet.contains(window); + } catch (UnsupportedOperationException e) { + windowExists = true; + } + if (windowExists) { + windowSet.markCompleted(window); + windowSet.flush(); + } + } + } + + private static class StreamingActiveWindowManager + implements AbstractWindowSet.ActiveWindowManager { + DoFnProcessContext context; + Coder coder; + + StreamingActiveWindowManager( + DoFnProcessContext context, + Coder coder) { + this.context = context; + this.coder = coder; + } + + @Override + public void addWindow(W window) throws IOException { + context.context.stepContext.getExecutionContext().setTimer( + WindowUtils.windowToString(window, coder), window.maxTimestamp()); + } + + @Override + public void removeWindow(W window) throws IOException { + context.context.stepContext.getExecutionContext().deleteTimer( + WindowUtils.windowToString(window, coder)); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java new file mode 100644 index 0000000000000..382683c2de3c3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.common.base.Joiner; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utilities for working with JSON and other human-readable string formats. + */ +public class StringUtils { + /** + * Converts the given array of bytes into a legal JSON string. + * + * Uses a simple strategy of converting each byte to a single char, + * except for non-printable chars, non-ASCII chars, and '%', '\', + * and '"', which are encoded as three chars in '%xx' format, where + * 'xx' is the hexadecimal encoding of the byte. + */ + public static String byteArrayToJsonString(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + if (b >= 32 && b < 127) { + // A printable ascii character. + char c = (char) b; + if (c != '%' && c != '\\' && c != '\"') { + // Not an escape prefix or special character, either. + // Send through unchanged. + sb.append(c); + continue; + } + } + // Send through escaped. Use '%xx' format. + sb.append(String.format("%%%02x", b)); + } + return sb.toString(); + } + + /** + * Converts the given string, encoded using {@link #byteArrayToJsonString}, + * into a byte array. + * + * @throws IllegalArgumentException if the argument string is not legal + */ + public static byte[] jsonStringToByteArray(String string) { + List bytes = new ArrayList<>(); + for (int i = 0; i < string.length(); ) { + char c = string.charAt(i); + Byte b; + if (c == '%') { + // Escaped. Expect '%xx' format. + try { + b = (byte) Integer.parseInt(string.substring(i + 1, i + 3), 16); + } catch (IndexOutOfBoundsException | NumberFormatException exn) { + throw new IllegalArgumentException( + "not in legal encoded format; " + + "substring [" + i + ".." + (i + 2) + "] not in format \"%xx\"", + exn); + } + i += 3; + } else { + // Send through unchanged. + b = (byte) c; + i++; + } + bytes.add(b); + } + byte[] byteArray = new byte[bytes.size()]; + int i = 0; + for (Byte b : bytes) { + byteArray[i++] = b; + } + return byteArray; + } + + private static final String[] STANDARD_NAME_SUFFIXES = + new String[]{"DoFn", "Fn"}; + + /** + * Pattern to match a non-anonymous inner class. + * Eg, matches "Foo$Bar", or even "Foo$1$Bar", but not "Foo$1" or "Foo$1$2". + */ + private static final Pattern NAMED_INNER_CLASS = + Pattern.compile(".+\\$(?[^0-9].*)"); + + /** + * Returns a simple name for a class. + * + *

<p> Note: this is non-invertible - the name may be simplified to an + * extent that it cannot be mapped back to the original class. + * + *
<p> This can be used to generate human-readable transform names. It + * removes the package from the name, and removes common suffixes. + * + *
<p> Examples: + * <ul> + *   <li> {@code some.package.WordSummaryDoFn} -> "WordSummary" + *   <li> {@code another.package.PairingFn} -> "Pairing" + * </ul>
+ */ + public static String approximateSimpleName(Class clazz) { + String fullName = clazz.getName(); + String shortName = fullName.substring(fullName.lastIndexOf('.') + 1); + + // Simplify inner class name by dropping outer class prefixes. + Matcher m = NAMED_INNER_CLASS.matcher(shortName); + if (m.matches()) { + shortName = m.group("INNER"); + } + + // Drop common suffixes for each named component. + String[] names = shortName.split("\\$"); + for (int i = 0; i < names.length; i++) { + names[i] = simplifyNameComponent(names[i]); + } + + return Joiner.on('$').join(names); + } + + private static String simplifyNameComponent(String name) { + for (String suffix : STANDARD_NAME_SUFFIXES) { + if (name.endsWith(suffix) && name.length() > suffix.length()) { + return name.substring(0, name.length() - suffix.length()); + } + } + return name; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java new file mode 100644 index 0000000000000..8fb2e834f19e5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Data; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A collection of static methods for manipulating datastructure representations + * transferred via the Dataflow API. + */ +public final class Structs { + private Structs() {} // Non-instantiable + + public static String getString(Map map, String name) throws Exception { + return getValue(map, name, String.class, "a string"); + } + + public static String getString( + Map map, String name, @Nullable String defaultValue) + throws Exception { + return getValue(map, name, String.class, "a string", defaultValue); + } + + public static byte[] getBytes(Map map, String name) throws Exception { + @Nullable byte[] result = getBytes(map, name, null); + if (result == null) { + throw new ParameterNotFoundException(name, map); + } + return result; + } + + @Nullable + public static byte[] getBytes(Map map, String name, @Nullable byte[] defaultValue) + throws Exception { + @Nullable String jsonString = getString(map, name, null); + if (jsonString == null) { + return defaultValue; + } + // TODO: Need to agree on a format for encoding bytes in + // a string that can be sent over the Apiary wire, over the cloud + // map task work API. base64 encoding seems pretty common. Switch to it? 
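+    // For example, the byte array {0x00, 0x25} currently travels as the string "%00%25"
+    // produced by StringUtils.byteArrayToJsonString, and is decoded again here.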
+ return StringUtils.jsonStringToByteArray(jsonString); + } + + public static Boolean getBoolean(Map map, String name) throws Exception { + return getValue(map, name, Boolean.class, "a boolean"); + } + + @Nullable + public static Boolean getBoolean( + Map map, String name, @Nullable Boolean defaultValue) + throws Exception { + return getValue(map, name, Boolean.class, "a boolean", defaultValue); + } + + public static Long getLong(Map map, String name) throws Exception { + return getValue(map, name, Long.class, "an int"); + } + + @Nullable + public static Long getLong(Map map, String name, @Nullable Long defaultValue) + throws Exception { + return getValue(map, name, Long.class, "an int", defaultValue); + } + + @Nullable + public static List getStrings( + Map map, String name, @Nullable List defaultValue) + throws Exception { + @Nullable Object value = map.get(name); + if (value == null) { + if (map.containsKey(name)) { + throw new IncorrectTypeException(name, map, "a string or a list"); + } + return defaultValue; + } + if (Data.isNull(value)) { + // This is a JSON literal null. When represented as a list of strings, + // this is an empty list. + return Collections.emptyList(); + } + @Nullable String singletonString = decodeValue(value, String.class); + if (singletonString != null) { + return Collections.singletonList(singletonString); + } + if (!(value instanceof List)) { + throw new IncorrectTypeException(name, map, "a string or a list"); + } + @SuppressWarnings("unchecked") + List elements = (List) value; + List result = new ArrayList<>(elements.size()); + for (Object o : elements) { + @Nullable String s = decodeValue(o, String.class); + if (s == null) { + throw new IncorrectTypeException(name, map, "a list of strings"); + } + result.add(s); + } + return result; + } + + public static Map getObject(Map map, String name) + throws Exception { + @Nullable Map result = getObject(map, name, null); + if (result == null) { + throw new ParameterNotFoundException(name, map); + } + return result; + } + + @Nullable + public static Map getObject( + Map map, String name, @Nullable Map defaultValue) + throws Exception { + @Nullable Object value = map.get(name); + if (value == null) { + if (map.containsKey(name)) { + throw new IncorrectTypeException(name, map, "an object"); + } + return defaultValue; + } + return checkObject(value, map, name); + } + + private static Map checkObject( + Object value, Map map, String name) throws Exception { + if (Data.isNull(value)) { + // This is a JSON literal null. When represented as an object, this is an + // empty map. + return Collections.emptyMap(); + } + if (!(value instanceof Map)) { + throw new IncorrectTypeException(name, map, "an object (not a map)"); + } + @SuppressWarnings("unchecked") + Map mapValue = (Map) value; + if (!mapValue.containsKey(PropertyNames.OBJECT_TYPE_NAME)) { + throw new IncorrectTypeException(name, map, + "an object (no \"" + PropertyNames.OBJECT_TYPE_NAME + "\" field)"); + } + return mapValue; + } + + public static Map getDictionary( + Map map, String name) throws Exception { + @Nullable Object value = map.get(name); + if (value == null) { + throw new ParameterNotFoundException(name, map); + } + if (Data.isNull(value)) { + // This is a JSON literal null. When represented as a dictionary, this is + // an empty map. 
+ return Collections.emptyMap(); + } + if (!(value instanceof Map)) { + throw new IncorrectTypeException(name, map, "a dictionary"); + } + @SuppressWarnings("unchecked") + Map result = (Map) value; + return result; + } + + @Nullable + public static Map getDictionary( + Map map, String name, @Nullable Map defaultValue) + throws Exception { + @Nullable Object value = map.get(name); + if (value == null) { + if (map.containsKey(name)) { + throw new IncorrectTypeException(name, map, "a dictionary"); + } + return defaultValue; + } + if (Data.isNull(value)) { + // This is a JSON literal null. When represented as a dictionary, this is + // an empty map. + return Collections.emptyMap(); + } + if (!(value instanceof Map)) { + throw new IncorrectTypeException(name, map, "a dictionary"); + } + @SuppressWarnings("unchecked") + Map result = (Map) value; + return result; + } + + // Builder operations. + + public static void addString(Map map, String name, String value) { + addObject(map, name, CloudObject.forString(value)); + } + + public static void addBoolean(Map map, String name, boolean value) { + addObject(map, name, CloudObject.forBoolean(value)); + } + + public static void addLong(Map map, String name, long value) { + addObject(map, name, CloudObject.forInteger(value)); + } + + public static void addObject( + Map map, String name, Map value) { + map.put(name, value); + } + + public static void addNull(Map map, String name) { + map.put(name, Data.nullOf(Object.class)); + } + + public static void addLongs(Map map, String name, long... longs) { + List> elements = new ArrayList<>(longs.length); + for (Long value : longs) { + elements.add(CloudObject.forInteger(value)); + } + map.put(name, elements); + } + + public static void addList( + Map map, String name, List> elements) { + map.put(name, elements); + } + + public static void addStringList(Map map, String name, List elements) { + ArrayList objects = new ArrayList<>(elements.size()); + for (String element : elements) { + objects.add(CloudObject.forString(element)); + } + addList(map, name, objects); + } + + public static > void addList( + Map map, String name, T[] elements) { + map.put(name, Arrays.asList(elements)); + } + + public static void addDictionary( + Map map, String name, Map value) { + map.put(name, value); + } + + public static void addDouble(Map map, String name, Double value) { + addObject(map, name, CloudObject.forFloat(value)); + } + + // Helper methods for a few of the accessor methods. + + private static T getValue(Map map, String name, Class clazz, String type) + throws Exception { + @Nullable T result = getValue(map, name, clazz, type, null); + if (result == null) { + throw new ParameterNotFoundException(name, map); + } + return result; + } + + @Nullable + private static T getValue( + Map map, String name, Class clazz, String type, @Nullable T defaultValue) + throws Exception { + @Nullable Object value = map.get(name); + if (value == null) { + if (map.containsKey(name)) { + throw new IncorrectTypeException(name, map, type); + } + return defaultValue; + } + T result = decodeValue(value, clazz); + if (result == null) { + // The value exists, but can't be decoded. + throw new IncorrectTypeException(name, map, type); + } + return result; + } + + @Nullable + private static T decodeValue(Object value, Class clazz) { + try { + if (value.getClass() == clazz) { + // decodeValue() is only called for final classes; if the class matches, + // it's safe to just return the value, and if it doesn't match, decoding + // is needed. 
+ return clazz.cast(value); + } + if (!(value instanceof Map)) { + return null; + } + @SuppressWarnings("unchecked") + Map map = (Map) value; + @Nullable String typeName = (String) map.get(PropertyNames.OBJECT_TYPE_NAME); + if (typeName == null) { + return null; + } + @Nullable CloudKnownType knownType = CloudKnownType.forUri(typeName); + if (knownType == null) { + return null; + } + @Nullable Object scalar = map.get(PropertyNames.SCALAR_FIELD_NAME); + if (scalar == null) { + return null; + } + return knownType.parse(scalar, clazz); + } catch (ClassCastException e) { + // If any class cast fails during decoding, the value's not decodable. + return null; + } + } + + private static final class ParameterNotFoundException extends Exception { + private static final long serialVersionUID = 0; + + public ParameterNotFoundException(String name, Map map) { + super("didn't find required parameter " + name + " in " + map); + } + } + + private static final class IncorrectTypeException extends Exception { + private static final long serialVersionUID = 0; + + public IncorrectTypeException(String name, Map map, String type) { + super("required parameter " + name + " in " + map + " not " + type); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java new file mode 100644 index 0000000000000..fa02a6bf3185b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.BearerToken; +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.auth.oauth2.TokenResponse; + +import java.io.IOException; + +/** + * Fake credential, for use in testing. + */ +public class TestCredential extends Credential { + + private final String token; + + public TestCredential() { + this("NULL"); + } + + public TestCredential(String token) { + super(new Builder( + BearerToken.authorizationHeaderAccessMethod())); + this.token = token; + } + + @Override + protected TokenResponse executeRefreshToken() throws IOException { + TokenResponse response = new TokenResponse(); + response.setExpiresInSeconds(5L * 60); + response.setAccessToken(token); + return response; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java new file mode 100644 index 0000000000000..48324818ca63b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.util; + +import org.joda.time.DateTime; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.joda.time.ReadableDuration; +import org.joda.time.ReadableInstant; +import org.joda.time.chrono.ISOChronology; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +/** + * A helper class for converting between Dataflow API and SDK time + * representations. + *

+ * Dataflow API times are strings of the form + * {@code YYYY-MM-dd'T'HH:mm:ss[.nnnn]'Z'}: that is, RFC 3339 + * strings with optional fractional seconds and a 'Z' offset. + *

+ * Dataflow API durations are strings of the form {@code ['-']sssss[.nnnn]'s'}: + * that is, seconds with optional fractional seconds and a literal 's' at the end. + *
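 + * <p> For example, {@code "2014-12-13T10:56:14.123Z"} is a well-formed time and
 + * {@code "30.500s"} a well-formed duration.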

+ * In both formats, fractional seconds are either three digits (millisecond + * resolution), six digits (microsecond resolution), or nine digits (nanosecond + * resolution). + */ +public final class TimeUtil { + private TimeUtil() {} // Non-instantiable. + + private static final Pattern DURATION_PATTERN = Pattern.compile("(\\d+)(?:\\.(\\d+))?s"); + private static final Pattern TIME_PATTERN = + Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d+))?Z"); + + /** + * Converts a {@link ReadableInstant} into a Dateflow API time value. + */ + public static String toCloudTime(ReadableInstant instant) { + // Note that since Joda objects use millisecond resolution, we always + // produce either no fractional seconds or fractional seconds with + // millisecond resolution. + + // Translate the ReadableInstant to a DateTime with ISOChronology. + DateTime time = new DateTime(instant); + + int millis = time.getMillisOfSecond(); + if (millis == 0) { + return String.format("%04d-%02d-%02dT%02d:%02d:%02dZ", + time.getYear(), + time.getMonthOfYear(), + time.getDayOfMonth(), + time.getHourOfDay(), + time.getMinuteOfHour(), + time.getSecondOfMinute()); + } else { + return String.format("%04d-%02d-%02dT%02d:%02d:%02d.%03dZ", + time.getYear(), + time.getMonthOfYear(), + time.getDayOfMonth(), + time.getHourOfDay(), + time.getMinuteOfHour(), + time.getSecondOfMinute(), + millis); + } + } + + /** + * Converts a time value received via the Dataflow API into the corresponding + * {@link Instant}. + * @return the parsed time, or null if a parse error occurs + */ + @Nullable + public static Instant fromCloudTime(String time) { + Matcher matcher = TIME_PATTERN.matcher(time); + if (!matcher.matches()) { + return null; + } + int year = Integer.valueOf(matcher.group(1)); + int month = Integer.valueOf(matcher.group(2)); + int day = Integer.valueOf(matcher.group(3)); + int hour = Integer.valueOf(matcher.group(4)); + int minute = Integer.valueOf(matcher.group(5)); + int second = Integer.valueOf(matcher.group(6)); + int millis = 0; + + String frac = matcher.group(7); + if (frac != null) { + int fracs = Integer.valueOf(frac); + if (frac.length() == 3) { // millisecond resolution + millis = fracs; + } else if (frac.length() == 6) { // microsecond resolution + millis = fracs / 1000; + } else if (frac.length() == 9) { // nanosecond resolution + millis = fracs / 1000000; + } else { + return null; + } + } + + return new DateTime(year, month, day, hour, minute, second, millis, + ISOChronology.getInstanceUTC()).toInstant(); + } + + /** + * Converts a {@link ReadableDuration} into a Dataflow API duration string. + */ + public static String toCloudDuration(ReadableDuration duration) { + // Note that since Joda objects use millisecond resolution, we always + // produce either no fractional seconds or fractional seconds with + // millisecond resolution. + long millis = duration.getMillis(); + long seconds = millis / 1000; + millis = millis % 1000; + if (millis == 0) { + return String.format("%ds", seconds); + } else { + return String.format("%d.%03ds", seconds, millis); + } + } + + /** + * Converts a Dataflow API duration string into a {@link Duration}. 
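 + * <p> For example, {@code fromCloudDuration("1.500s")} yields a duration of 1500 milliseconds,
 + * while an unparsable input such as {@code "1.5"} yields {@code null}.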
+ * @return the parsed duration, or null if a parse error occurs + */ + @Nullable + public static Duration fromCloudDuration(String duration) { + Matcher matcher = DURATION_PATTERN.matcher(duration); + if (!matcher.matches()) { + return null; + } + long millis = Long.valueOf(matcher.group(1)) * 1000; + String frac = matcher.group(2); + if (frac != null) { + long fracs = Long.valueOf(frac); + if (frac.length() == 3) { // millisecond resolution + millis += fracs; + } else if (frac.length() == 6) { // microsecond resolution + millis += fracs / 1000; + } else if (frac.length() == 9) { // nanosecond resolution + millis += fracs / 1000000; + } else { + return null; + } + } + return Duration.millis(millis); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java new file mode 100644 index 0000000000000..4859f8ae5f39b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.joda.time.Instant; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.List; + +/** + * Class representing either a timer, or arbitrary element. + * Used as the input type of {@link StreamingGroupAlsoByWindowsDoFn}. + * + * @param the element type + */ +public class TimerOrElement { + + /** + * Creates a new {@code TimerOrElement} representing a timer. + * + * @param the element type + */ + public static TimerOrElement timer( + String tag, Instant timestamp, Object key) { + return new TimerOrElement<>(tag, timestamp, key); + } + + /** + * Creates a new {@code TimerOrElement} representing an element. + * + * @param the element type + */ + public static TimerOrElement element(E element) { + return new TimerOrElement<>(element); + } + + /** + * Returns whether this is a timer or an element. + */ + public boolean isTimer() { + return isTimer; + } + + /** + * If this is a timer, returns its tag, otherwise throws an exception. + */ + public String tag() { + if (!isTimer) { + throw new IllegalStateException("tag() called, but this is an element"); + } + return tag; + } + + /** + * If this is a timer, returns its timestamp, otherwise throws an exception. + */ + public Instant timestamp() { + if (!isTimer) { + throw new IllegalStateException("timestamp() called, but this is an element"); + } + return timestamp; + } + + /** + * If this is a timer, returns its key, otherwise throws an exception. 
+ */ + public Object key() { + if (!isTimer) { + throw new IllegalStateException("key() called, but this is an element"); + } + return key; + } + + /** + * If this is an element, returns it, otherwise throws an exception. + */ + public E element() { + if (isTimer) { + throw new IllegalStateException("element() called, but this is a timer"); + } + return element; + } + + /** + * Coder that forwards {@code ByteSizeObserver} calls to an underlying element coder. + * {@code TimerOrElement} objects never need to be encoded, so this class does not + * support the {@code encode} and {@code decode} methods. + */ + public static class TimerOrElementCoder extends StandardCoder> { + final Coder elemCoder; + + /** + * Creates a new {@code TimerOrElement.Coder} that wraps the given {@link Coder}. + */ + public static TimerOrElementCoder of(Coder elemCoder) { + return new TimerOrElementCoder<>(elemCoder); + } + + @JsonCreator + public static TimerOrElementCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List components) { + return of((Coder) components.get(0)); + } + + @Override + public void encode(TimerOrElement value, OutputStream outStream, Context context) { + throw new UnsupportedOperationException(); + } + + @Override + public TimerOrElement decode(InputStream inStream, Context context) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isRegisterByteSizeObserverCheap(TimerOrElement value, Context context) { + if (value.isTimer()) { + return true; + } else { + return elemCoder.isRegisterByteSizeObserverCheap(value.element(), context); + } + } + + @Override + public void registerByteSizeObserver( + TimerOrElement value, ElementByteSizeObserver observer, Context context) + throws Exception{ + if (!value.isTimer()) { + elemCoder.registerByteSizeObserver(value.element(), observer, context); + } + } + + @Override + public boolean isDeterministic() { + return elemCoder.isDeterministic(); + } + + @Override + public List> getCoderArguments() { + return Arrays.asList(elemCoder); + } + + public Coder getElementCoder() { + return elemCoder; + } + + private TimerOrElementCoder(Coder elemCoder) { + this.elemCoder = elemCoder; + } + } + + ////////////////////////////////////////////////////////////////////////////// + + private boolean isTimer; + private String tag; + private Instant timestamp; + private Object key; + private E element; + + TimerOrElement(String tag, Instant timestamp, Object key) { + this.isTimer = true; + this.tag = tag; + this.timestamp = timestamp; + this.key = key; + } + + TimerOrElement(E element) { + this.isTimer = false; + this.element = element; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java new file mode 100644 index 0000000000000..e27f7fcc4f885 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java @@ -0,0 +1,141 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.json.JsonFactory; +import com.google.api.client.json.jackson2.JacksonFactory; +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.pubsub.Pubsub; +import com.google.api.services.storage.Storage; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.options.StreamingOptions; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.security.GeneralSecurityException; + +/** + * Helpers for cloud communication. + */ +public class Transport { + + private static class SingletonHelper { + /** Global instance of the JSON factory. */ + private static final JsonFactory JSON_FACTORY; + + /** Global instance of the HTTP transport. */ + private static final HttpTransport HTTP_TRANSPORT; + + static { + try { + JSON_FACTORY = JacksonFactory.getDefaultInstance(); + HTTP_TRANSPORT = GoogleNetHttpTransport.newTrustedTransport(); + } catch (GeneralSecurityException | IOException e) { + throw new RuntimeException(e); + } + } + } + + public static HttpTransport getTransport() { + return SingletonHelper.HTTP_TRANSPORT; + } + + public static JsonFactory getJsonFactory() { + return SingletonHelper.JSON_FACTORY; + } + + /** + * Returns a BigQuery client builder. + *
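 + * <p> For example (a sketch, assuming {@code options} is a populated {@code BigQueryOptions}):
 + * <pre>{@code
 + * Bigquery bigquery = Transport.newBigQueryClient(options).build();
 + * }</pre>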

+ * Note: this client's endpoint is not modified by the + * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option. + */ + public static Bigquery.Builder + newBigQueryClient(BigQueryOptions options) { + return new Bigquery.Builder(getTransport(), getJsonFactory(), + new RetryHttpRequestInitializer(options.getGcpCredential())) + .setApplicationName(options.getAppName()); + } + +/** + * Returns a Pubsub client builder. + *

+ * Note: this client's endpoint is not modified by the + * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option. + */ + public static Pubsub.Builder + newPubsubClient(StreamingOptions options) { + return new Pubsub.Builder(getTransport(), getJsonFactory(), + new RetryHttpRequestInitializer(options.getGcpCredential())) + .setApplicationName(options.getAppName()); + } + + /** + * Returns a Google Cloud Dataflow client builder. + */ + public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options) { + String rootUrl = options.getApiRootUrl(); + String servicePath = options.getDataflowEndpoint(); + if (servicePath.contains("://")) { + try { + URL url = new URL(servicePath); + rootUrl = url.getProtocol() + "://" + url.getHost() + + (url.getPort() > 0 ? ":" + url.getPort() : ""); + servicePath = url.getPath(); + } catch (MalformedURLException e) { + throw new RuntimeException("Invalid URL: " + servicePath); + } + } + + return new Dataflow.Builder(getTransport(), + getJsonFactory(), + new RetryHttpRequestInitializer(options.getGcpCredential())) + .setApplicationName(options.getAppName()) + .setRootUrl(rootUrl) + .setServicePath(servicePath); + } + + /** + * Returns a Dataflow client which does not automatically retry failed + * requests. + */ + public static Dataflow.Builder + newRawDataflowClient(DataflowPipelineOptions options) { + return newDataflowClient(options) + .setHttpRequestInitializer(options.getGcpCredential()); + } + + /** + * Returns a Cloud Storage client builder. + *

+ * Note: this client's endpoint is not modified by the + * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option. + */ + public static Storage.Builder + newStorageClient(GcsOptions options) { + return new Storage.Builder(getTransport(), getJsonFactory(), + new RetryHttpRequestInitializer(options.getGcpCredential())) + .setApplicationName(options.getAppName()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java new file mode 100644 index 0000000000000..a0bfed1626f92 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.Objects; + +/** + * An exception that was thrown in user-code. Sets the stack trace + * from the first time execution enters user code down through the + * rest of the user's stack frames until the exception is + * reached. + */ +public class UserCodeException extends RuntimeException { + private static final Logger LOG = LoggerFactory.getLogger(UserCodeException.class); + + public UserCodeException(Throwable t) { + super(t); + + StackTraceElement[] currentFrames = + Thread.currentThread().getStackTrace(); + + // We're interested in getting the third stack frame here, since + // the exception stack trace includes the getStackTrace frame from + // Thread and the frame from where the UserCodeException is + // actually thrown. If there aren't more than two frames, + // something is odd about where the exception was thrown, so leave + // the stack trace alone and allow it to propagate. + // + // For example, if an exception in user code has a stack trace like this: + // + // java.lang.NullPointerException + // at com.google.cloud.dataflow.sdk.examples. + // SimpleWordCount$ExtractWordsFn.dieHere(SimpleWordCount.java:23) + // at com.google.cloud.dataflow.sdk.examples. + // SimpleWordCount$ExtractWordsFn. + // processElement(SimpleWordCount.java:27) + // at com.google.cloud.dataflow.sdk. + // DoFnRunner.processElement(DoFnRunner.java:95) <-- caught here + // at com.google.cloud.dataflow.sdk. + // worker.NormalParDoFn.processElement(NormalParDoFn.java:119) + // at com.google.cloud.dataflow.sdk. + // worker.executor.ParDoOperation.process(ParDoOperation.java:65) + // at com.google.cloud.dataflow.sdk. + // worker.executor.ReadOperation.start(ReadOperation.java:65) + // at com.google.cloud.dataflow.sdk. + // worker.executor.MapTaskExecutor.execute(MapTaskExecutor.java:79) + // at com.google.cloud.dataflow.sdk. + // worker.DataflowWorkerHarness.main(DataflowWorkerHarness.java:95) + // + // It would be truncated to: + // + // java.lang.NullPointerException + // at com.google.cloud.dataflow.sdk.examples. 
+ // SimpleWordCount$ExtractWordsFn.dieHere(SimpleWordCount.java:23) + // at com.google.cloud.dataflow.sdk.examples. + // SimpleWordCount$ExtractWordsFn. + // processElement(SimpleWordCount.java:27) + // + // However, we need to get the third stack frame from the + // getStackTrace, since after catching the error in DoFnRunner, + // the trace is two frames deeper by the time we get it: + // + // [0] java.lang.Thread.getStackTrace(Thread.java:1568) + // [1] com.google.cloud.dataflow.sdk. + // UserCodeException.(UserCodeException.java:16) + // [2] com.google.cloud.dataflow.sdk. + // DoFnRunner.processElement(DoFnRunner.java:95) <-- common frame + // + // We then proceed to truncate the original exception at the + // common frame, setting the UserCodeException's cause to the + // truncated stack trace. + + // Check to make sure the stack is > 2 deep. + if (currentFrames.length <= 2) { + LOG.error("Expecting stack trace to be > 2 frames long."); + return; + } + + // Perform some checks to make sure javac doesn't change from below us. + if (!Objects.equals(currentFrames[1].getClassName(), getClass().getName())) { + LOG.error("Expected second frame coming from Thread.currentThread.getStackTrace() " + + "to be {}, was: {}", getClass().getName(), currentFrames[1].getClassName()); + return; + } + if (Objects.equals(currentFrames[2].getClassName(), currentFrames[1].getClassName())) { + LOG.error("Javac's Thread.CurrentThread.getStackTrace() changed unexpectedly."); + return; + } + + // Now that all checks have passed, select the common frame. + StackTraceElement callingFrame = currentFrames[2]; + // Truncate the user-level stack trace below where the + // UserCodeException was thrown. + truncateStackTrace(callingFrame, t); + } + + /** + * Truncates this Throwable's stack frame at the given frame, + * removing all frames below. + */ + private void truncateStackTrace( + StackTraceElement currentFrame, Throwable t) { + int index = 0; + StackTraceElement[] stackTrace = t.getStackTrace(); + for (StackTraceElement element : stackTrace) { + if (Objects.equals(element.getClassName(), currentFrame.getClassName()) && + Objects.equals(element.getMethodName(), currentFrame.getMethodName())) { + t.setStackTrace(Arrays.copyOfRange(stackTrace, 0, index)); + break; + } + index++; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java new file mode 100644 index 0000000000000..f5ce4540d931a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * A collection of static methods for manipulating value representations + * transfered via the Dataflow API. 
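 + *
 + * <p> For example, {@code Values.asLong(value)} accepts a boxed {@code Integer}, a boxed
 + * {@code Long}, or a CloudObject-encoded integer scalar, and returns a {@code Long}.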
+ */ +public final class Values { + private Values() {} // Non-instantiable + + public static Boolean asBoolean(Object value) throws ClassCastException { + @Nullable Boolean knownResult = checkKnownValue(CloudKnownType.BOOLEAN, value, Boolean.class); + if (knownResult != null) { + return knownResult; + } + return Boolean.class.cast(value); + } + + public static Double asDouble(Object value) throws ClassCastException { + @Nullable Double knownResult = checkKnownValue(CloudKnownType.FLOAT, value, Double.class); + if (knownResult != null) { + return knownResult; + } + if (value instanceof Double) { + return (Double) value; + } + return ((Float) value).doubleValue(); + } + + public static Long asLong(Object value) throws ClassCastException { + @Nullable Long knownResult = checkKnownValue(CloudKnownType.INTEGER, value, Long.class); + if (knownResult != null) { + return knownResult; + } + if (value instanceof Long) { + return (Long) value; + } + return ((Integer) value).longValue(); + } + + public static String asString(Object value) throws ClassCastException { + @Nullable String knownResult = checkKnownValue(CloudKnownType.TEXT, value, String.class); + if (knownResult != null) { + return knownResult; + } + return String.class.cast(value); + } + + @Nullable + private static T checkKnownValue(CloudKnownType type, Object value, Class clazz) { + if (!(value instanceof Map)) { + return null; + } + Map map = (Map) value; + @Nullable String typeName = (String) map.get(PropertyNames.OBJECT_TYPE_NAME); + if (typeName == null) { + return null; + } + @Nullable CloudKnownType knownType = CloudKnownType.forUri(typeName); + if (knownType == null || knownType != type) { + return null; + } + @Nullable Object scalar = map.get(PropertyNames.SCALAR_FIELD_NAME); + if (scalar == null) { + return null; + } + return knownType.parse(scalar, clazz); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java new file mode 100644 index 0000000000000..a7399473d4b4a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Variable-length encoding for integers. + * + * Handles, in a common encoding format, signed bytes, shorts, ints, and longs. + * Takes between 1 and 10 bytes. + * Less efficient than BigEndian{Int,Long} coder for negative or large numbers. + * All negative ints are encoded using 5 bytes, longs take 10 bytes. + */ +public class VarInt { + + private static long convertIntToLongNoSignExtend(int v) { + return ((long) v) & 0xFFFFFFFFL; + } + + /** + * Encodes the given value onto the stream. 
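 + * <p> For example, the value 300 is written as the two bytes {@code 0xAC 0x02}: the low seven
 + * bits with the continuation bit set, followed by the remaining bits.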
+ */ + public static void encode(int v, OutputStream stream) throws IOException { + encode(convertIntToLongNoSignExtend(v), stream); + } + + /** + * Encodes the given value onto the stream. + */ + public static void encode(long v, OutputStream stream) throws IOException { + do { + // Encode next 7 bits + terminator bit + long bits = v & 0x7F; + v >>>= 7; + byte b = (byte) (bits | ((v != 0) ? 0x80 : 0)); + stream.write(b); + } while (v != 0); + } + + /** + * Decodes an integer value from the given stream. + */ + public static int decodeInt(InputStream stream) throws IOException { + long r = decodeLong(stream); + if (r < 0 || r >= 1L << 32) { + throw new IOException("varint overflow " + r); + } + return (int) r; + } + + /** + * Decodes a long value from the given stream. + */ + public static long decodeLong(InputStream stream) throws IOException { + long result = 0; + int shift = 0; + int b; + do { + // Get 7 bits from next byte + b = stream.read(); + if (b < 0) { + if (shift == 0) { + throw new EOFException(); + } else { + throw new IOException("varint not terminated"); + } + } + long bits = b & 0x7F; + if (shift >= 64 || (shift == 63 && bits > 1)) { + // Out of range + throw new IOException("varint too long"); + } + result |= bits << shift; + shift += 7; + } while ((b & 0x80) != 0); + return result; + } + + /** + * Returns the length of the encoding of the given value (in bytes). + */ + public static int getLength(int v) { + return getLength(convertIntToLongNoSignExtend(v)); + } + + /** + * Returns the length of the encoding of the given value (in bytes). + */ + public static int getLength(long v) { + int result = 0; + do { + result++; + v >>>= 7; + } while (v != 0); + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowUtils.java new file mode 100644 index 0000000000000..de0a8f24ba645 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowUtils.java @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Base64; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * Utility functions related to serializing windows. + */ +class WindowUtils { + private static final String BUFFER_TAG_PREFIX = "buffer:"; + + /** + * Converts the given window to a base64-encoded String using the given coder. 
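 + * <p> For example, {@code windowFromString(windowToString(window, coder), coder)} round-trips a
 + * window; this is how windows are embedded in timer tags and per-key state tags.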
+ */ + public static String windowToString(W window, Coder coder) throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + coder.encode(window, stream, Coder.Context.OUTER); + byte[] rawWindow = stream.toByteArray(); + return Base64.encodeBase64String(rawWindow); + } + + /** + * Parses a window from a base64-encoded String using the given coder. + */ + public static W windowFromString(String serializedWindow, Coder coder) throws IOException { + return coder.decode( + new ByteArrayInputStream(Base64.decodeBase64(serializedWindow)), + Coder.Context.OUTER); + } + + /** + * Returns a tag for storing buffered data in per-key state. + */ + public static CodedTupleTag bufferTag( + W window, Coder windowCoder, Coder elemCoder) + throws IOException { + return CodedTupleTag.of( + BUFFER_TAG_PREFIX + windowToString(window, windowCoder), elemCoder); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java new file mode 100644 index 0000000000000..de310b8271149 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -0,0 +1,368 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CollectionCoder; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; + +/** + * An immutable triple of value, timestamp, and windows. + * + * @param the type of the value + */ +public class WindowedValue { + + private final V value; + private final Instant timestamp; + private final Collection windows; + + /** + * Returns a {@code WindowedValue} with the given value, timestamp, and windows. 
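 + * <p> For example (illustrative): {@code WindowedValue.of("a", new Instant(0), windows)} carries
 + * an explicit timestamp and window collection, whereas {@link #valueInGlobalWindow} uses the
 + * minimum timestamp and the single global window.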
+ */ + public static WindowedValue of( + V value, + Instant timestamp, + Collection windows) { + return new WindowedValue<>(value, timestamp, windows); + } + + /** + * Returns a {@code WindowedValue} with the given value, default timestamp, + * and {@code GlobalWindow}. + */ + public static WindowedValue valueInGlobalWindow(V value) { + return new WindowedValue<>(value, + new Instant(Long.MIN_VALUE), + Arrays.asList(GlobalWindow.Window.INSTANCE)); + } + + /** + * Returns a {@code WindowedValue} with the given value and default timestamp and empty windows. + */ + public static WindowedValue valueInEmptyWindows(V value) { + return new WindowedValue<>(value, + new Instant(Long.MIN_VALUE), + new ArrayList()); + } + + private WindowedValue(V value, + Instant timestamp, + Collection windows) { + this.value = value; + this.timestamp = timestamp; + this.windows = windows; + } + + /** + * Returns a new {@code WindowedValue} that is a copy of this one, but with a different value. + */ + public WindowedValue withValue(V value) { + return new WindowedValue<>(value, this.timestamp, this.windows); + } + + /** + * Returns the value of this {@code WindowedValue}. + */ + public V getValue() { + return value; + } + + /** + * Returns the timestamp of this {@code WindowedValue}. + */ + public Instant getTimestamp() { + return timestamp; + } + + /** + * Returns the windows of this {@code WindowedValue}. + */ + public Collection getWindows() { + return windows; + } + + /** + * Returns the {@code Coder} to use for a {@code WindowedValue}, + * using the given valueCoder and windowCoder. + */ + public static WindowedValueCoder getFullCoder( + Coder valueCoder, + Coder windowCoder) { + return FullWindowedValueCoder.of(valueCoder, windowCoder); + } + + /** + * Returns the {@code ValueOnlyCoder} from the given valueCoder. + */ + public static WindowedValueCoder getValueOnlyCoder(Coder valueCoder) { + return ValueOnlyWindowedValueCoder.of(valueCoder); + } + + @Override + public boolean equals(Object o) { + if (o instanceof WindowedValue) { + WindowedValue that = (WindowedValue) o; + if (that.timestamp.isEqual(timestamp) && that.windows.size() == windows.size()) { + for (Iterator thatIterator = that.windows.iterator(), thisIterator = windows.iterator(); + thatIterator.hasNext() && thisIterator.hasNext(); + /* do nothng */) { + if (!thatIterator.next().equals(thisIterator.next())) { + return false; + } + } + return true; + } + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(value, timestamp, Arrays.hashCode(windows.toArray())); + } + + @Override + public String toString() { + return "[WindowedValue: " + value + ", timestamp: " + timestamp.getMillis() + + ", windows: " + windows + "]"; + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Abstract class for {@code WindowedValue} coder. + */ + public abstract static class WindowedValueCoder + extends StandardCoder> { + final Coder valueCoder; + + WindowedValueCoder(Coder valueCoder) { + this.valueCoder = checkNotNull(valueCoder); + } + + /** + * Returns the value coder. + */ + public Coder getValueCoder() { + return valueCoder; + } + + /** + * Returns a new {@code WindowedValueCoder} that is a copy of this one, + * but with a different value coder. + */ + public abstract WindowedValueCoder withValueCoder(Coder valueCoder); + } + + /** + * Coder for {@code WindowedValue}. 
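 + * <p> A {@code FullWindowedValueCoder} encodes the value, then the timestamp, then the windows,
 + * so all three can be recovered on decode; instances are typically obtained via
 + * {@link WindowedValue#getFullCoder}.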
+ */ + public static class FullWindowedValueCoder extends WindowedValueCoder { + private final Coder windowCoder; + // Precompute and cache the coder for a list of windows. + private final Coder> windowsCoder; + + public static FullWindowedValueCoder of( + Coder valueCoder, + Coder windowCoder) { + return new FullWindowedValueCoder<>(valueCoder, windowCoder); + } + + @JsonCreator + public static FullWindowedValueCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + checkArgument(components.size() == 2, + "Expecting 2 components, got " + components.size()); + return of(components.get(0), + (Coder) components.get(1)); + } + + @SuppressWarnings("unchecked") + FullWindowedValueCoder(Coder valueCoder, + Coder windowCoder) { + super(valueCoder); + this.windowCoder = checkNotNull(windowCoder); + // It's not possible to statically type-check correct use of the + // windowCoder (we have to ensure externally that we only get + // windows of the class handled by windowCoder), so type + // windowsCoder in a way that makes encode() and decode() work + // right, and cast the window type away here. + this.windowsCoder = (Coder) CollectionCoder.of(this.windowCoder); + } + + public Coder getWindowCoder() { + return windowCoder; + } + + public Coder> getWindowsCoder() { + return windowsCoder; + } + + @Override + public WindowedValueCoder withValueCoder(Coder valueCoder) { + return new FullWindowedValueCoder<>(valueCoder, windowCoder); + } + + @Override + public void encode(WindowedValue windowedElem, + OutputStream outStream, + Context context) + throws CoderException, IOException { + Context nestedContext = context.nested(); + valueCoder.encode(windowedElem.getValue(), outStream, nestedContext); + InstantCoder.of().encode( + windowedElem.getTimestamp(), outStream, nestedContext); + windowsCoder.encode(windowedElem.getWindows(), outStream, nestedContext); + } + + @Override + public WindowedValue decode(InputStream inStream, Context context) + throws CoderException, IOException { + Context nestedContext = context.nested(); + T value = valueCoder.decode(inStream, nestedContext); + Instant timestamp = InstantCoder.of().decode(inStream, nestedContext); + Collection windows = + windowsCoder.decode(inStream, nestedContext); + return WindowedValue.of(value, timestamp, windows); + } + + @Override + public boolean isDeterministic() { + return valueCoder.isDeterministic() && windowCoder.isDeterministic(); + } + + @Override + public void registerByteSizeObserver(WindowedValue value, + ElementByteSizeObserver observer, + Context context) throws Exception { + valueCoder.registerByteSizeObserver(value.getValue(), observer, context); + InstantCoder.of().registerByteSizeObserver(value.getTimestamp(), observer, context); + windowsCoder.registerByteSizeObserver(value.getWindows(), observer, context); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addBoolean(result, PropertyNames.IS_WRAPPER, true); + return result; + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public List> getComponents() { + return Arrays.>asList(valueCoder, windowCoder); + } + } + + /** + * Coder for {@code WindowedValue}. + * + *

A {@code ValueOnlyWindowedValueCoder} only encodes and decodes the value. It drops + * timestamp and windows for encoding, and uses defaults timestamp, and windows for decoding. + */ + public static class ValueOnlyWindowedValueCoder extends WindowedValueCoder { + + public static ValueOnlyWindowedValueCoder of( + Coder valueCoder) { + return new ValueOnlyWindowedValueCoder<>(valueCoder); + } + + @JsonCreator + public static ValueOnlyWindowedValueCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + checkArgument(components.size() == 1, "Expecting 1 component, got " + components.size()); + return of(components.get(0)); + } + + ValueOnlyWindowedValueCoder(Coder valueCoder) { + super(valueCoder); + } + + @Override + public WindowedValueCoder withValueCoder(Coder valueCoder) { + return new ValueOnlyWindowedValueCoder<>(valueCoder); + } + + @Override + public void encode(WindowedValue windowedElem, OutputStream outStream, Context context) + throws CoderException, IOException { + valueCoder.encode(windowedElem.getValue(), outStream, context); + } + + @Override + public WindowedValue decode(InputStream inStream, Context context) + throws CoderException, IOException { + T value = valueCoder.decode(inStream, context); + return WindowedValue.valueInGlobalWindow(value); + } + + @Override + public boolean isDeterministic() { + return valueCoder.isDeterministic(); + } + + @Override + public void registerByteSizeObserver( + WindowedValue value, ElementByteSizeObserver observer, Context context) + throws Exception { + valueCoder.registerByteSizeObserver(value.getValue(), observer, context); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + addBoolean(result, PropertyNames.IS_WRAPPER, true); + return result; + } + + @Override + public List> getCoderArguments() { + return Arrays.>asList(valueCoder); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java new file mode 100644 index 0000000000000..8b5f636ac5da3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java @@ -0,0 +1,730 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.AND; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.OR; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SET; + +import com.google.common.reflect.TypeToken; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.logging.Logger; + +/** + * A Counter enables the aggregation of a stream of values over time. The + * cumulative aggregate value is updated as new values are added, or it can be + * reset to a new value. Multiple kinds of aggregation are supported depending + * on the type of the counter. + * + *
<p>
Counters compare using value equality of their name, kind, and + * cumulative value. Equal counters should have equal toString()s. + * + * @param the type of values aggregated by this counter + */ +public abstract class Counter { + private static final Logger LOG = Logger.getLogger(Counter.class.getName()); + + /** + * Possible kinds of counter aggregation. + */ + public static enum AggregationKind { + + /** + * Computes the sum of all added values. + * Applicable to {@link Integer}, {@link Long}, and {@link Double} values. + */ + SUM, + + /** + * Computes the maximum value of all added values. + * Applicable to {@link Integer}, {@link Long}, and {@link Double} values. + */ + MAX, + + /** + * Computes the minimum value of all added values. + * Applicable to {@link Integer}, {@link Long}, and {@link Double} values. + */ + MIN, + + /** + * Computes the arithmetic mean of all added values. Applicable to + * {@link Integer}, {@link Long}, and {@link Double} values. + */ + MEAN, + + /** + * Computes the set of all added values. Applicable to {@link Integer}, + * {@link Long}, {@link Double}, and {@link String} values. + */ + SET, + + /** + * Computes boolean AND over all added values. + * Applicable only to {@link Boolean} values. + */ + AND, + + /** + * Computes boolean OR over all added values. Applicable only to + * {@link Boolean} values. + */ + OR + // TODO: consider adding VECTOR_SUM, HISTOGRAM, KV_SET, PRODUCT, TOP. + } + + /** + * Constructs a new {@link Counter} that aggregates {@link Integer}, values + * according to the desired aggregation kind. The supported aggregation kinds + * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN}, + * {@link AggregationKind#MAX}, {@link AggregationKind#MEAN}, and + * {@link AggregationKind#SET}. This is a convenience wrapper over a + * {@link Counter} implementation that aggregates {@link Long} values. This is + * useful when the application handles (boxed) {@link Integer} values which + * are not readily convertible to the (boxed) {@link Long} values otherwise + * expected by the {@link Counter} implementation aggregating {@link Long} + * values. + * + * @param name the name of the new counter + * @param kind the new counter's aggregation kind + * @return the newly constructed Counter + * @throws IllegalArgumentException if the aggregation kind is not supported + */ + public static Counter ints(String name, AggregationKind kind) { + return new IntegerCounter(name, kind); + } + + /** + * Constructs a new {@link Counter} that aggregates {@link Long} values + * according to the desired aggregation kind. The supported aggregation kinds + * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN}, + * {@link AggregationKind#MAX}, {@link AggregationKind#MEAN}, and + * {@link AggregationKind#SET}. + * + * @param name the name of the new counter + * @param kind the new counter's aggregation kind + * @return the newly constructed Counter + * @throws IllegalArgumentException if the aggregation kind is not supported + */ + public static Counter longs(String name, AggregationKind kind) { + return new LongCounter(name, kind); + } + + /** + * Constructs a new {@link Counter} that aggregates {@link Double} values + * according to the desired aggregation kind. The supported aggregation kinds + * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN}, + * {@link AggregationKind#MAX}, {@link AggregationKind#MEAN}, and + * {@link AggregationKind#SET}. 
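A minimal usage sketch for these factory methods; the counter names are made up for illustration, and only the APIs declared in this class are used.

import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN;
import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM;

import com.google.cloud.dataflow.sdk.util.common.Counter;

public class CounterSketch {
  public static void main(String[] args) {
    Counter<Long> bytesRead = Counter.longs("bytes-read", SUM);
    bytesRead.addValue(512L).addValue(2048L);   // addValue returns this, so calls chain
    System.out.println(bytesRead);              // bytes-read:SUM(2560)

    Counter<Double> latency = Counter.doubles("latency-ms", MEAN);
    latency.addValue(4.0).addValue(6.0);
    // For MEAN, getAggregate() holds the running sum and getCount() the number of values.
    System.out.println(latency.getAggregate(false) / latency.getCount(false));  // 5.0
  }
}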
+ * + * @param name the name of the new counter + * @param kind the new counter's aggregation kind + * @return the newly constructed Counter + * @throws IllegalArgumentException if the aggregation kind is not supported + */ + public static Counter doubles(String name, AggregationKind kind) { + return new DoubleCounter(name, kind); + } + + /** + * Constructs a new {@link Counter} that aggregates {@link Boolean} values + * according to the desired aggregation kind. The only supported aggregation + * kinds are {@link AggregationKind#AND} and {@link AggregationKind#OR}. + * + * @param name the name of the new counter + * @param kind the new counter's aggregation kind + * @return the newly constructed Counter + * @throws IllegalArgumentException if the aggregation kind is not supported + */ + public static Counter booleans(String name, AggregationKind kind) { + return new BooleanCounter(name, kind); + } + + /** + * Constructs a new {@link Counter} that aggregates {@link String} values + * according to the desired aggregation kind. The only supported aggregation + * kind is {@link AggregationKind#SET}. + * + * @param name the name of the new counter + * @param kind the new counter's aggregation kind + * @return the newly constructed Counter + * @throws IllegalArgumentException if the aggregation kind is not supported + */ + public static Counter strings(String name, AggregationKind kind) { + return new StringCounter(name, kind); + } + + + ////////////////////////////////////////////////////////////////////////////// + + /** + * Adds a new value to the aggregation stream. Returns this (to allow method + * chaining). + */ + public abstract Counter addValue(T value); + + /** + * Resets the aggregation stream to this new value. Returns this (to allow + * method chaining). + */ + public Counter resetToValue(T value) { + return resetToValue(-1, value); + } + + /** + * Resets the aggregation stream to this new value. Returns this (to allow + * method chaining). The value of elementCount must be -1 for non-MEAN + * aggregations. The value of elementCount must be non-negative for MEAN + * aggregation. + */ + public synchronized Counter resetToValue(long elementCount, T value) { + aggregate = value; + deltaAggregate = value; + + if (kind.equals(MEAN)) { + if (elementCount < 0) { + throw new AssertionError( + "elementCount must be non-negative for MEAN aggregation"); + } + count = elementCount; + deltaCount = elementCount; + } else { + if (elementCount != -1) { + throw new AssertionError( + "elementCount must be -1 for non-MEAN aggregations"); + } + count = 0; + deltaCount = 0; + } + + if (kind.equals(SET)) { + set.clear(); + set.add(value); + deltaSet = new HashSet<>(); + deltaSet.add(value); + } + return this; + } + + /** Resets the counter's delta value to have no values accumulated. */ + public abstract void resetDelta(); + + /** + * Returns the counter's name. + */ + public String getName() { + return name; + } + + /** + * Returns the counter's aggregation kind. + */ + public AggregationKind getKind() { + return kind; + } + + /** + * Returns the counter's type. + */ + public Class getType() { + return new TypeToken(getClass()) {}.getRawType(); + } + + /** + * Returns the aggregated value, or the sum for MEAN aggregation, either + * total or, if delta, since the last update extraction or resetDelta, + * if not a SET aggregation. + */ + public T getAggregate(boolean delta) { + return delta ? 
deltaAggregate : aggregate; + } + + /** + * Returns the number of aggregated values, either total or, if + * delta, since the last update extraction or resetDelta, if a MEAN + * aggregation. + */ + public long getCount(boolean delta) { + return delta ? deltaCount : count; + } + + /** + * Returns the set of all aggregated values, either total or, if + * delta, since the last update extraction or resetDelta, if a SET + * aggregation. + */ + public Set getSet(boolean delta) { + return delta ? deltaSet : set; + } + + /** + * Returns a string representation of the Counter. Useful for debugging logs. + * Example return value: "ElementCount:SUM(15)". + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getName()); + sb.append(":"); + sb.append(getKind()); + sb.append("("); + switch (kind) { + case SUM: + case MAX: + case MIN: + case AND: + case OR: + sb.append(aggregate); + break; + case MEAN: + sb.append(aggregate); + sb.append("/"); + sb.append(count); + break; + case SET: + sb.append(set); + break; + default: + throw illegalArgumentException(); + } + sb.append(")"); + + return sb.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (o instanceof Counter) { + Counter that = (Counter) o; + return this.name.equals(that.name) + && this.kind == that.kind + && this.getClass().equals(that.getClass()) + && this.count == that.count + && Objects.equals(this.aggregate, that.aggregate) + && Objects.equals(this.set, that.set); + } else { + return false; + } + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), name, kind, aggregate, count, set); + } + + /** + * Returns whether this Counter is compatible with that Counter. If + * so, they can be merged into a single Counter. + */ + public boolean isCompatibleWith(Counter that) { + return this.name.equals(that.name) + && this.kind == that.kind + && this.getClass().equals(that.getClass()); + } + + + ////////////////////////////////////////////////////////////////////////////// + + /** The name of this counter. */ + protected final String name; + + /** The kind of aggregation function to apply to this counter. */ + protected final AggregationKind kind; + + /** The total cumulative aggregation value. Holds sum for MEAN aggregation. */ + protected T aggregate; + + /** The cumulative aggregation value since the last update extraction. */ + protected T deltaAggregate; + + /** The total number of aggregated values. Useful for MEAN aggregation. */ + protected long count; + + /** The number of aggregated values since the last update extraction. */ + protected long deltaCount; + + /** Holds the set of all aggregated values. Used only for SET aggregation. */ + protected Set set; + + /** Holds the set of aggregated values since the last update extraction. */ + protected Set deltaSet; + + protected Counter(String name, AggregationKind kind) { + this.name = name; + this.kind = kind; + this.count = 0; + this.deltaCount = 0; + if (kind.equals(SET)) { + set = new HashSet<>(); + deltaSet = new HashSet<>(); + } + } + + + ////////////////////////////////////////////////////////////////////////////// + + /** + * Implements a {@link Counter} for {@link Long} values. + */ + private static class LongCounter extends Counter { + + /** Initializes a new {@link Counter} for {@link Long} values. 
*/ + private LongCounter(String name, AggregationKind kind) { + super(name, kind); + switch (kind) { + case SUM: + case MEAN: + aggregate = deltaAggregate = 0L; + break; + case MAX: + aggregate = deltaAggregate = Long.MIN_VALUE; + break; + case MIN: + aggregate = deltaAggregate = Long.MAX_VALUE; + break; + case SET: + break; + default: + throw illegalArgumentException(); + } + } + + @Override + public synchronized LongCounter addValue(Long value) { + switch (kind) { + case SUM: + aggregate += value; + deltaAggregate += value; + break; + case MEAN: + aggregate += value; + deltaAggregate += value; + count++; + deltaCount++; + break; + case MAX: + aggregate = Math.max(aggregate, value); + deltaAggregate = Math.max(deltaAggregate, value); + break; + case MIN: + aggregate = Math.min(aggregate, value); + deltaAggregate = Math.min(deltaAggregate, value); + break; + case SET: + set.add(value); + deltaSet.add(value); + break; + default: + throw illegalArgumentException(); + } + return this; + } + + @Override + public synchronized void resetDelta() { + switch (kind) { + case SUM: + deltaAggregate = 0L; + break; + case MEAN: + deltaAggregate = 0L; + deltaCount = 0; + break; + case MAX: + deltaAggregate = Long.MIN_VALUE; + break; + case MIN: + deltaAggregate = Long.MAX_VALUE; + break; + case SET: + deltaSet = new HashSet<>(); + break; + default: + throw illegalArgumentException(); + } + } + } + + /** + * Implements a {@link Counter} for {@link Double} values. + */ + private static class DoubleCounter extends Counter { + + /** Initializes a new {@link Counter} for {@link Double} values. */ + private DoubleCounter(String name, AggregationKind kind) { + super(name, kind); + switch (kind) { + case SUM: + case MEAN: + aggregate = deltaAggregate = 0.0; + break; + case MAX: + aggregate = deltaAggregate = Double.MIN_VALUE; + break; + case MIN: + aggregate = deltaAggregate = Double.MAX_VALUE; + break; + case SET: + break; + default: + throw illegalArgumentException(); + } + } + + @Override + public synchronized DoubleCounter addValue(Double value) { + switch (kind) { + case SUM: + aggregate += value; + deltaAggregate += value; + break; + case MEAN: + aggregate += value; + deltaAggregate += value; + count++; + deltaCount++; + break; + case MAX: + aggregate = Math.max(aggregate, value); + deltaAggregate = Math.max(deltaAggregate, value); + break; + case MIN: + aggregate = Math.min(aggregate, value); + deltaAggregate = Math.min(deltaAggregate, value); + break; + case SET: + set.add(value); + deltaSet.add(value); + break; + default: + throw illegalArgumentException(); + } + return this; + } + + @Override + public synchronized void resetDelta() { + switch (kind) { + case SUM: + deltaAggregate = 0.0; + break; + case MEAN: + deltaAggregate = 0.0; + deltaCount = 0; + break; + case MAX: + deltaAggregate = Double.MIN_VALUE; + break; + case MIN: + deltaAggregate = Double.MAX_VALUE; + break; + case SET: + deltaSet = new HashSet<>(); + break; + default: + throw illegalArgumentException(); + } + } + } + + /** + * Implements a {@link Counter} for {@link Boolean} values. + */ + private static class BooleanCounter extends Counter { + + /** Initializes a new {@link Counter} for {@link Boolean} values. 
*/ + private BooleanCounter(String name, AggregationKind kind) { + super(name, kind); + if (kind.equals(AND)) { + aggregate = deltaAggregate = true; + } else if (kind.equals(OR)) { + aggregate = deltaAggregate = false; + } else { + throw illegalArgumentException(); + } + } + + @Override + public synchronized BooleanCounter addValue(Boolean value) { + if (kind.equals(AND)) { + aggregate &= value; + deltaAggregate &= value; + } else { // kind.equals(OR)) + aggregate |= value; + deltaAggregate |= value; + } + return this; + } + + @Override + public synchronized void resetDelta() { + switch (kind) { + case AND: + deltaAggregate = true; + break; + case OR: + deltaAggregate = false; + break; + default: + throw illegalArgumentException(); + } + } + } + + /** + * Implements a {@link Counter} for {@link String} values. + */ + private static class StringCounter extends Counter { + + /** Initializes a new {@link Counter} for {@link String} values. */ + private StringCounter(String name, AggregationKind kind) { + super(name, kind); + if (!kind.equals(SET)) { + throw illegalArgumentException(); + } + } + + @Override + public synchronized StringCounter addValue(String value) { + set.add(value); + deltaSet.add(value); + return this; + } + + @Override + public synchronized void resetDelta() { + switch (kind) { + case SET: + deltaSet = new HashSet<>(); + break; + default: + throw illegalArgumentException(); + } + } + } + + /** + * Implements a {@link Counter} for {@link Integer} values. + */ + private static class IntegerCounter extends Counter { + + /** Initializes a new {@link Counter} for {@link Integer} values. */ + private IntegerCounter(String name, AggregationKind kind) { + super(name, kind); + switch (kind) { + case SUM: + case MEAN: + aggregate = deltaAggregate = 0; + break; + case MAX: + aggregate = deltaAggregate = Integer.MIN_VALUE; + break; + case MIN: + aggregate = deltaAggregate = Integer.MAX_VALUE; + break; + case SET: + break; + default: + throw illegalArgumentException(); + } + } + + @Override + public synchronized IntegerCounter addValue(Integer value) { + switch (kind) { + case SUM: + aggregate += value; + deltaAggregate += value; + break; + case MEAN: + aggregate += value; + deltaAggregate += value; + count++; + deltaCount++; + break; + case MAX: + aggregate = Math.max(aggregate, value); + deltaAggregate = Math.max(deltaAggregate, value); + break; + case MIN: + aggregate = Math.min(aggregate, value); + deltaAggregate = Math.min(deltaAggregate, value); + break; + case SET: + set.add(value); + deltaSet.add(value); + break; + default: + throw illegalArgumentException(); + } + return this; + } + + @Override + public synchronized void resetDelta() { + switch (kind) { + case SUM: + deltaAggregate = 0; + break; + case MEAN: + deltaAggregate = 0; + deltaCount = 0; + break; + case MAX: + deltaAggregate = Integer.MIN_VALUE; + break; + case MIN: + deltaAggregate = Integer.MAX_VALUE; + break; + case SET: + deltaSet = new HashSet<>(); + break; + default: + throw illegalArgumentException(); + } + } + } + + + ////////////////////////////////////////////////////////////////////////////// + + /** + * Constructs an {@link IllegalArgumentException} explaining that this + * {@link Counter}'s aggregation kind is not supported by its value type. 
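The cumulative/delta split maintained by the fields above is what lets a reporter periodically extract only the values accumulated since its last visit. A small sketch of that protocol, with a hypothetical takeDelta helper that is not part of the SDK:

import com.google.cloud.dataflow.sdk.util.common.Counter;
import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;

public class DeltaReporterSketch {
  /** Returns the value accumulated since the previous call and clears the delta. */
  static long takeDelta(Counter<Long> counter) {
    // A real reporter would need to make the read-and-reset atomic with respect to writers.
    long delta = counter.getAggregate(true);  // delta since the last extraction
    counter.resetDelta();                     // the cumulative total stays untouched
    return delta;
  }

  public static void main(String[] args) {
    Counter<Long> records = Counter.longs("records", AggregationKind.SUM);
    records.addValue(10L).addValue(5L);
    System.out.println(takeDelta(records));            // 15
    records.addValue(1L);
    System.out.println(takeDelta(records));            // 1
    System.out.println(records.getAggregate(false));   // 16 (cumulative total)
  }
}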
+ */ + protected IllegalArgumentException illegalArgumentException() { + return new IllegalArgumentException("Cannot compute " + kind + + " aggregation over " + getType().getSimpleName() + " values."); + } + + + ////////////////////////////////////////////////////////////////////////////// + + // For testing. + synchronized T getTotalAggregate() { return aggregate; } + synchronized T getDeltaAggregate() { return deltaAggregate; } + synchronized long getTotalCount() { return count; } + synchronized long getDeltaCount() { return deltaCount; } + synchronized Set getTotalSet() { return set; } + synchronized Set getDeltaSet() { return deltaSet; } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java new file mode 100644 index 0000000000000..a9e83f3237919 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java @@ -0,0 +1,152 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.AbstractSet; +import java.util.HashMap; +import java.util.Iterator; + +/** + * A CounterSet maintains a set of {@link Counter}s. + * + *
<p>
Thread-safe. + */ +public class CounterSet extends AbstractSet> { + + /** Registered counters. */ + private final HashMap> counters = new HashMap<>(); + + private final AddCounterMutator addCounterMutator = new AddCounterMutator(); + + /** + * Constructs a CounterSet containing the given Counters. + */ + public CounterSet(Counter... counters) { + for (Counter counter : counters) { + addNewCounter(counter); + } + } + + /** + * Returns an object that supports adding additional counters into + * this CounterSet. + */ + public AddCounterMutator getAddCounterMutator() { + return addCounterMutator; + } + + /** + * Adds a new counter, throwing an exception if a counter of the + * same name already exists. + */ + public void addNewCounter(Counter counter) { + if (!addCounter(counter)) { + throw new IllegalArgumentException( + "Counter " + counter + " duplicates an existing counter in " + this); + } + } + + /** + * Adds the given Counter to this CounterSet. + * + *
<p>
If a counter with the same name already exists, it will be + * reused, as long as it is compatible. + * + * @return the Counter that was reused, or added + * @throws IllegalArgumentException if the a counter with the same + * name but an incompatible kind had already been added + */ + public synchronized Counter addOrReuseCounter(Counter counter) { + Counter oldCounter = counters.get(counter.getName()); + if (oldCounter == null) { + // A new counter. + counters.put(counter.getName(), counter); + return counter; + } + if (counter.isCompatibleWith(oldCounter)) { + // Return the counter to reuse. + @SuppressWarnings("unchecked") + Counter compatibleCounter = (Counter) oldCounter; + return compatibleCounter; + } + throw new IllegalArgumentException( + "Counter " + counter + " duplicates incompatible counter " + + oldCounter + " in " + this); + } + + /** + * Adds a counter. Returns {@code true} if the counter was added to the set + * and false if the given counter was {@code null} or it already existed in + * the set. + * + * @param counter to register + */ + public boolean addCounter(Counter counter) { + return add(counter); + } + + /** + * Returns the Counter with the given name in this CounterSet; + * returns null if no such Counter exists. + */ + public synchronized Counter getExistingCounter(String name) { + return counters.get(name); + } + + @Override + public synchronized Iterator> iterator() { + return counters.values().iterator(); + } + + @Override + public synchronized int size() { + return counters.size(); + } + + @Override + public synchronized boolean add(Counter e) { + if (null == e) { + return false; + } + if (counters.containsKey(e.getName())) { + return false; + } + counters.put(e.getName(), e); + return true; + } + + /** + * A nested class that supports adding additional counters into the + * enclosing CounterSet. This is useful as a mutator; hiding other + * public methods of the CounterSet. + */ + public class AddCounterMutator { + /** + * Adds the given Counter into the enclosing CounterSet. + * + *
<p>
If a counter with the same name already exists, it will be + * reused, as long as it has the same type. + * + * @return the Counter that was reused, or added + * @throws IllegalArgumentException if the a counter with the same + * name but an incompatible kind had already been added + */ + public Counter addCounter(Counter counter) { + return addOrReuseCounter(counter); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java new file mode 100644 index 0000000000000..447dadcb8ef75 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common; + +/** + * An interface for things that allow observing the size in bytes of + * encoded values of type {@code T}. + * + * @param the type of the values being observed + */ +public interface ElementByteSizeObservable { + /** + * Returns whether {@link #registerByteSizeObserver} is cheap enough + * to call for every element, that is, if this + * {@code ElementByteSizeObservable} can calculate the byte size of + * the element to be coded in roughly constant time (or lazily). + */ + public boolean isRegisterByteSizeObserverCheap(T value); + + /** + * Notifies the {@code ElementByteSizeObserver} about the byte size + * of the encoded value using this {@code ElementByteSizeObservable}. + */ + public void registerByteSizeObserver(T value, + ElementByteSizeObserver observer) + throws Exception; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java new file mode 100644 index 0000000000000..f8f727090237a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java @@ -0,0 +1,63 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
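A short sketch of the reuse semantics of CounterSet and its AddCounterMutator, assuming the generic factory signatures shown above; the counter name "emitted" is illustrative only.

import com.google.cloud.dataflow.sdk.util.common.Counter;
import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;
import com.google.cloud.dataflow.sdk.util.common.CounterSet;

public class CounterSetSketch {
  public static void main(String[] args) {
    CounterSet counters = new CounterSet();

    Counter<Long> first = counters.getAddCounterMutator()
        .addCounter(Counter.longs("emitted", AggregationKind.SUM));
    // Registering a compatible counter under the same name returns the existing one.
    Counter<Long> second = counters.getAddCounterMutator()
        .addCounter(Counter.longs("emitted", AggregationKind.SUM));
    System.out.println(first == second);  // true

    // An incompatible kind under the same name is rejected.
    try {
      counters.getAddCounterMutator()
          .addCounter(Counter.longs("emitted", AggregationKind.MAX));
    } catch (IllegalArgumentException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }

    System.out.println(counters.getExistingCounter("emitted"));  // emitted:SUM(0)
  }
}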
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.ArrayList; +import java.util.List; +import java.util.Observer; + +/** + * An abstract class used for iterables that notify observers about size in + * bytes of their elements, as they are being iterated over. + * + * @param the type of elements returned by this iterable + * @param type type of iterator returned by this iterable + */ +public abstract class ElementByteSizeObservableIterable< + V, VI extends ElementByteSizeObservableIterator> + implements Iterable { + private List observers = new ArrayList<>(); + + /** + * Derived classes override this method to return an iterator for this + * iterable. + */ + protected abstract VI createIterator(); + + /** + * Sets the observer, which will observe the iterator returned in + * the next call to iterator() method. Future calls to iterator() + * won't be observed, unless an observer is set again. + */ + public void addObserver(Observer observer) { + observers.add(observer); + } + + /** + * Returns a new iterator for this iterable. If an observer was set in + * a previous call to setObserver(), it will observe the iterator returned. + */ + @Override + public VI iterator() { + VI iterator = createIterator(); + for (Observer observer : observers) { + iterator.addObserver(observer); + } + observers.clear(); + return iterator; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java new file mode 100644 index 0000000000000..50c9add0edaab --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java @@ -0,0 +1,36 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.Iterator; +import java.util.Observable; + +/** + * An abstract class used for iterators that notify observers about size in + * bytes of their elements, as they are being iterated over. The subclasses + * need to implement the standard Iterator interface and call method + * notifyValueReturned() for each element read and/or iterated over. 
+ * + * @param value type + */ +public abstract class ElementByteSizeObservableIterator + extends Observable implements Iterator { + protected final void notifyValueReturned(long byteSize) { + setChanged(); + notifyObservers(byteSize); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java new file mode 100644 index 0000000000000..9cccb4365c6f3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java @@ -0,0 +1,84 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.Observable; +import java.util.Observer; + +/** + * An observer that gets notified when additional bytes are read + * and/or used. It adds all bytes into a local counter. When the + * observer gets advanced via the next() call, it adds the total byte + * count to the specified counter, and prepares for the next element. + */ +public class ElementByteSizeObserver implements Observer { + private final Counter counter; + private boolean isLazy = false; + private long totalSize = 0; + + public ElementByteSizeObserver(Counter counter) { + this.counter = counter; + } + + /** + * Sets byte counting for the current element as lazy. That is, the + * observer will get notified of the element's byte count only as + * element's pieces are being processed or iterated over. + */ + public void setLazy() { + isLazy = true; + } + + /** + * Returns whether byte counting for the current element is lazy, that is, + * whether the observer gets notified of the element's byte count only as + * element's pieces are being processed or iterated over. + */ + public boolean getIsLazy() { + return isLazy; + } + + /** + * Updates the observer with a context specified, but without an instance of + * the Observable. + */ + public void update(Object obj) { + update(null, obj); + } + + @Override + public void update(Observable obs, Object obj) { + if (obj instanceof Long) { + totalSize += (Long) obj; + } else if (obj instanceof Integer) { + totalSize += (Integer) obj; + } else { + throw new AssertionError("unexpected parameter object"); + } + } + + /** + * Advances the observer to the next element. Adds the current total byte + * size to the counter, and prepares the observer for the next element. 
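Putting the observer together with an ElementByteSizeObservableIterator subclass might look like the following sketch. ByteCountingIterator is a hypothetical helper that reports each string's UTF-8 length as it is returned; only the observer and counter APIs defined in this package are used.

import com.google.cloud.dataflow.sdk.util.common.Counter;
import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;
import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterator;
import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/** Hypothetical iterator that reports each string's UTF-8 size as it is returned. */
class ByteCountingIterator extends ElementByteSizeObservableIterator<String> {
  private final Iterator<String> inner;

  ByteCountingIterator(List<String> values) {
    this.inner = values.iterator();
  }

  @Override public boolean hasNext() { return inner.hasNext(); }

  @Override public String next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    String value = inner.next();
    // Notify observers of this element's encoded size.
    notifyValueReturned(value.getBytes(StandardCharsets.UTF_8).length);
    return value;
  }

  @Override public void remove() { throw new UnsupportedOperationException(); }
}

public class ByteSizeSketch {
  public static void main(String[] args) {
    Counter<Long> byteCounter = Counter.longs("encoded-bytes", AggregationKind.SUM);
    ElementByteSizeObserver observer = new ElementByteSizeObserver(byteCounter);

    ByteCountingIterator it = new ByteCountingIterator(Arrays.asList("a", "bcd"));
    it.addObserver(observer);
    while (it.hasNext()) {
      it.next();           // triggers notifyValueReturned(), which calls observer.update()
      observer.advance();  // folds this element's bytes into the counter
    }
    System.out.println(byteCounter);  // encoded-bytes:SUM(4)
  }
}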
+ */ + public void advance() { + counter.addValue(totalSize); + + totalSize = 0; + isLazy = false; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java new file mode 100644 index 0000000000000..f3008232a1074 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java @@ -0,0 +1,83 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * A {@link Reiterator} which forwards to another {@code Reiterator}, useful for + * implementing {@code Reiterator} wrappers. + * + * @param the type of elements returned by this iterator + */ +public abstract class ForwardingReiterator + implements Reiterator, Cloneable { + private Reiterator base; + + /** + * Constructs a {@link ForwardingReiterator}. + * @param base supplies a base reiterator to forward requests to. This + * reiterator will be used directly; it will not be copied by the constructor. + */ + public ForwardingReiterator(Reiterator base) { + this.base = checkNotNull(base); + } + + @Override + protected ForwardingReiterator clone() { + ForwardingReiterator result; + try { + result = (ForwardingReiterator) super.clone(); + } catch (CloneNotSupportedException e) { + throw new AssertionError( + "Object.clone() for a ForwardingReiterator threw " + + "CloneNotSupportedException; this should not happen, " + + "since ForwardingReiterator implements Cloneable.", + e); + } + result.base = base.copy(); + return result; + } + + @Override + public boolean hasNext() { + return base.hasNext(); + } + + @Override + public T next() { + return base.next(); + } + + @Override + public void remove() { + base.remove(); + } + + /** + * {@inheritDoc} + * + *
<p>
This implementation uses {@link #clone} to construct a duplicate of the + * {@link Reiterator}. Derived classes must either implement + * {@link Cloneable} semantics, or must provide an alternative implementation + * of this method. + */ + @Override + public ForwardingReiterator copy() { + return clone(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Metric.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Metric.java new file mode 100644 index 0000000000000..23a590743b21f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Metric.java @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +/** + * A metric (e.g., CPU usage) that can be reported by a worker. + * + * @param the type of the metric's value + */ +public abstract class Metric { + String name; + T value; + + public Metric(String name, T value) { + this.name = name; + this.value = value; + } + + public String getName() { return name; } + + public T getValue() { return value; } + + /** + * A double-valued Metric. + */ + public static class DoubleMetric extends Metric { + public DoubleMetric(String name, double value) { + super(name, value); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java new file mode 100644 index 0000000000000..d139380c65c12 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java @@ -0,0 +1,98 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +import java.util.NoSuchElementException; + +/** + * A {@link Reiterator} that supports one-element lookahead during iteration. 
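A wrapper built on ForwardingReiterator only needs to override the methods whose behavior changes; copy() is inherited and clones the wrapper along with an independent copy of the base. A hypothetical transforming wrapper, for illustration only:

import com.google.cloud.dataflow.sdk.util.common.ForwardingReiterator;
import com.google.cloud.dataflow.sdk.util.common.Reiterator;

/** Hypothetical wrapper that upper-cases the elements of a Reiterator of strings. */
class UpperCasingReiterator extends ForwardingReiterator<String> {
  UpperCasingReiterator(Reiterator<String> base) {
    super(base);
  }

  @Override
  public String next() {
    return super.next().toUpperCase();
  }
  // hasNext(), remove(), and copy() are inherited; copy() clones this wrapper and
  // gives the clone an independent copy of the underlying base iterator.
}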
+ * + * @param the type of elements returned by this iterator + */ +public final class PeekingReiterator implements Reiterator { + private T nextElement; + private boolean nextElementComputed; + private final Reiterator iterator; + + public PeekingReiterator(Reiterator iterator) { + this.iterator = checkNotNull(iterator); + } + + PeekingReiterator(PeekingReiterator it) { + this.iterator = checkNotNull(it).iterator.copy(); + this.nextElement = it.nextElement; + this.nextElementComputed = it.nextElementComputed; + } + + @Override + public boolean hasNext() { + computeNext(); + return nextElementComputed; + } + + @Override + public T next() { + T result = peek(); + nextElementComputed = false; + return result; + } + + /** + * {@inheritDoc} + * + *
<p>
If {@link #peek} is called, {@code remove} is disallowed until + * {@link #next} has been subsequently called. + */ + @Override + public void remove() { + checkState(!nextElementComputed, + "After peek(), remove() is disallowed until next() is called"); + iterator.remove(); + } + + @Override + public PeekingReiterator copy() { + return new PeekingReiterator(this); + } + + /** + * Returns the element that would be returned by {@link #next}, without + * actually consuming the element. + * @throws NoSuchElementException if there is no next element + */ + public T peek() { + computeNext(); + if (!nextElementComputed) { + throw new NoSuchElementException(); + } + return nextElement; + } + + private void computeNext() { + if (nextElementComputed) { + return; + } + if (!iterator.hasNext()) { + return; + } + nextElement = iterator.next(); + nextElementComputed = true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java new file mode 100644 index 0000000000000..ebf30459e2778 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +/** + * An {@link Iterable} that returns {@link Reiterator} iterators. + * + * @param the type of elements returned by the iterator + */ +public interface Reiterable extends Iterable { + @Override + public Reiterator iterator(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java new file mode 100644 index 0000000000000..7613a3a37bd37 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.Iterator; + +/** + * An {@link Iterator} with the ability to copy its iteration state. 
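A sketch of the copy contract and of PeekingReiterator lookahead, using a hypothetical list-backed Reiterator that is not part of the SDK:

import com.google.cloud.dataflow.sdk.util.common.PeekingReiterator;
import com.google.cloud.dataflow.sdk.util.common.Reiterator;

import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;

/** Hypothetical list-backed Reiterator; copy() snapshots the current position. */
class ListReiterator<T> implements Reiterator<T> {
  private final List<T> list;
  private int index;

  ListReiterator(List<T> list, int index) {
    this.list = list;
    this.index = index;
  }

  @Override public boolean hasNext() { return index < list.size(); }

  @Override public T next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    return list.get(index++);
  }

  @Override public void remove() { throw new UnsupportedOperationException(); }

  @Override public ListReiterator<T> copy() { return new ListReiterator<>(list, index); }
}

public class ReiteratorSketch {
  public static void main(String[] args) {
    Reiterator<String> base = new ListReiterator<>(Arrays.asList("a", "b", "c"), 0);

    // Copies iterate independently of the original.
    Reiterator<String> snapshot = base.copy();
    System.out.println(base.next());      // a
    System.out.println(snapshot.next());  // a (the copy has its own position)

    // PeekingReiterator adds one-element lookahead on top of any Reiterator.
    PeekingReiterator<String> peeking = new PeekingReiterator<>(base.copy());
    System.out.println(peeking.peek());   // b (base has already consumed "a")
    System.out.println(peeking.next());   // b
  }
}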
+ * + * @param the type of elements returned by this iterator + */ +public interface Reiterator extends Iterator { + /** + * Returns a copy of the current {@link Reiterator}. The copy's iteration + * state is logically independent of the current iterator; each may be + * advanced without affecting the other. + * + *
<p>
The returned {@code Reiterator} is not guaranteed to return + * referentially identical iteration results as the original + * {@link Reiterator}, although {@link Object#equals} will typically return + * true for the corresponding elements of each if the original source is + * logically immutable. + */ + public Reiterator copy(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java new file mode 100644 index 0000000000000..0dd2af486ba0f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Defines utilities shared by multiple PipelineRunner implementations. **/ +package com.google.cloud.dataflow.sdk.util.common; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java new file mode 100644 index 0000000000000..2a596c0d86f86 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java @@ -0,0 +1,148 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +import com.google.cloud.dataflow.sdk.util.common.Reiterator; + +import java.util.ListIterator; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * BatchingShuffleEntryReader provides a mechanism for reading entries from + * a shuffle dataset. 
+ */ +@NotThreadSafe +public final class BatchingShuffleEntryReader implements ShuffleEntryReader { + private final ShuffleBatchReader batchReader; + + /** + * Constructs a {@link BatchingShuffleEntryReader} + * + * @param batchReader supplies the underlying + * {@link ShuffleBatchReader} to read batches of entries from + */ + public BatchingShuffleEntryReader( + ShuffleBatchReader batchReader) { + this.batchReader = checkNotNull(batchReader); + } + + @Override + public Reiterator read( + @Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition) { + return new ShuffleReadIterator(startPosition, endPosition); + } + + /** + * ShuffleReadIterator iterates over a (potentially huge) sequence of shuffle + * entries. + */ + private final class ShuffleReadIterator implements Reiterator { + // Shuffle service returns entries in pages. If the response contains a + // non-null nextStartPosition, we have to ask for more pages. The response + // with null nextStartPosition signifies the end of stream. + @Nullable private final ShufflePosition endPosition; + @Nullable private ShufflePosition nextStartPosition; + + /** The most recently read batch. */ + @Nullable ShuffleBatchReader.Batch currentBatch; + /** An iterator over the most recently read batch. */ + @Nullable private ListIterator entries; + + ShuffleReadIterator(@Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition) { + this.nextStartPosition = startPosition; + this.endPosition = endPosition; + } + + private ShuffleReadIterator(ShuffleReadIterator it) { + this.endPosition = it.endPosition; + this.nextStartPosition = it.nextStartPosition; + this.currentBatch = it.currentBatch; + // The idea here: if the iterator being copied was in the middle of a + // batch (the typical case), create a new iteration state at the same + // point in the same batch. + this.entries = (it.entries == null + ? null + : it.currentBatch.entries.listIterator(it.entries.nextIndex())); + } + + @Override + public boolean hasNext() { + fillEntriesIfNeeded(); + // TODO: Report API errors to the caller using checked + // exceptions. + return entries.hasNext(); + } + + @Override + public ShuffleEntry next() throws NoSuchElementException { + fillEntriesIfNeeded(); + ShuffleEntry entry = entries.next(); + return entry; + } + + @Override + public void remove() throws UnsupportedOperationException { + throw new UnsupportedOperationException(); + } + + @Override + public ShuffleReadIterator copy() { + return new ShuffleReadIterator(this); + } + + private void fillEntriesIfNeeded() { + if (entries != null && entries.hasNext()) { + // Has more records in the current page, or error. + return; + } + + if (entries != null && nextStartPosition == null) { + // End of stream. 
+ checkState(!entries.hasNext()); + return; + } + + do { + fillEntries(); + } while (!entries.hasNext() && nextStartPosition != null); + } + + private void fillEntries() { + try { + ShuffleBatchReader.Batch batch = + batchReader.read(nextStartPosition, endPosition); + nextStartPosition = batch.nextStartPosition; + entries = batch.entries.listIterator(); + currentBatch = batch; + } catch (RuntimeException e) { + throw e; + } catch (Throwable t) { + throw new RuntimeException(t); + } + + checkState(entries != null); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReader.java new file mode 100644 index 0000000000000..87abf21d4229b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReader.java @@ -0,0 +1,228 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.common.base.Objects; +import com.google.common.base.Throwables; + +import java.io.IOException; +import java.lang.ref.Reference; +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.util.HashMap; + +import javax.annotation.Nullable; + +/** A {@link ShuffleBatchReader} that caches batches as they're read. */ +public final class CachingShuffleBatchReader implements ShuffleBatchReader { + private final ShuffleBatchReader reader; + + // The cache itself is implemented as a HashMap of RangeReadReference values, + // keyed by the start and end positions describing the range of a particular + // request (represented by BatchRange). + // + // The first reader for a particular range builds an AsyncReadResult for the + // result, inserts it into the cache, drops the lock, and then completes the + // read; subsequent readers simply wait for the AsyncReadResult to complete. + // + // Note that overlapping ranges are considered distinct; cached entries for + // one range are not used for any other range, even if doing so would avoid a + // fetch. + // + // So this is not a particularly sophisticated algorithm: a smarter cache + // would be able to use subranges of previous requests to satisfy new + // requests. But in this particular case, we expect that the simple algorithm + // will work well. For a given shuffle source, the splits read by various + // iterators over that source starting from a particular position (which is + // how this class is used in practice) should turn out to be constant, if the + // result returned by the service for a particular [start, end) range are + // consistent. 
So we're not expecting to see overlapping ranges of entries + // within a cache. + // + // It's also been shown -- by implementing it -- that the more thorough + // algorithm is relatively complex, with numerous edge cases requiring very + // careful thought to get right. It's doable, but non-trivial and hard to + // understand and maintain; without a compelling justification, it's better to + // stick with the simpler implementation. + // + // @VisibleForTesting + final HashMap cache = new HashMap<>(); + + // The queue of references which have been collected by the garbage collector. + // This queue should only be used with references of class RangeReadReference. + private final ReferenceQueue refQueue = new ReferenceQueue<>(); + + /** + * Constructs a new {@link CachingShuffleBatchReader}. + * + * @param reader supplies the downstream {@link ShuffleBatchReader} + * this {@code CachingShuffleBatchReader} will use to issue reads + */ + public CachingShuffleBatchReader(ShuffleBatchReader reader) { + this.reader = checkNotNull(reader); + } + + @Override + public Batch read( + @Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition) throws IOException { + + @Nullable AsyncReadResult waitResult = null; + @Nullable AsyncReadResult runResult = null; + final BatchRange batchRange = new BatchRange(startPosition, endPosition); + + synchronized (cache) { + // Remove any GCd entries. + for (Reference ref = refQueue.poll(); + ref != null; + ref = refQueue.poll()) { + RangeReadReference rangeReadRef = (RangeReadReference) ref; + cache.remove(rangeReadRef.getBatchRange()); + } + + // Find the range reference; note that one might not be in the map, or it + // might contain a null if its target has been GCd. + @Nullable RangeReadReference rangeReadRef = cache.get(batchRange); + + // Get a strong reference to the existing AsyncReadResult for the range, if possible. + if (rangeReadRef != null) { + waitResult = rangeReadRef.get(); + } + + // Create a new AsyncReadResult if one is needed. + if (waitResult == null) { + runResult = new AsyncReadResult(); + waitResult = runResult; + rangeReadRef = null; // Replace the previous RangeReadReference. + } + + // Insert a new RangeReadReference into the map if we don't have a usable + // one (either we weren't able to find one in the map, or we did but it + // was already cleared by the GC). + if (rangeReadRef == null) { + cache.put(batchRange, + new RangeReadReference(batchRange, runResult, refQueue)); + } + } // Drop the cache lock. + + if (runResult != null) { + // This thread created the AsyncReadResult, and is responsible for + // actually performing the read. + try { + Batch result = reader.read(startPosition, endPosition); + runResult.setResult(result); + } catch (RuntimeException | IOException e) { + runResult.setException(e); + synchronized (cache) { + // No reason to continue to cache the fact that there was a problem. + // Note that since this thread holds a strong reference to the + // AsyncReadResult, it won't be GCd, so the soft reference held by the + // cache is guaranteed to still be present. + cache.remove(batchRange); + } + } + } + + return waitResult.getResult(); + } + + /** The key for the entries stored in the batch cache. 
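Composed, the two readers give page-at-a-time iteration with caching of repeated ranges. A sketch of that composition, assuming some ShuffleBatchReader backed by the shuffle service is supplied by the caller (its construction is outside this file) and that null start/end positions denote the full range:

import com.google.cloud.dataflow.sdk.util.common.Reiterator;
import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader;
import com.google.cloud.dataflow.sdk.util.common.worker.CachingShuffleBatchReader;
import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleBatchReader;
import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry;
import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader;

public class ShuffleReadSketch {
  /** Wraps a service-backed batch reader with caching and paging, then drains all entries. */
  static void readAllEntries(ShuffleBatchReader serviceReader) {
    ShuffleEntryReader reader =
        new BatchingShuffleEntryReader(new CachingShuffleBatchReader(serviceReader));

    // Null start/end positions are taken here to mean the full range.
    Reiterator<ShuffleEntry> entries = reader.read(null, null);
    while (entries.hasNext()) {
      ShuffleEntry entry = entries.next();
      // ... hand the entry to downstream processing ...
    }
  }
}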
*/ + // @VisibleForTesting + static final class BatchRange { + @Nullable private final ShufflePosition startPosition; + @Nullable private final ShufflePosition endPosition; + + public BatchRange(@Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition) { + this.startPosition = startPosition; + this.endPosition = endPosition; + } + + @Override + public boolean equals(Object o) { + return o == this + || (o instanceof BatchRange + && Objects.equal(((BatchRange) o).startPosition, startPosition) + && Objects.equal(((BatchRange) o).endPosition, endPosition)); + } + + @Override + public int hashCode() { + return Objects.hashCode(startPosition, endPosition); + } + } + + /** Holds an asynchronously batch read result. */ + private static final class AsyncReadResult { + @Nullable private Batch batch = null; + @Nullable private Throwable thrown = null; + + public synchronized void setResult(Batch b) { + batch = b; + notifyAll(); + } + + public synchronized void setException(Throwable t) { + thrown = t; + notifyAll(); + } + + public synchronized Batch getResult() throws IOException { + while (batch == null && thrown == null) { + try { + wait(); + } catch (InterruptedException e) { + throw new RuntimeException("interrupted", e); + } + } + if (thrown != null) { + // N.B. setException can only be called with a RuntimeException or an + // IOException, so propagateIfPossible should always do the throw. + Throwables.propagateIfPossible(thrown, IOException.class); + throw new RuntimeException("unexpected", thrown); + } + return batch; + } + } + + /** + * Maintains a soft reference to an AsyncReadResult. + * + *
<p>
This class extends {@link SoftReference} so that when the garbage + * collector collects a batch and adds its reference to the cache's reference + * queue, that reference can be cast back to {@code RangeReadReference}, + * allowing us to identify the reference's position in the cache (and to + * therefore remove it). + */ + // @VisibleForTesting + static final class RangeReadReference extends SoftReference { + private final BatchRange range; + + public RangeReadReference( + BatchRange range, AsyncReadResult result, + ReferenceQueue refQueue) { + super(result, refQueue); + this.range = checkNotNull(range); + } + + public BatchRange getBatchRange() { + return range; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java new file mode 100644 index 0000000000000..4fc67d60f3c3f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * An interface for sources which can perform operations on source specifications, such as + * splitting the source and computing its metadata. See {@code SourceOperationRequest} for details. + */ +public interface CustomSourceFormat { + /** + * Performs an operation on the specification of a source. + * See {@code SourceOperationRequest} for details. + */ + public SourceOperationResponse performSourceOperation(SourceOperationRequest operation) + throws Exception; + + /** + * A representation of an operation on the specification of a source, + * e.g. splitting a source into shards, getting the metadata of a source, + * etc. + * + *
<p>
The common worker framework does not interpret instances of + * this interface. But a tool-specific framework can make assumptions + * about the implementation, and so the concrete Source subclasses used + * by a tool-specific framework should match. + */ + public interface SourceOperationRequest { + } + + /** + * A representation of the result of a SourceOperationRequest. + * + *
<p>
See the comment on {@link SourceOperationRequest} for how instances of this + * interface are used by the rest of the framework. + */ + public interface SourceOperationResponse { + } + + /** + * A representation of a specification of a source. + * + *
<p>
See the comment on {@link SourceOperationRequest} for how instances of this + * interface are used by the rest of the framework. + */ + public interface SourceSpec { + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java new file mode 100644 index 0000000000000..6325d1ac5cdb8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * A flatten operation. + */ +public class FlattenOperation extends ReceivingOperation { + public FlattenOperation(String operationName, + OutputReceiver[] receivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, receivers, + counterPrefix, addCounterMutator, stateSampler); + } + + /** Invoked by tests. */ + public FlattenOperation(OutputReceiver outputReceiver, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + this("FlattenOperation", new OutputReceiver[]{ outputReceiver }, + counterPrefix, addCounterMutator, stateSampler); + } + + @Override + public void process(Object elem) throws Exception { + try (StateSampler.ScopedState process = + stateSampler.scopedState(processState)) { + checkStarted(); + Receiver receiver = receivers[0]; + if (receiver != null) { + receiver.process(elem); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java new file mode 100644 index 0000000000000..19428201f0395 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java @@ -0,0 +1,216 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterable; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterator; +import com.google.cloud.dataflow.sdk.util.common.PeekingReiterator; +import com.google.cloud.dataflow.sdk.util.common.Reiterable; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * An iterator through KeyGroupedShuffleEntries. + */ +public abstract class GroupingShuffleEntryIterator + implements Iterator { + /** The iterator through the underlying shuffle records. */ + private PeekingReiterator shuffleIterator; + + /** + * The key of the most recent KeyGroupedShuffleEntries returned by + * {@link #next}, if any. + * + *
<p>
If currentKeyBytes is non-null, then it's the key for the last entry + * returned by {@link #next}, and all incoming entries with that key should + * be skipped over by this iterator (since this iterator is iterating over + * keys, not the individual values associated with a given key). + * + *
<p>
If currentKeyBytes is null, and shuffleIterator.hasNext(), then the + * key of shuffleIterator.next() is the key of the next + * KeyGroupedShuffleEntries to return from {@link #next}. + */ + @Nullable private byte[] currentKeyBytes = null; + + /** + * Constructs a GroupingShuffleEntryIterator, given a Reiterator + * over ungrouped ShuffleEntries, assuming the ungrouped + * ShuffleEntries for a given key are consecutive. + */ + public GroupingShuffleEntryIterator( + Reiterator shuffleIterator) { + this.shuffleIterator = + new PeekingReiterator( + new ProgressTrackingReiterator<>( + shuffleIterator, + new ProgressTrackerGroup() { + @Override + protected void report(ShuffleEntry entry) { + notifyElementRead(entry.length()); + } + }.start())); + } + + /** Notifies observers about a new element read. */ + protected abstract void notifyElementRead(long byteSize); + + @Override + public boolean hasNext() { + advanceIteratorToNextKey(); + return shuffleIterator.hasNext(); + } + + @Override + public KeyGroupedShuffleEntries next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + ShuffleEntry entry = shuffleIterator.peek(); + currentKeyBytes = entry.getKey(); + return new KeyGroupedShuffleEntries( + entry.getPosition(), + currentKeyBytes, + new ValuesIterable(new ValuesIterator(currentKeyBytes))); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + private void advanceIteratorToNextKey() { + if (currentKeyBytes == null) { + return; + } + while (shuffleIterator.hasNext()) { + ShuffleEntry entry = shuffleIterator.peek(); + if (!Arrays.equals(entry.getKey(), currentKeyBytes)) { + break; + } + shuffleIterator.next(); + } + currentKeyBytes = null; + } + + private static class ValuesIterable + extends ElementByteSizeObservableIterable + implements Reiterable { + private final ValuesIterator base; + + public ValuesIterable(ValuesIterator base) { + this.base = checkNotNull(base); + } + + @Override + public ValuesIterator createIterator() { + return base.copy(); + } + } + + /** + * Provides the {@link Reiterator} used to iterate through the + * shuffle entries of a KeyGroupedShuffleEntries. + */ + private class ValuesIterator + extends ElementByteSizeObservableIterator + implements Reiterator { + // N.B. This class is *not* static; it maintains a reference to its + // enclosing KeyGroupedShuffleEntriesIterator instance so that it can update + // that instance's shuffleIterator as an optimization. + + private final byte[] valueKeyBytes; + private final PeekingReiterator valueShuffleIterator; + private final ProgressTracker tracker; + private boolean nextKnownValid = false; + + public ValuesIterator(byte[] valueKeyBytes) { + this.valueKeyBytes = checkNotNull(valueKeyBytes); + this.valueShuffleIterator = shuffleIterator.copy(); + // N.B. The ProgressTrackerGroup captures the reference to the original + // ValuesIterator for a given values iteration. Which happens to be + // exactly what we want, since this is also the ValuesIterator whose + // base Observable has the references to all of the Observers watching + // the iteration. Copied ValuesIterator instances do *not* have these + // Observers, but that's fine, since the derived ProgressTracker + // instances reference the ProgressTrackerGroup which references the + // original ValuesIterator which does have them. 
+ this.tracker = new ProgressTrackerGroup() { + @Override + protected void report(ShuffleEntry entry) { + notifyValueReturned(entry.length()); + } + }.start(); + } + + private ValuesIterator(ValuesIterator it) { + this.valueKeyBytes = it.valueKeyBytes; + this.valueShuffleIterator = it.valueShuffleIterator.copy(); + this.tracker = it.tracker.copy(); + this.nextKnownValid = it.nextKnownValid; + } + + @Override + public boolean hasNext() { + if (nextKnownValid) { + return true; + } + if (!valueShuffleIterator.hasNext()) { + return false; + } + ShuffleEntry entry = valueShuffleIterator.peek(); + nextKnownValid = Arrays.equals(entry.getKey(), valueKeyBytes); + + // Opportunistically update the parent KeyGroupedShuffleEntriesIterator, + // potentially allowing it to skip a large number of key/value pairs + // with this key. + if (!nextKnownValid && valueKeyBytes == currentKeyBytes) { + shuffleIterator = valueShuffleIterator.copy(); + currentKeyBytes = null; + } + + return nextKnownValid; + } + + @Override + public ShuffleEntry next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + ShuffleEntry entry = valueShuffleIterator.next(); + nextKnownValid = false; + tracker.saw(entry); + return entry; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public ValuesIterator copy() { + return new ValuesIterator(this); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/KeyGroupedShuffleEntries.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/KeyGroupedShuffleEntries.java new file mode 100644 index 0000000000000..1b8b552b521e8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/KeyGroupedShuffleEntries.java @@ -0,0 +1,35 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.Reiterable; + +/** + * A collection of ShuffleEntries, all with the same key. 
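+ * <p>A sketch of how grouped entries are typically consumed (illustrative only;
+ * {@code groupingIterator} is an assumed {@code GroupingShuffleEntryIterator}
+ * and {@code process} an assumed helper):
+ * <pre>{@code
+ * while (groupingIterator.hasNext()) {
+ *   KeyGroupedShuffleEntries group = groupingIterator.next();
+ *   for (ShuffleEntry entry : group.values) {
+ *     process(group.key, entry);  // every entry here carries the key group.key
+ *   }
+ * }
+ * }</pre>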
+ */ +public class KeyGroupedShuffleEntries { + public final byte[] position; + public final byte[] key; + public final Reiterable values; + + public KeyGroupedShuffleEntries(byte[] position, byte[] key, + Reiterable values) { + this.position = position; + this.key = key; + this.values = values; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java new file mode 100644 index 0000000000000..45d5e8c6715e3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java @@ -0,0 +1,116 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.ListIterator; + +/** + * An executor for a map task, defined by a list of Operations. + */ +public class MapTaskExecutor extends WorkExecutor { + private static final Logger LOG = + LoggerFactory.getLogger(MapTaskExecutor.class); + + /** The operations in the map task, in execution order. */ + public final List operations; + + /** The StateSampler for tracking where time is being spent, or null. */ + protected final StateSampler stateSampler; + + /** + * Creates a new MapTaskExecutor. + * + * @param operations the operations of the map task, in order of execution + * @param counters a set of system counters associated with + * operations, which may get extended during execution + * @param stateSampler a state sampler for tracking where time is being spent + */ + public MapTaskExecutor(List operations, + CounterSet counters, + StateSampler stateSampler) { + super(counters); + this.operations = operations; + this.stateSampler = stateSampler; + } + + @Override + public void execute() throws Exception { + LOG.debug("executing map task"); + + // Start operations, in reverse-execution-order, so that a + // consumer is started before a producer might output to it. + // Starting a root operation such as a ReadOperation does the work + // of processing the input dataset. + LOG.debug("starting operations"); + ListIterator iterator = + operations.listIterator(operations.size()); + while (iterator.hasPrevious()) { + Operation op = iterator.previous(); + op.start(); + } + + // Finish operations, in forward-execution-order, so that a + // producer finishes outputting to its consumers before those + // consumers are themselves finished. + LOG.debug("finishing operations"); + for (Operation op : operations) { + op.finish(); + } + + LOG.debug("map task execution complete"); + + // TODO: support for success / failure ports? 
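+    // Illustrative ordering, assuming a three-operation task [read, parDo, write]:
+    // start() runs on write, then parDo, then read, so by the time the read's
+    // start() begins producing elements every downstream consumer is already
+    // started; finish() then runs on read, parDo, write, in that order.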
+ } + + @Override + public Source.Progress getWorkerProgress() throws Exception { + return getReadOperation().getProgress(); + } + + @Override + public Source.Position proposeStopPosition( + Source.Progress proposedStopPosition) throws Exception { + return getReadOperation().proposeStopPosition(proposedStopPosition); + } + + ReadOperation getReadOperation() throws Exception { + if (operations == null || operations.isEmpty()) { + throw new IllegalStateException( + "Map task has no operation."); + } + + Operation readOperation = operations.get(0); + if (!(readOperation instanceof ReadOperation)) { + throw new IllegalStateException( + "First operation in the map task is not a ReadOperation."); + } + + return (ReadOperation) readOperation; + } + + @Override + public void close() throws Exception { + stateSampler.close(); + super.close(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java new file mode 100644 index 0000000000000..bedc081cec99d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java @@ -0,0 +1,132 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * The abstract base class for Operations, which correspond to + * Instructions in the original MapTask InstructionGraph. + * + * Call start() to start the operation. + * + * A read operation's start() method actually reads the data, and in + * effect runs the pipeline. + * + * Call finish() to finish the operation. + * + * Since both start() and finish() may call process() on + * this operation's consumers, start an operation after + * starting its consumers, and finish an operation before + * finishing its consumers. + */ +public abstract class Operation { + /** + * The array of consuming receivers, one per operation output + * "port" (e.g., DoFn main or side output). A receiver might be + * null if that output isn't being consumed. + */ + public final OutputReceiver[] receivers; + + /** + * The possible initialization states of an Operation. + * For internal self-checking purposes. + */ + public enum InitializationState { + // start() hasn't yet been called. + UNSTARTED, + + // start() has been called, but finish() hasn't yet been called. + STARTED, + + // finish() has been called. + FINISHED + } + + /** The initialization state of this Operation. 
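+   * <p>Summarizing the checks below: the state is {@code UNSTARTED} until
+   * {@link #start} moves it to {@code STARTED}, and {@link #finish} then moves
+   * it to {@code FINISHED}.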
+   */
+  public InitializationState initializationState =
+      InitializationState.UNSTARTED;
+
+  protected final StateSampler stateSampler;
+
+  protected final int startState;
+  protected final int processState;
+  protected final int finishState;
+
+  public Operation(String operationName,
+                   OutputReceiver[] receivers,
+                   String counterPrefix,
+                   CounterSet.AddCounterMutator addCounterMutator,
+                   StateSampler stateSampler) {
+    this.receivers = receivers;
+    this.stateSampler = stateSampler;
+    startState = stateSampler.stateForName(operationName + "-start");
+    processState = stateSampler.stateForName(operationName + "-process");
+    finishState = stateSampler.stateForName(operationName + "-finish");
+  }
+
+  /**
+   * Checks that this operation is not yet started, throwing an
+   * exception otherwise.
+   */
+  void checkUnstarted() {
+    if (initializationState != InitializationState.UNSTARTED) {
+      throw new AssertionError(
+          "expecting this instruction to not yet be started");
+    }
+  }
+
+  /**
+   * Checks that this operation has been started but not yet finished,
+   * throwing an exception otherwise.
+   */
+  void checkStarted() {
+    if (initializationState != InitializationState.STARTED) {
+      throw new AssertionError(
+          "expecting this instruction to be started");
+    }
+  }
+
+  /**
+   * Checks that this operation has been finished, throwing an
+   * exception otherwise.
+   */
+  void checkFinished() {
+    if (initializationState != InitializationState.FINISHED) {
+      throw new AssertionError(
+          "expecting this instruction to be finished");
+    }
+  }
+
+  /**
+   * Starts this Operation's execution. Called after all successor
+   * consuming operations have been started.
+   */
+  public void start() throws Exception {
+    checkUnstarted();
+    initializationState = InitializationState.STARTED;
+  }
+
+  /**
+   * Finishes this Operation's execution. Called after all
+   * predecessor producing operations have been finished.
+   */
+  public void finish() throws Exception {
+    checkStarted();
+    initializationState = InitializationState.FINISHED;
+  }
+}
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java
new file mode 100644
index 0000000000000..a13b74afbf8b3
--- /dev/null
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java
@@ -0,0 +1,207 @@
+/*******************************************************************************
+ * Copyright (C) 2014 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservable; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * Receiver that forwards each input it receives to each of a list of + * output Receivers. Additionally, it tracks output counters, that is, size + * information for elements passing through. + */ +public class OutputReceiver implements Receiver { + private final String outputName; + // Might be null, e.g., undeclared outputs will not have an + // elementByteSizeObservable. + private final ElementByteSizeObservable elementByteSizeObservable; + private final Counter elementCount; + private Counter byteCount = null; + private Counter meanByteCount = null; + private ElementByteSizeObserver byteCountObserver = null; + private ElementByteSizeObserver meanByteCountObserver = null; + private final List outputs = new ArrayList<>(); + private final Random randomGenerator = new Random(); + private int samplingToken = 0; + private final int samplingTokenUpperBound = 1000000; // Lowest sampling probability: 0.001%. + private final int samplingCutoff = 10; + + public OutputReceiver(String outputName, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator) { + this(outputName, (ElementByteSizeObservable) null, + counterPrefix, addCounterMutator); + } + + public OutputReceiver(String outputName, + ElementByteSizeObservable elementByteSizeObservable, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator) { + this.outputName = outputName; + this.elementByteSizeObservable = elementByteSizeObservable; + + elementCount = addCounterMutator.addCounter( + Counter.longs(elementsCounterName(counterPrefix, outputName), SUM)); + + if (elementByteSizeObservable != null) { + String bytesCounterName = bytesCounterName(counterPrefix, outputName); + if (bytesCounterName != null) { + byteCount = addCounterMutator.addCounter( + Counter.longs(bytesCounterName, SUM)); + byteCountObserver = new ElementByteSizeObserver(byteCount); + } + String meanBytesCounterName = + meanBytesCounterName(counterPrefix, outputName); + if (meanBytesCounterName != null) { + meanByteCount = addCounterMutator.addCounter( + Counter.longs(meanBytesCounterName, MEAN)); + meanByteCountObserver = new ElementByteSizeObserver(meanByteCount); + } + } + } + + protected String elementsCounterName(String counterPrefix, + String outputName) { + return outputName + "-ElementCount"; + } + protected String bytesCounterName(String counterPrefix, + String outputName) { + return null; + } + protected String meanBytesCounterName(String counterPrefix, + String outputName) { + return outputName + "-MeanByteCount"; + } + + /** + * Adds a new receiver that this OutputReceiver forwards to. + */ + public void addOutput(Receiver receiver) { + outputs.add(receiver); + } + + @Override + public void process(Object elem) throws Exception { + // Increment element counter. + elementCount.addValue(1L); + + // Increment byte counter. 
+ boolean advanceByteCountObserver = false; + boolean advanceMeanByteCountObserver = false; + if ((byteCountObserver != null || meanByteCountObserver != null) + && (sampleElement() + || elementByteSizeObservable.isRegisterByteSizeObserverCheap( + elem))) { + + if (byteCountObserver != null) { + elementByteSizeObservable.registerByteSizeObserver( + elem, byteCountObserver); + } + if (meanByteCountObserver != null) { + elementByteSizeObservable.registerByteSizeObserver( + elem, meanByteCountObserver); + } + + if (byteCountObserver != null) { + if (!byteCountObserver.getIsLazy()) { + byteCountObserver.advance(); + } else { + advanceByteCountObserver = true; + } + } + if (meanByteCountObserver != null) { + if (!meanByteCountObserver.getIsLazy()) { + meanByteCountObserver.advance(); + } else { + advanceMeanByteCountObserver = true; + } + } + } + + // Fan-out. + for (Receiver out : outputs) { + if (out != null) { + out.process(elem); + } + } + + // Advance lazy ElementByteSizeObservers, if any. + // Note that user's code is allowed to store the element of one + // DoFn.processElement() call and access it later on. We are still + // calling next() here, causing an update to byteCount. If user's + // code really accesses more element's pieces later on, their byte + // count would accrue against a future element. This is not ideal, + // but still approximately correct. + if (advanceByteCountObserver) { + byteCountObserver.advance(); + } + if (advanceMeanByteCountObserver) { + meanByteCountObserver.advance(); + } + } + + public String getName() { + return outputName; + } + + public Counter getElementCount() { + return elementCount; + } + + public Counter getByteCount() { + return byteCount; + } + + public Counter getMeanByteCount() { + return meanByteCount; + } + + protected boolean sampleElement() { + // Sampling probability decreases as the element count is increasing. + // We unconditionally sample the first samplingCutoff elements. For the + // next samplingCutoff elements, the sampling probability drops from 100% + // to 50%. The probability of sampling the Nth element is: + // min(1, samplingCutoff / N), with an additional lower bound of + // samplingCutoff / samplingTokenUpperBound. This algorithm may be refined + // later. + samplingToken = Math.min(samplingToken + 1, samplingTokenUpperBound); + return randomGenerator.nextInt(samplingToken) < samplingCutoff; + } + + /** Invoked by tests only. */ + public int getReceiverCount() { + return outputs.size(); + } + + /** Invoked by tests only. */ + public Receiver getOnlyReceiver() { + if (outputs.size() != 1) { + throw new AssertionError("only one receiver expected"); + } + + return outputs.get(0); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoFn.java new file mode 100644 index 0000000000000..b922acc412d4c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoFn.java @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * Abstract base class for ParDoFns, invocable by ParDoOperations. + */ +public abstract class ParDoFn { + public abstract void startBundle(Receiver... receivers) throws Exception; + + public abstract void processElement(Object elem) throws Exception; + + public abstract void finishBundle() throws Exception; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java new file mode 100644 index 0000000000000..7a620983476f0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java @@ -0,0 +1,65 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * A ParDo mapping function. 
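+ * <p>The operation drives a {@link ParDoFn} through its bundle lifecycle:
+ * start() calls startBundle, each process(elem) calls processElement, and
+ * finish() calls finishBundle. A minimal, purely illustrative fn (not one
+ * shipped in this package) might look like:
+ * <pre>{@code
+ * ParDoFn identityFn = new ParDoFn() {
+ *   private Receiver[] outputs;
+ *   public void startBundle(Receiver... receivers) { this.outputs = receivers; }
+ *   public void processElement(Object elem) throws Exception {
+ *     for (Receiver receiver : outputs) {
+ *       if (receiver != null) { receiver.process(elem); }
+ *     }
+ *   }
+ *   public void finishBundle() {}
+ * };
+ * }</pre>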
+ */ +public class ParDoOperation extends ReceivingOperation { + public final ParDoFn fn; + + public ParDoOperation(String operationName, + ParDoFn fn, + OutputReceiver[] outputReceivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, outputReceivers, + counterPrefix, addCounterMutator, stateSampler); + this.fn = fn; + } + + @Override + public void start() throws Exception { + try (StateSampler.ScopedState start = + stateSampler.scopedState(startState)) { + super.start(); + fn.startBundle(receivers); + } + } + + @Override + public void process(Object elem) throws Exception { + try (StateSampler.ScopedState process = + stateSampler.scopedState(processState)) { + checkStarted(); + fn.processElement(elem); + } + } + + @Override + public void finish() throws Exception { + try (StateSampler.ScopedState finish = + stateSampler.scopedState(finishState)) { + checkStarted(); + fn.finishBundle(); + super.finish(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java new file mode 100644 index 0000000000000..a4afa5b2820d2 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java @@ -0,0 +1,521 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; + +/** + * A partial group-by-key operation. + */ +public class PartialGroupByKeyOperation extends ReceivingOperation { + /** + * Provides client-specific operations for grouping keys. + */ + public static interface GroupingKeyCreator { + public Object createGroupingKey(K key) throws Exception; + } + + /** + * Provides client-specific operations for size estimates. + */ + public static interface SizeEstimator { + public long estimateSize(E element) throws Exception; + } + + /** + * Provides client-specific operations for working with elements + * that are key/value or key/values pairs. + */ + public interface PairInfo { + public Object getKeyFromInputPair(Object pair); + public Object getValueFromInputPair(Object pair); + public Object makeOutputPair(Object key, Object value); + } + + /** + * Provides client-specific operations for combining values. 
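+   * <p>For illustration only; the type parameters (key, input value, accumulator,
+   * output value) are read off the method shapes below and are an assumption of
+   * this sketch. A combiner that sums longs might look like:
+   * <pre>{@code
+   * Combiner<String, Long, Long, Long> sum = new Combiner<String, Long, Long, Long>() {
+   *   public Long createAccumulator(String key) { return 0L; }
+   *   public Long add(String key, Long accumulator, Long value) { return accumulator + value; }
+   *   public Long merge(String key, Iterable<Long> accumulators) {
+   *     long total = 0;
+   *     for (long accumulator : accumulators) { total += accumulator; }
+   *     return total;
+   *   }
+   *   public Long extract(String key, Long accumulator) { return accumulator; }
+   * };
+   * }</pre>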
+ */ + public interface Combiner { + public VA createAccumulator(K key); + public VA add(K key, VA accumulator, VI value); + public VA merge(K key, Iterable accumulators); + public VO extract(K key, VA accumulator); + } + + /** + * A wrapper around a byte[] that uses structural, value-based + * equality rather than byte[]'s normal object identity. + */ + public static class StructuralByteArray { + byte[] value; + + public StructuralByteArray(byte[] value) { + this.value = value; + } + + public byte[] getValue() { return value; } + + @Override + public boolean equals(Object o) { + if (o instanceof StructuralByteArray) { + StructuralByteArray that = (StructuralByteArray) o; + return Arrays.equals(this.value, that.value); + } else { + return false; + } + } + + @Override + public int hashCode() { + return Arrays.hashCode(value); + } + + @Override + public String toString() { + return "Val" + Arrays.toString(value); + } + } + + // By default, how many bytes we allow the grouping table to consume before + // it has to be flushed. + static final long DEFAULT_MAX_GROUPING_TABLE_BYTES = 100_000_000L; + + // How many bytes a word in the JVM has. + static final int BYTES_PER_JVM_WORD = getBytesPerJvmWord(); + + /** + * The number of bytes of overhead to store an entry in the + * grouping table (a {@code HashMap}), + * ignoring the actual number of bytes in the keys and values: + * + * - an array element (1 word), + * - a HashMap.Entry (4 words), + * - a StructuralByteArray (1 words), + * - a backing array (guessed at 1 word for the length), + * - a KeyAndValues (2 words), + * - an ArrayList (2 words), + * - a backing array (1 word), + * - per-object overhead (JVM-specific, guessed at 2 words * 6 objects). + */ + static final int PER_KEY_OVERHEAD = 24 * BYTES_PER_JVM_WORD; + + final GroupingTable groupingTable; + + @SuppressWarnings("unchecked") + public PartialGroupByKeyOperation( + String operationName, + GroupingKeyCreator groupingKeyCreator, + SizeEstimator keySizeEstimator, SizeEstimator valueSizeEstimator, + PairInfo pairInfo, + OutputReceiver[] receivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, receivers, counterPrefix, addCounterMutator, stateSampler); + groupingTable = new BufferingGroupingTable( + DEFAULT_MAX_GROUPING_TABLE_BYTES, groupingKeyCreator, + pairInfo, keySizeEstimator, valueSizeEstimator); + } + + @SuppressWarnings("unchecked") + public PartialGroupByKeyOperation( + String operationName, + GroupingKeyCreator groupingKeyCreator, + SizeEstimator keySizeEstimator, SizeEstimator valueSizeEstimator, + double sizeEstimatorSampleRate, + PairInfo pairInfo, + OutputReceiver[] receivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { + this(operationName, groupingKeyCreator, + new SamplingSizeEstimator(keySizeEstimator, sizeEstimatorSampleRate, 1.0), + new SamplingSizeEstimator(valueSizeEstimator, sizeEstimatorSampleRate, 1.0), + pairInfo, receivers, counterPrefix, addCounterMutator, stateSampler); + } + + /** Invoked by tests. 
*/ + public PartialGroupByKeyOperation( + GroupingKeyCreator groupingKeyCreator, + SizeEstimator keySizeEstimator, SizeEstimator valueSizeEstimator, + PairInfo pairInfo, + OutputReceiver outputReceiver, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + this("PartialGroupByKeyOperation", groupingKeyCreator, + keySizeEstimator, valueSizeEstimator, pairInfo, + new OutputReceiver[]{ outputReceiver }, + counterPrefix, + addCounterMutator, + stateSampler); + } + + @Override + public void process(Object elem) throws Exception { + try (StateSampler.ScopedState process = + stateSampler.scopedState(processState)) { + if (receivers[0] != null) { + groupingTable.put(elem, receivers[0]); + } + } + } + + @Override + public void finish() throws Exception { + try (StateSampler.ScopedState finish = + stateSampler.scopedState(finishState)) { + checkStarted(); + if (receivers[0] != null) { + groupingTable.flush(receivers[0]); + } + super.finish(); + } + } + + /** + * Sets the maximum amount of memory the grouping table is allowed to + * consume before it has to be flushed. + */ + // @VisibleForTesting + public void setMaxGroupingTableBytes(long maxSize) { + groupingTable.maxSize = maxSize; + } + + /** + * Returns the amount of memory the grouping table currently consumes. + */ + // @VisibleForTesting + public long getGroupingTableBytes() { + return groupingTable.size; + } + + /** + * Returns the number of bytes in a JVM word. In case we failed to + * find the answer, returns 8. + */ + static int getBytesPerJvmWord() { + String wordSizeInBits = System.getProperty("sun.arch.data.model"); + try { + return Integer.parseInt(wordSizeInBits) / 8; + } catch (NumberFormatException e) { + // The JVM word size is unknown. Assume 64-bit. + return 8; + } + } + + private abstract static class GroupingTable { + + // Keep the table relatively full to increase the chance of collisions. + private static final double TARGET_LOAD = 0.9; + + private long maxSize; + private final GroupingKeyCreator groupingKeyCreator; + private final PairInfo pairInfo; + + private long size = 0; + private Map> table; + + public GroupingTable(long maxSize, + GroupingKeyCreator groupingKeyCreator, + PairInfo pairInfo) { + this.maxSize = maxSize; + this.groupingKeyCreator = groupingKeyCreator; + this.pairInfo = pairInfo; + this.table = new HashMap<>(); + } + + interface GroupingTableEntry { + public K getKey(); + public VA getValue(); + public void add(VI value) throws Exception; + public long getSize(); + } + + public abstract GroupingTableEntry createTableEntry(K key) throws Exception; + + /** + * Adds a pair to this table, possibly flushing some entries to output + * if the table is full. + */ + @SuppressWarnings("unchecked") + public void put(Object pair, Receiver receiver) throws Exception { + put((K) pairInfo.getKeyFromInputPair(pair), + (VI) pairInfo.getValueFromInputPair(pair), + receiver); + } + + /** + * Adds the key and value to this table, possibly flushing some entries + * to output if the table is full. 
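+     * <p>For example, with the default limit of 100,000,000 bytes, a put that
+     * pushes the estimated footprint over that limit evicts entries until the
+     * table is back under roughly 90,000,000 bytes (a TARGET_LOAD of 0.9).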
+ */ + public void put(K key, VI value, Receiver receiver) throws Exception { + Object groupingKey = groupingKeyCreator.createGroupingKey(key); + GroupingTableEntry entry = table.get(groupingKey); + if (entry == null) { + entry = createTableEntry(key); + table.put(groupingKey, entry); + size += PER_KEY_OVERHEAD; + } else { + size -= entry.getSize(); + } + entry.add(value); + size += entry.getSize(); + + if (size >= maxSize) { + long targetSize = (long) (TARGET_LOAD * maxSize); + Iterator> entries = + table.values().iterator(); + while (size >= targetSize) { + if (!entries.hasNext()) { + // Should never happen, but sizes may be estimates... + size = 0; + break; + } + GroupingTableEntry toFlush = entries.next(); + entries.remove(); + size -= toFlush.getSize() + PER_KEY_OVERHEAD; + output(toFlush, receiver); + } + } + } + + /** + * Output the given entry. Does not actually remove it from the table or + * update this table's size. + */ + private void output(GroupingTableEntry entry, Receiver receiver) throws Exception { + receiver.process(pairInfo.makeOutputPair(entry.getKey(), entry.getValue())); + } + + /** + * Flushes all entries in this table to output. + */ + public void flush(Receiver output) throws Exception { + for (GroupingTableEntry entry : table.values()) { + output(entry, output); + } + table.clear(); + size = 0; + } + + } + + /** + * A grouping table that simply buffers all inserted values in a list. + */ + public static class BufferingGroupingTable extends GroupingTable> { + + public final SizeEstimator keySizer; + public final SizeEstimator valueSizer; + + public BufferingGroupingTable(long maxSize, + GroupingKeyCreator groupingKeyCreator, + PairInfo pairInfo, + SizeEstimator keySizer, + SizeEstimator valueSizer) { + super(maxSize, groupingKeyCreator, pairInfo); + this.keySizer = keySizer; + this.valueSizer = valueSizer; + } + + @Override + public GroupingTableEntry> createTableEntry(final K key) throws Exception { + return new GroupingTableEntry>() { + long size = keySizer.estimateSize(key); + final List values = new ArrayList<>(); + public K getKey() { return key; } + public List getValue() { return values; } + public long getSize() { return size; } + public void add(V value) throws Exception { + values.add(value); + size += BYTES_PER_JVM_WORD + valueSizer.estimateSize(value); + } + }; + } + } + + /** + * A grouping table that uses the given combiner to combine values in place. + */ + public static class CombiningGroupingTable extends GroupingTable { + + private final Combiner combiner; + private final SizeEstimator keySizer; + private final SizeEstimator valueSizer; + + public CombiningGroupingTable(long maxSize, + GroupingKeyCreator groupingKeyCreator, + PairInfo pairInfo, + Combiner combineFn, + SizeEstimator keySizer, + SizeEstimator valueSizer) { + super(maxSize, groupingKeyCreator, pairInfo); + this.combiner = combineFn; + this.keySizer = keySizer; + this.valueSizer = valueSizer; + } + + @Override + public GroupingTableEntry createTableEntry(final K key) throws Exception { + return new GroupingTableEntry() { + final long keySize = keySizer.estimateSize(key); + VA accumulator = combiner.createAccumulator(key); + long accumulatorSize = 0; // never used before a value is added... 
+ public K getKey() { return key; } + public VA getValue() { return accumulator; } + public long getSize() { return keySize + accumulatorSize; } + public void add(VI value) throws Exception { + accumulator = combiner.add(key, accumulator, value); + accumulatorSize = valueSizer.estimateSize(accumulator); + } + }; + } + } + + + //////////////////////////////////////////////////////////////////////////// + // Size sampling. + + /** + * Implements size estimation by adaptively delegating to an underlying + * (potentially more expensive) estimator for some elements and returning + * the average value for others. + */ + public static class SamplingSizeEstimator implements SizeEstimator { + + /** + * The degree of confidence required in our expected value predictions + * before we allow under-sampling. + * + *
<p>
The value of 3.0 is a confidence interval of about 99.7% for a
+     * high-degree-of-freedom t-distribution.
+     */
+    public static final double CONFIDENCE_INTERVAL_SIGMA = 3;
+
+    /**
+     * The desired size of our confidence interval (relative to the measured
+     * expected value).
+     *
+     *
<p>
The value of 0.25 is plus or minus 25%. + */ + public static final double CONFIDENCE_INTERVAL_SIZE = 0.25; + + /** + * Default number of elements that must be measured before elements are skipped. + */ + public static final long DEFAULT_MIN_SAMPLED = 20; + + private final SizeEstimator underlying; + private final double minSampleRate; + private final double maxSampleRate; + private final long minSampled; + private final Random random; + + private long totalElements = 0; + private long sampledElements = 0; + private long sampledSum = 0; + private double sampledSumSquares = 0; + private long estimate; + + private long nextSample = 0; + + public SamplingSizeEstimator( + SizeEstimator underlying, + double minSampleRate, + double maxSampleRate) { + this(underlying, minSampleRate, maxSampleRate, DEFAULT_MIN_SAMPLED, new Random()); + } + + public SamplingSizeEstimator(SizeEstimator underlying, + double minSampleRate, + double maxSampleRate, + long minSampled, + Random random) { + this.underlying = underlying; + this.minSampleRate = minSampleRate; + this.maxSampleRate = maxSampleRate; + this.minSampled = minSampled; + this.random = random; + } + + @Override + public long estimateSize(E element) throws Exception { + if (sampleNow()) { + return recordSample(underlying.estimateSize(element)); + } else { + return estimate; + } + } + + private boolean sampleNow() { + totalElements++; + return --nextSample < 0; + } + + private long recordSample(long value) { + sampledElements += 1; + sampledSum += value; + sampledSumSquares += value * value; + estimate = (long) Math.ceil(sampledSum / sampledElements); + long target = desiredSampleSize(); + if (sampledElements < minSampled || sampledElements < target) { + // Sample immediately. + nextSample = 0; + } else { + double rate = cap( + minSampleRate, + maxSampleRate, + Math.max(1.0 / (totalElements - minSampled + 1), // slowly ramp down + target / (double) totalElements)); // "future" target + // Uses the geometric distribution to return the likely distance between + // successive independent trials of a fixed probability p. This gives the + // same uniform distribution of branching on Math.random() < p, but with + // one random number generation per success rather than one per test, + // which can be a significant savings if p is small. + nextSample = rate == 1.0 + ? 0 + : (long) Math.floor(Math.log(random.nextDouble()) / Math.log(1 - rate)); + } + return value; + } + + private static final double cap(double min, double max, double value) { + return Math.min(max, Math.max(min, value)); + } + + private long desiredSampleSize() { + // We have no a-priori information on the actual distribution of data + // sizes, so compute our desired sample as if it were normal. + // Yes this formula is unstable for small stddev, but we only care about large stddev. 
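+      // Worked example with illustrative numbers: for a sampled mean of 1,000
+      // bytes and a sampled stddev of 500 bytes,
+      // sqrtDesiredSamples = (3 * 500) / (0.25 * 1000) = 6, so about 36 elements
+      // must be measured before sampling is allowed to slow down.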
+ double mean = sampledSum / (double) sampledElements; + double sumSquareDiff = + (sampledSumSquares - (2 * mean * sampledSum) + (sampledElements * mean * mean)); + double stddev = Math.sqrt(sumSquareDiff / (sampledElements - 1)); + double sqrtDesiredSamples = + (CONFIDENCE_INTERVAL_SIGMA * stddev) / (CONFIDENCE_INTERVAL_SIZE * mean); + return (long) Math.ceil(sqrtDesiredSamples * sqrtDesiredSamples); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTracker.java new file mode 100644 index 0000000000000..fd26caa31e69b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTracker.java @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * Provides an interface to an object capable of tracking progress through a + * collection of elements to be processed. + * + * @param the type of elements being tracked + */ +public interface ProgressTracker { + /** + * Copies this {@link ProgressTracker}. The copied tracker will maintain its + * own independent notion of the caller's progress through the collection of + * elements being processed. + */ + public ProgressTracker copy(); + + /** + * Reports an element to this {@link ProgressTracker}, as the element is about + * to be processed. + */ + public void saw(T element); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackerGroup.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackerGroup.java new file mode 100644 index 0000000000000..7ed370f16770d --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackerGroup.java @@ -0,0 +1,71 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * Implements a group of linked + * {@link ProgressTracker ProgressTrackers} which + * collectively track how far a processing loop has gotten through the elements + * it's processing. Individual {@code ProgressTracker} instances may be copied, + * capturing an independent view of the progress of the system; this turns out + * to be useful for some non-trivial processing loops. The furthest point + * reached by any {@code ProgressTracker} is the one reported. + * + *
<p>
This class is abstract. Its single extension point is {@link #report}, + * which should be overriden to provide a function which handles the reporting + * of the supplied element, as appropriate. + * + * @param the type of elements being tracked + */ +public abstract class ProgressTrackerGroup { + // TODO: Instead of an abstract class, strongly consider adding an + // interface like Receiver to the SDK, so that this class can be final and all + // that good stuff. + private long nextIndexToReport = 0; + + public ProgressTrackerGroup() {} + + public final ProgressTracker start() { + return new Tracker(0); + } + + /** Reports the indicated element. */ + protected abstract void report(T element); + + private final class Tracker implements ProgressTracker { + private long nextElementIndex; + + private Tracker(long nextElementIndex) { + this.nextElementIndex = nextElementIndex; + } + + @Override + public ProgressTracker copy() { + return new Tracker(nextElementIndex); + } + + @Override + public void saw(T element) { + long thisElementIndex = nextElementIndex; + nextElementIndex++; + if (thisElementIndex == nextIndexToReport) { + nextIndexToReport = nextElementIndex; + report(element); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackingReiterator.java new file mode 100644 index 0000000000000..8d5d43fa74889 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ProgressTrackingReiterator.java @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.cloud.dataflow.sdk.util.common.ForwardingReiterator; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; + +/** + * Implements a {@link Reiterator} which uses a + * {@link ProgressTrackerGroup.Tracker ProgressTracker} to track how far + * it's gotten through some base {@code Reiterator}. + * {@link ProgressTrackingReiterator#copy} copies the {@code ProgressTracker}, + * allowing for an independent progress state. 
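+ * <p>A sketch of the {@code ProgressTrackerGroup} behaviour this relies on
+ * (illustrative only; the element type parameter is assumed):
+ * <pre>{@code
+ * ProgressTrackerGroup<String> group = new ProgressTrackerGroup<String>() {
+ *   protected void report(String element) {
+ *     // called for each element that advances the group's furthest point
+ *   }
+ * };
+ * ProgressTracker<String> a = group.start();
+ * a.saw("e0");                    // index 0 is new to the group: reported
+ * ProgressTracker<String> b = a.copy();
+ * b.saw("e1");                    // b reaches index 1 first: reported
+ * a.saw("e1");                    // a re-reaches index 1: not reported again
+ * }</pre>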
+ * + * @param the type of the elements of this iterator + */ +public final class ProgressTrackingReiterator + extends ForwardingReiterator { + private ProgressTracker tracker; + + public ProgressTrackingReiterator(Reiterator base, + ProgressTracker tracker) { + super(base); + this.tracker = checkNotNull(tracker); + } + + @Override + public T next() { + T result = super.next(); + tracker.saw(result); + return result; + } + + @Override + protected ProgressTrackingReiterator clone() { + ProgressTrackingReiterator result = + (ProgressTrackingReiterator) super.clone(); + result.tracker = tracker.copy(); + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java new file mode 100644 index 0000000000000..1930e0e61aaad --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -0,0 +1,233 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Observable; +import java.util.Observer; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +/** + * A read operation. + * + * Its start() method iterates through all elements of the source + * and emits them on its output. + */ +public class ReadOperation extends Operation { + private static final Logger LOG = LoggerFactory.getLogger(ReadOperation.class); + private static final long DEFAULT_PROGRESS_UPDATE_PERIOD_MS = TimeUnit.SECONDS.toMillis(1); + + /** The Source this operation reads from. */ + public final Source source; + + /** The total byte counter for all data read by this operation. */ + final Counter byteCount; + + /** StateSampler state for advancing the SourceIterator. */ + private final int readState; + + /** + * The Source's reader this operation reads from, created by start(). + * Guarded by sourceIteratorLock. + */ + volatile Source.SourceIterator sourceIterator = null; + private final Object sourceIteratorLock = new Object(); + + /** + * A cache of sourceIterator.getProgress() updated inside the read loop at a bounded rate. + *

+ * Necessary so that ReadOperation.getProgress() can return immediately, rather than potentially + * wait for a read to complete (which can take an unbounded time, delay a worker progress update, + * and cause lease expiration and all sorts of trouble). + */ + private AtomicReference progress = new AtomicReference<>(); + + /** + * On every iteration of the read loop, "progress" is fetched from sourceIterator if requested. + */ + private long progressUpdatePeriodMs = DEFAULT_PROGRESS_UPDATE_PERIOD_MS; + + /** + * Signals whether the next iteration of the read loop should update the progress. + * Set to true every progressUpdatePeriodMs. + */ + private AtomicBoolean isProgressUpdateRequested = new AtomicBoolean(true); + + + public ReadOperation(String operationName, Source source, OutputReceiver[] receivers, + String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, receivers, counterPrefix, addCounterMutator, stateSampler); + this.source = source; + this.byteCount = addCounterMutator.addCounter( + Counter.longs(bytesCounterName(counterPrefix, operationName), SUM)); + readState = stateSampler.stateForName(operationName + "-read"); + } + + /** Invoked by tests. */ + ReadOperation(Source source, OutputReceiver outputReceiver, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { + this("ReadOperation", source, new OutputReceiver[] {outputReceiver}, counterPrefix, + addCounterMutator, stateSampler); + } + + /** + * Invoked by tests. A value of 0 means "update progress on each iteration". + */ + void setProgressUpdatePeriodMs(long millis) { + Preconditions.checkArgument(millis >= 0, "Progress update period must be non-negative"); + progressUpdatePeriodMs = millis; + } + + protected String bytesCounterName(String counterPrefix, String operationName) { + return operationName + "-ByteCount"; + } + + public Source getSource() { + return source; + } + + @Override + public void start() throws Exception { + try (StateSampler.ScopedState start = stateSampler.scopedState(startState)) { + super.start(); + runReadLoop(); + } + } + + protected void runReadLoop() throws Exception { + Receiver receiver = receivers[0]; + if (receiver == null) { + // No consumer of this data; don't do anything. + return; + } + + source.addObserver(new SourceObserver()); + + try (StateSampler.ScopedState process = stateSampler.scopedState(processState)) { + synchronized (sourceIteratorLock) { + sourceIterator = source.iterator(); + } + + // TODO: Consider using the ExecutorService from PipelineOptions instead. + Thread updateRequester = new Thread() { + @Override + public void run() { + while (true) { + isProgressUpdateRequested.set(true); + try { + Thread.sleep(progressUpdatePeriodMs); + } catch (InterruptedException e) { + break; + } + } + } + }; + if (progressUpdatePeriodMs != 0) { + updateRequester.start(); + } + + try { + // Force a progress update at the beginning and at the end. + synchronized (sourceIteratorLock) { + progress.set(sourceIterator.getProgress()); + } + while (true) { + Object value; + // Stop position update request comes concurrently. + // Accesses to iterator need to be synchronized. 
+ try (StateSampler.ScopedState read = stateSampler.scopedState(readState)) { + synchronized (sourceIteratorLock) { + if (!sourceIterator.hasNext()) { + break; + } + value = sourceIterator.next(); + + if (isProgressUpdateRequested.getAndSet(false) || progressUpdatePeriodMs == 0) { + progress.set(sourceIterator.getProgress()); + } + } + } + receiver.process(value); + } + synchronized (sourceIteratorLock) { + progress.set(sourceIterator.getProgress()); + } + } finally { + synchronized (sourceIteratorLock) { + sourceIterator.close(); + } + if (progressUpdatePeriodMs != 0) { + updateRequester.interrupt(); + updateRequester.join(); + } + } + } + } + + /** + * Returns a (possibly slightly stale) value of the progress of the task. + * Guaranteed to not block indefinitely. + * + * @return the task progress, or {@code null} if the source iterator has not + * been initialized + */ + public Source.Progress getProgress() { + return progress.get(); + } + + /** + * Relays the request to update the stop position to {@code SourceIterator}. + * + * @param proposedStopPosition the proposed stop position + * @return the new stop position updated in {@code SourceIterator}, or + * {@code null} if the source iterator has not been initialized + */ + public Source.Position proposeStopPosition(Source.Progress proposedStopPosition) { + synchronized (sourceIteratorLock) { + if (sourceIterator == null) { + LOG.warn("Iterator has not been initialized, returning null stop position."); + return null; + } + return sourceIterator.updateStopPosition(proposedStopPosition); + } + } + + /** + * This is an observer on the instance of the source. Whenever source reads + * an element, update() gets called with the byte size of the element, which + * gets added up into the ReadOperation's byte counter. + */ + private class SourceObserver implements Observer { + @Override + public void update(Observable obs, Object obj) { + Preconditions.checkArgument(obs == source, "unexpected observable" + obs); + Preconditions.checkArgument(obj instanceof Long, "unexpected parameter object: " + obj); + byteCount.addValue((long) obj); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Receiver.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Receiver.java new file mode 100644 index 0000000000000..f772ee4c24453 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Receiver.java @@ -0,0 +1,27 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * Abstract interface of things that accept inputs one at a time via process(). + */ +public interface Receiver { + /** + * Processes the element. 
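As a hedged illustration of the Receiver contract described here (the class name is invented), a trivial implementation might simply count the elements that an upstream stage, such as the read loop above, hands to process():

    // Hypothetical Receiver: counts elements passed to process().
    class CountingReceiver implements Receiver {
      long count = 0;

      @Override
      public void process(Object outputElem) throws Exception {
        count++;
      }
    }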
+ */ + void process(Object outputElem) throws Exception; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReceivingOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReceivingOperation.java new file mode 100644 index 0000000000000..60deea53fa9c5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReceivingOperation.java @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * The abstract base class for Operations that have inputs and + * implement process(). + */ +public abstract class ReceivingOperation extends Operation implements Receiver { + + public ReceivingOperation(String operationName, + OutputReceiver[] receivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, receivers, + counterPrefix, addCounterMutator, stateSampler); + } + + /** + * Adds an input to this Operation, coming from the given + * output of the given source Operation. + */ + public void attachInput(Operation source, int outputNum) { + checkUnstarted(); + OutputReceiver fanOut = source.receivers[outputNum]; + fanOut.addOutput(this); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleBatchReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleBatchReader.java new file mode 100644 index 0000000000000..f5102dd14e059 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleBatchReader.java @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import java.io.IOException; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * ShuffleBatchReader provides an interface for reading a batch of + * key/value entries from a shuffle dataset. 
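One way to picture the interface defined below: a caller drains a key range by calling read() repeatedly and following nextStartPosition until it comes back null. A sketch under that assumption (the reader and the range positions are supplied from elsewhere):

    // Illustrative paging loop over a shuffle range (not SDK code).
    static List<ShuffleEntry> readAll(ShuffleBatchReader reader,
        @Nullable ShufflePosition start, @Nullable ShufflePosition end)
        throws IOException {
      List<ShuffleEntry> entries = new ArrayList<>();
      ShufflePosition next = start;            // null means "from the first key"
      do {
        ShuffleBatchReader.Batch batch = reader.read(next, end);
        entries.addAll(batch.entries);
        next = batch.nextStartPosition;        // null once the range is exhausted
      } while (next != null);
      return entries;
    }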
+ */ +public interface ShuffleBatchReader { + /** The result returned by #read. */ + public static class Batch { + public final List entries; + @Nullable public final ShufflePosition nextStartPosition; + + public Batch(List entries, + @Nullable ShufflePosition nextStartPosition) { + this.entries = entries; + this.nextStartPosition = nextStartPosition; + } + } + + /** + * Reads a batch of data from a shuffle dataset. + * + * @param startPosition encodes the initial key from where to read. + * This parameter may be null, indicating that the read should start + * with the first key in the dataset. + * + * @param endPosition encodes the key "just past" the end of the + * range to be read; keys up to endPosition will be returned, but + * keys equal to or greater than endPosition will not. This + * parameter may be null, indicating that the read should end just + * past the last key in the dataset (that is, the last key in the + * dataset will be included in the read, as long as that key is + * greater than or equal to startPosition). + * + * @return the first {@link Batch} of entries + */ + public Batch read(@Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition) + throws IOException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java new file mode 100644 index 0000000000000..750c3ac5c71c3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import java.util.Arrays; + +/** + * Entry written to/read from a shuffle dataset. + */ +public class ShuffleEntry { + final byte[] position; + final byte[] key; + final byte[] secondaryKey; + final byte[] value; + + public ShuffleEntry(byte[] key, byte[] secondaryKey, byte[] value) { + this.position = null; + this.key = key; + this.secondaryKey = secondaryKey; + this.value = value; + } + + public ShuffleEntry(byte[] position, byte[] key, byte[] secondaryKey, + byte[] value) { + this.position = position; + this.key = key; + this.secondaryKey = secondaryKey; + this.value = value; + } + + public byte[] getPosition() { + return position; + } + + public byte[] getKey() { + return key; + } + + public byte[] getSecondaryKey() { + return secondaryKey; + } + + public byte[] getValue() { + return value; + } + + public int length() { + return (position == null ? 0 : position.length) + + (key == null ? 0 : key.length) + + (secondaryKey == null ? 0 : secondaryKey.length) + + (value == null ? 
0 : value.length); + } + + @Override + public String toString() { + return "ShuffleEntry(" + + byteArrayToString(position) + "," + + byteArrayToString(key) + "," + + byteArrayToString(secondaryKey) + "," + + byteArrayToString(value) + ")"; + } + + public static String byteArrayToString(byte[] bytes) { + // TODO: Use a more compact and readable representation, + // particularly for (nearly-)ascii keys and values. + return Arrays.toString(bytes); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof ShuffleEntry) { + ShuffleEntry that = (ShuffleEntry) o; + return (this.position == null ? that.position == null + : Arrays.equals(this.position, that.position)) + && (this.key == null ? that.key == null + : Arrays.equals(this.key, that.key)) + && (this.secondaryKey == null ? that.secondaryKey == null + : Arrays.equals(this.secondaryKey, that.secondaryKey)) + && (this.value == null ? that.value == null + : Arrays.equals(this.value, that.value)); + } + return false; + } + + @Override + public int hashCode() { + return getClass().hashCode() + + (position == null ? 0 : Arrays.hashCode(position)) + + (key == null ? 0 : Arrays.hashCode(key)) + + (secondaryKey == null ? 0 : Arrays.hashCode(secondaryKey)) + + (value == null ? 0 : Arrays.hashCode(value)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryReader.java new file mode 100644 index 0000000000000..bbc5f47a4b8ce --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryReader.java @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.Reiterator; + +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * ShuffleEntryReader provides an interface for reading key/value + * entries from a shuffle dataset. + */ +@NotThreadSafe +public interface ShuffleEntryReader { + /** + * Returns an iterator which reads a range of entries from a shuffle dataset. + * + * @param startPosition encodes the initial key from where to read. + * This parameter may be null, indicating that the read should start + * with the first key in the dataset. + * + * @param endPosition encodes the key "just past" the end of the + * range to be read; keys up to endPosition will be returned, but + * keys equal to or greater than endPosition will not. 
This + * parameter may be null, indicating that the read should end just + * past the last key in the dataset (that is, the last key in the + * dataset will be included in the read, as long as that key is + * greater than or equal to startPosition). + * + * @return a {@link Reiterator} over the requested range of entries. + */ + public Reiterator read( + @Nullable ShufflePosition startPosition, + @Nullable ShufflePosition endPosition); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShufflePosition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShufflePosition.java new file mode 100644 index 0000000000000..c512269a49506 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ShufflePosition.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +/** + * Represents a position in a stream of ShuffleEntries. + */ +public interface ShufflePosition { +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java new file mode 100644 index 0000000000000..829fd1a391535 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java @@ -0,0 +1,47 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import java.io.IOException; + +/** + * Abstract base class for Sinks. + * + *

A Sink is written to by getting a SinkWriter and adding values to + * it. + * + * @param the type of the elements written to the sink + */ +public abstract class Sink { + /** + * Returns a Writer that allows writing to this Sink. + */ + public abstract SinkWriter writer() throws IOException; + + /** + * Writes to a Sink. + */ + public interface SinkWriter extends AutoCloseable { + /** + * Adds a value to the sink. Returns the size in bytes of the data written. + */ + public long add(E value) throws IOException; + + @Override + public void close() throws IOException; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java new file mode 100644 index 0000000000000..d50b93dc54193 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java @@ -0,0 +1,157 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import java.io.IOException; +import java.util.NoSuchElementException; +import java.util.Observable; + +/** + * Abstract base class for Sources. + * + *

A Source is read from by getting an Iterator-like value and + * iterating through it. + * + * @param the type of the elements read from the source + */ +public abstract class Source extends Observable { + /** + * Returns a SourceIterator that allows reading from this source. + */ + public abstract SourceIterator iterator() throws IOException; + + /** + * A stateful iterator over the data in a Source. + */ + public interface SourceIterator extends AutoCloseable { + /** + * Returns whether the source has any more elements. Some sources, + * such as GroupingShuffleSource, invalidate the return value of + * the previous next() call during the call to hasNext(). + */ + public boolean hasNext() throws IOException; + + /** + * Returns the next element. + * + * @throws NoSuchElementException if there are no more elements + */ + public T next() throws IOException; + + /** + * Copies the current SourceIterator. + * + * @throws UnsupportedOperationException if the particular implementation + * does not support copy + * @throws IOException if copying the iterator involves IO that fails + */ + public SourceIterator copy() throws IOException; + + @Override + public void close() throws IOException; + + /** + * Returns a representation of how far this iterator is through the source. + * + *
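A sketch of how the Sink and Source abstractions are typically driven together, copying one into the other. It is illustrative rather than SDK code, and it writes the generic parameters explicitly even where the surrounding text has lost them.

    // Illustrative: drain a Source into a Sink, returning the bytes written.
    static <T> long copyAll(Source<T> source, Sink<T> sink) throws Exception {
      long byteCount = 0;
      try (Source.SourceIterator<T> it = source.iterator();
           Sink.SinkWriter<T> writer = sink.writer()) {
        while (it.hasNext()) {
          byteCount += writer.add(it.next());
        }
      }
      return byteCount;
    }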

This method is not required to be thread-safe, and it will not be + * called concurrently to any other methods. + * + * @return the progress, or {@code null} if no progress measure + * can be provided + */ + public Progress getProgress(); + + /** + * Attempts to update the stop position of the task with the proposed stop + * position and returns the actual new stop position. + * + *

If the source finds the proposed one is not a convenient position to + * stop, it can pick a different stop position. The {@code SourceIterator} + * should start returning {@code false} from {@code hasNext()} once it has + * passed its stop position. Subsequent stop position updates must be in + * non-increasing order within a task. + * + *

This method is not required to be thread-safe, and it will not be + * called concurrently to any other methods. + * + * @param proposedStopPosition a proposed position to stop + * iterating through the source + * @return the new stop position, or {@code null} on failure if the + * implementation does not support position updates. + */ + public Position updateStopPosition(Progress proposedStopPosition); + } + + /** An abstract base class for SourceIterator implementations. */ + public abstract static class AbstractSourceIterator + implements SourceIterator { + @Override + public SourceIterator copy() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException { + // By default, nothing is needed for close. + } + + @Override + public Progress getProgress() { + return null; + } + + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + return null; + } + } + + /** + * A representation of how far a {@code SourceIterator} is through a + * {@code Source}. + * + *

The common worker framework does not interpret instances of + * this interface. But a tool-specific framework can make assumptions + * about the implementation, and so the concrete Source subclasses used + * by a tool-specific framework should match. + */ + public interface Progress { + } + + /** + * A representation of a position in an iteration through a + * {@code Source}. + * + *

See the comment on {@link Progress} for how instances of this + * interface are used by the rest of the framework. + */ + public interface Position { + } + + /** + * Utility method to notify observers about a new element, which has + * been read by this Source, and its size in bytes. Normally, there + * is only one observer, which is a ReadOperation that encapsules + * this Source. Derived classes must call this method whenever they + * read additional data, even if that element may never be returned + * from the corresponding source iterator. + */ + protected void notifyElementRead(long byteSize) { + setChanged(); + notifyObservers(byteSize); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java new file mode 100644 index 0000000000000..91d90e9d2a05f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java @@ -0,0 +1,279 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.Timer; +import java.util.TimerTask; + +/** + * A StateSampler object may be used to obtain an approximate + * breakdown of the time spent by an execution context in various + * states, as a fraction of the total time. The sampling is taken at + * regular intervals, with adjustment for scheduling delay. + * + *

Thread-safe. + */ +public class StateSampler extends TimerTask implements AutoCloseable { + private final String prefix; + private CounterSet.AddCounterMutator counterSetMutator; + // Sampling period of internal Timer (thread). + public final long samplingPeriodMs; + public static final int DO_NOT_SAMPLE = -1; + public static final long DEFAULT_SAMPLING_PERIOD_MS = 200; + // Array of counters indexed by their state. + private ArrayList> countersByState = new ArrayList<>(); + // Map of state name to state. + private HashMap statesByName = new HashMap<>(); + // The current state. + private int currentState; + // The timestamp corresponding to the last state change or the last + // time the current state was sampled (and recorded). + private long stateTimestamp = 0; + + // When sampling this state, a stack trace is also logged. + private int stateToSampleThreadStacks = DO_NOT_SAMPLE; + // The thread that performed the last state transition. + private Thread sampledThread = null; + // The frequency with which the stack traces are logged, with respect + // to the sampling period. + private static final int SAMPLE_THREAD_STACK_FREQ = 10; + private int sampleThreadStackFreq = 0; + + // Using a fixed number of timers for all StateSampler objects. + private static final int NUM_TIMER_THREADS = 16; + // The timers is used for periodically sampling the states. + private static Timer[] timers = new Timer[NUM_TIMER_THREADS]; + static { + for (int i = 0; i < timers.length; ++i) { + timers[i] = new Timer("StateSampler_" + i, true /* is daemon */); + } + } + + /** + * Constructs a new {@link StateSampler} that can be used to obtain + * an approximate breakdown of the time spent by an execution + * context in various states, as a fraction of the total time. + * + * @param prefix the prefix of the counter names for the states + * @param counterSetMutator the {@link CounterSet.AddCounterMutator} + * used to create a counter for each distinct state + * @param samplingPeriodMs the sampling period in milliseconds + */ + public StateSampler(String prefix, + CounterSet.AddCounterMutator counterSetMutator, + long samplingPeriodMs) { + this.prefix = prefix; + this.counterSetMutator = counterSetMutator; + this.samplingPeriodMs = samplingPeriodMs; + currentState = DO_NOT_SAMPLE; + Random rand = new Random(); + int initialDelay = rand.nextInt((int) samplingPeriodMs); + timers[rand.nextInt(NUM_TIMER_THREADS)].scheduleAtFixedRate( + this, initialDelay, samplingPeriodMs); + stateTimestamp = System.currentTimeMillis(); + } + + /** + * Constructs a new {@link StateSampler} that can be used to obtain + * an approximate breakdown of the time spent by an execution + * context in various states, as a fraction of the total time. + * + * @param prefix the prefix of the counter names for the states + * @param counterSetMutator the {@link CounterSet.AddCounterMutator} + * used to create a counter for each distinct state + */ + public StateSampler(String prefix, + CounterSet.AddCounterMutator counterSetMutator) { + this(prefix, counterSetMutator, DEFAULT_SAMPLING_PERIOD_MS); + } + + private void printStackTrace(Thread thread) { + System.out.println("Sampled stack trace:"); + StackTraceElement[] stack = thread.getStackTrace(); + for (StackTraceElement elem : stack) { + System.out.println("\t" + elem.toString()); + } + } + + /** + * Selects a state for which the thread stacks will also be logged + * during the sampling. Useful for debugging. 
+ * + * @param state name of the selected state + */ + public synchronized void setStateToSampleThreadStacks(int state) { + stateToSampleThreadStacks = state; + } + + @Override + public synchronized void run() { + long now = System.currentTimeMillis(); + if (currentState != DO_NOT_SAMPLE) { + countersByState.get(currentState).addValue(now - stateTimestamp); + if (sampledThread != null + && currentState == stateToSampleThreadStacks + && ++sampleThreadStackFreq >= SAMPLE_THREAD_STACK_FREQ) { + printStackTrace(sampledThread); + sampleThreadStackFreq = 0; + } + } + stateTimestamp = now; + } + + @Override + public void close() { + this.cancel(); // cancel the TimerTask + } + + /** + * Returns the state associated with a name; creating a new state if + * necessary. Using states instead of state names during state + * transitions is done for efficiency. + * + * @name the name for the state + * @return the state associated with the state name + */ + public int stateForName(String name) { + if (name.isEmpty()) { + return DO_NOT_SAMPLE; + } + + String counterName = prefix + name + "-msecs"; + synchronized (this) { + Integer state = statesByName.get(counterName); + if (state == null) { + Counter counter = counterSetMutator.addCounter( + Counter.longs(counterName, Counter.AggregationKind.SUM)); + state = countersByState.size(); + statesByName.put(name, state); + countersByState.add(counter); + } + return state; + } + } + + /** + * Sets the current thread state. + * + * @param state the new state to transition to + * @return the previous state + */ + public synchronized int setState(int state) { + // TODO: investigate whether this can be made cheaper, (e.g., + // using atomic operations). + int previousState = currentState; + currentState = state; + if (stateToSampleThreadStacks != DO_NOT_SAMPLE) { + sampledThread = Thread.currentThread(); + } + return previousState; + } + + /** + * Sets the current thread state. + * + * @param name the name of the new state to transition to + * @return the previous state + */ + public synchronized int setState(String name) { + return setState(stateForName(name)); + } + + /** + * Returns a tuple consisting of the current state and duration. + * + * @return a {@link Map.Entry} entry with current state and duration + */ + public synchronized Map.Entry getCurrentStateAndDuration() { + if (currentState == DO_NOT_SAMPLE) { + return new SimpleEntry<>("", 0L); + } + + Counter counter = countersByState.get(currentState); + return new SimpleEntry<>(counter.getName(), + counter.getAggregate(false) + + System.currentTimeMillis() - stateTimestamp); + } + + /** + * Get the duration for a given state. + * + * @param state the state whose duration is returned + * @return the duration of a given state + */ + public synchronized long getStateDuration(int state) { + Counter counter = countersByState.get(state); + return counter.getAggregate(false) + + (state == currentState + ? System.currentTimeMillis() - stateTimestamp : 0); + } + + /** + * Returns an AutoCloseable {@link ScopedState} that will perform a + * state transition to the given state, and will automatically reset + * the state to the prior state upon closing. 
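A small usage sketch of the sampler; the prefix, state name, and CounterSet wiring are assumptions for illustration. Time spent inside the inner try block is attributed to the corresponding -msecs counter, and the prior state is restored when the scope closes.

    CounterSet counters = new CounterSet();
    try (StateSampler sampler =
             new StateSampler("myop-", counters.getAddCounterMutator())) {
      int readState = sampler.stateForName("read");   // counter "myop-read-msecs"
      try (StateSampler.ScopedState scope = sampler.scopedState(readState)) {
        // ... work done here is sampled under the "read" state ...
      }
    }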
+ * + * @param state the new state to transition to + * @return a {@link ScopedState} that automatically resets the state + * to the prior state + */ + public synchronized ScopedState scopedState(int state) { + return new ScopedState(this, setState(state)); + } + + /** + * Returns an AutoCloseable {@link ScopedState} that will perform a + * state transition to the given state, and will automatically reset + * the state to the prior state upon closing. + * + * @param stateName the name of the new state + * @return a {@link ScopedState} that automatically resets the state + * to the prior state + */ + public synchronized ScopedState scopedState(String stateName) { + return new ScopedState(this, setState(stateName)); + } + + /** + * A nested class that is used to account for states and state + * transitions based on lexical scopes. + * + *

Thread-safe. + */ + public class ScopedState implements AutoCloseable { + private StateSampler sampler; + private int previousState; + + private ScopedState(StateSampler sampler, int previousState) { + this.sampler = sampler; + this.previousState = previousState; + } + + @Override + public void close() { + sampler.setState(previousState); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java new file mode 100644 index 0000000000000..63270b682ebce --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java @@ -0,0 +1,99 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.Metric; +import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; + +import com.sun.management.OperatingSystemMXBean; + +import java.lang.management.ManagementFactory; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * Abstract executor for WorkItem tasks. + */ +public abstract class WorkExecutor implements AutoCloseable { + /** The output counters for this task. */ + private final CounterSet outputCounters; + + /** + * OperatingSystemMXBean for reporting CPU usage. + * + * Uses com.sun.management.OperatingSystemMXBean instead of + * java.lang.management.OperatingSystemMXBean because the former supports + * getProcessCpuLoad(). + */ + private final OperatingSystemMXBean os; + + /** + * Constructs a new WorkExecutor task. + */ + public WorkExecutor(CounterSet outputCounters) { + this.outputCounters = outputCounters; + this.os = + (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean(); + } + + /** + * Returns the set of output counters for this task. + */ + public CounterSet getOutputCounters() { + return outputCounters; + } + + /** + * Returns a collection of output metrics for this task. + */ + public Collection> getOutputMetrics() { + List> outputMetrics = new ArrayList<>(); + outputMetrics.add(new DoubleMetric("CPU", os.getProcessCpuLoad())); + // More metrics as needed. + return outputMetrics; + } + + /** + * Executes the task. + */ + public abstract void execute() throws Exception; + + /** + * Returns the worker's current progress. + */ + public Source.Progress getWorkerProgress() throws Exception { + // By default, return null indicating worker progress not available. + return null; + } + + /** + * Proposes that the worker changes the stop position for the current work. + * Returns the new position if accepted, otherwise {@code null}. 
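For orientation, a bare-bones WorkExecutor subclass only has to supply execute(); the other methods above have usable defaults. The class below is hypothetical and purely illustrative.

    // Illustrative only; real executors in the SDK are considerably richer.
    class SingleTaskExecutor extends WorkExecutor {
      private final Runnable task;

      SingleTaskExecutor(CounterSet counters, Runnable task) {
        super(counters);
        this.task = task;
      }

      @Override
      public void execute() throws Exception {
        task.run();
      }
    }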
+ */ + public Source.Position proposeStopPosition( + Source.Progress proposedStopPosition) throws Exception { + // By default, returns null indicating that no task splitting happens. + return null; + } + + @Override + public void close() throws Exception { + // By default, nothing to close or shut down. + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java new file mode 100644 index 0000000000000..c5222eb04a2f1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java @@ -0,0 +1,239 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * WorkProgressUpdater allows a work executor to send work progress + * updates to the worker service. The life-cycle of the + * WorkProgressUpdater is controlled externally through its + * {@link #startReportingProgress()} and + * {@link #stopReportingProgress()} methods. The updater queries the + * worker for progress updates and sends the updates to the worker + * service. The interval between two consecutive updates is + * controlled by the worker service through reporting interval hints + * sent back in the update response messages. To avoid update storms + * and monitoring staleness, the interval between two consecutive + * updates is also bound by {@link #MIN_REPORTING_INTERVAL_MILLIS} and + * {@link #MAX_REPORTING_INTERVAL_MILLIS}. + */ +@NotThreadSafe +public abstract class WorkProgressUpdater { + private static final Logger LOG = LoggerFactory.getLogger(WorkProgressUpdater.class); + + /** The default lease duration to request from the external worker service. */ + private static final long DEFAULT_LEASE_DURATION_MILLIS = 3 * 60 * 1000; + + /** The lease renewal RPC latency margin. */ + private static final long LEASE_RENEWAL_LATENCY_MARGIN = Long.valueOf( + System.getProperty("worker_lease_renewal_latency_margin", "5000")); + + /** + * The minimum period between two consecutive progress updates. Ensures the + * {@link WorkProgressUpdater} does not generate update storms. + */ + private static final long MIN_REPORTING_INTERVAL_MILLIS = Long.valueOf( + System.getProperty("minimum_worker_update_interval_millis", "5000")); + + /** + * The maximum period between two consecutive progress updates. 
Ensures the + * {@link WorkProgressUpdater} does not cause monitoring staleness. + */ + private static final long MAX_REPORTING_INTERVAL_MILLIS = 10 * 60 * 1000; + + /** Worker providing the work progress updates. */ + protected final WorkExecutor worker; + + /** Executor used to schedule work progress updates. */ + private final ScheduledExecutorService executor; + + /** The lease duration to request from the external worker service. */ + protected long requestedLeaseDurationMs; + + /** The time period until the next work progress update. */ + protected long progressReportIntervalMs; + + /** + * The stop position to report to the service in the next progress update, + * or {@code null} if there is nothing to report. + * In cases that there is no split request from service, or worker failed to + * split in response to the last received split request, the task stop + * position implicitly stays the same as it was before that last request + * (as a result of a prior split request), and on the next reportProgress + * we'll send the {@code null} as a stop position update, which is a no-op + * for the service. + */ + protected Source.Position stopPositionToService; + + public WorkProgressUpdater(WorkExecutor worker) { + this.worker = worker; + this.executor = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("WorkProgressUpdater-%d") + .build()); + } + + /** + * Starts sending work progress updates to the worker service. + */ + public void startReportingProgress() { + // Send the initial work progress report half-way through the lease + // expiration. Subsequent intervals adapt to hints from the service. + long leaseRemainingTime = + leaseRemainingTime(getWorkUnitLeaseExpirationTimestamp()); + progressReportIntervalMs = nextProgressReportInterval( + leaseRemainingTime / 2, leaseRemainingTime); + requestedLeaseDurationMs = DEFAULT_LEASE_DURATION_MILLIS; + + LOG.info("Started reporting progress for work item: {}", workString()); + scheduleNextUpdate(); + } + + /** + * Stops sending work progress updates to the worker service. + * It may throw an exception if the final progress report fails to be sent for some reason. + */ + public void stopReportingProgress() throws Exception { + // TODO: Redesign to get rid of the executor and use a dedicated + // thread with a sleeper. Also unify with success/failure reporting. + + // Wait until there are no more progress updates in progress, then + // shut down. + synchronized (executor) { + executor.shutdownNow(); + } + + // We send a final progress report in case there was an unreported stop position update. + if (stopPositionToService != null) { + LOG.info("Sending final progress update with unreported stop position."); + reportProgressHelper(); // This call can fail with an exception + } + + LOG.info("Stopped reporting progress for work item: {}", workString()); + } + + /** + * Computes the time before sending the next work progress update making sure + * that it falls between the [{@link #MIN_REPORTING_INTERVAL_MILLIS}, + * {@link #MAX_REPORTING_INTERVAL_MILLIS}) interval. Makes an attempt to bound + * the result by the remaining lease time, with an RPC latency margin of + * {@link #LEASE_RENEWAL_LATENCY_MARGIN}. 
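A worked example, assuming the default 5000 ms latency margin and 5000 ms minimum reporting interval (both read from system properties): a 1 s hint with 8 s of lease left survives the lease cap, since min(1000, 8000 - 5000) is still 1000 ms, and is then raised to the 5000 ms floor.

    // From a subclass or test of WorkProgressUpdater (illustrative values):
    long interval = nextProgressReportInterval(1000, 8000);
    // max(0, 1000) = 1000; min(1000, 8000 - 5000) = 1000; clamped to [5000, 600000] -> 5000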
+ * + * @param suggestedInterval the suggested progress report interval + * @param leaseRemainingTime milliseconds left before the work lease expires + * @return the time in milliseconds before sending the next progress update + */ + protected static long nextProgressReportInterval(long suggestedInterval, + long leaseRemainingTime) { + // Sanitize input in case we get a negative suggested time interval. + suggestedInterval = Math.max(0, suggestedInterval); + + // Try to send the next progress update before the next lease expiration + // allowing some RPC latency margin. + suggestedInterval = Math.min(suggestedInterval, + leaseRemainingTime - LEASE_RENEWAL_LATENCY_MARGIN); + + // Bound reporting interval to avoid staleness and progress update storms. + return Math.min(Math.max(MIN_REPORTING_INTERVAL_MILLIS, suggestedInterval), + MAX_REPORTING_INTERVAL_MILLIS); + } + + /** + * Schedules the next work progress update. + */ + private void scheduleNextUpdate() { + if (executor.isShutdown()) { + return; + } + executor.schedule(new Runnable() { + @Override + public void run() { + // Don't shut down while reporting progress. + synchronized (executor) { + if (executor.isShutdown()) { + return; + } + reportProgress(); + } + } + }, progressReportIntervalMs, TimeUnit.MILLISECONDS); + LOG.debug("Next work progress update for work item {} scheduled to occur in {} ms.", + workString(), progressReportIntervalMs); + } + + /** + * Reports the current work progress to the worker service. + */ + private void reportProgress() { + LOG.info("Updating progress on work item {}", workString()); + try { + reportProgressHelper(); + } catch (Throwable e) { + LOG.warn("Error reporting work progress update: ", e); + } finally { + scheduleNextUpdate(); + } + } + + /** + * Computes the amount of time left, in milliseconds, before a lease + * with the specified expiration timestamp expires. Returns zero if + * the lease has already expired. + */ + protected long leaseRemainingTime(long leaseExpirationTimestamp) { + long now = System.currentTimeMillis(); + if (leaseExpirationTimestamp < now) { + LOG.debug("Lease remaining time for {} is 0 ms.", workString()); + return 0; + } + LOG.debug("Lease remaining time for {} is {} ms.", + workString(), leaseExpirationTimestamp - now); + return leaseExpirationTimestamp - now; + } + + // Visible for testing. + public Source.Position getStopPosition() { + return stopPositionToService; + } + + /** + * Reports the current work progress to the worker service. + */ + protected abstract void reportProgressHelper() throws Exception; + + /** + * Returns the current work item's lease expiration timestamp. + */ + protected abstract long getWorkUnitLeaseExpirationTimestamp(); + + /** + * Returns a string representation of the work item whose progress + * is being updated, for use in logging messages. + */ + protected abstract String workString(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java new file mode 100644 index 0000000000000..6f8b2e586548f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java @@ -0,0 +1,105 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +/** + * A write operation. + */ +public class WriteOperation extends ReceivingOperation { + /** + * The Sink this operation writes to. + */ + public final Sink sink; + + /** + * The total byte counter for all data written by this operation. + */ + final Counter byteCount; + + /** + * The Sink's writer this operation writes to, created by start(). + */ + Sink.SinkWriter writer; + + public WriteOperation(String operationName, + Sink sink, + OutputReceiver[] receivers, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(operationName, receivers, + counterPrefix, addCounterMutator, stateSampler); + this.sink = sink; + this.byteCount = addCounterMutator.addCounter( + Counter.longs(bytesCounterName(counterPrefix, operationName), SUM)); + } + + /** Invoked by tests. */ + public WriteOperation(Sink sink, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + this("WriteOperation", sink, new OutputReceiver[]{ }, + counterPrefix, addCounterMutator, stateSampler); + } + + protected String bytesCounterName(String counterPrefix, + String operationName) { + return operationName + "-ByteCount"; + } + + public Sink getSink() { + return sink; + } + + @Override + public void start() throws Exception { + try (StateSampler.ScopedState start = + stateSampler.scopedState(startState)) { + super.start(); + writer = sink.writer(); + } + } + + @Override + public void process(Object outputElem) throws Exception { + try (StateSampler.ScopedState process = + stateSampler.scopedState(processState)) { + checkStarted(); + byteCount.addValue(writer.add(outputElem)); + } + } + + @Override + public void finish() throws Exception { + try (StateSampler.ScopedState finish = + stateSampler.scopedState(finishState)) { + checkStarted(); + writer.close(); + super.finish(); + } + } + + public Counter getByteCount() { + return byteCount; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java new file mode 100644 index 0000000000000..1bef723c9ac75 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Defines utilities used to implement the harness that runs user code. **/ +package com.google.cloud.dataflow.sdk.util.common.worker; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java new file mode 100644 index 0000000000000..f1da8b767ef2e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsfs; + +import com.google.api.client.util.Preconditions; +import com.google.api.client.util.Strings; +import com.google.api.services.storage.model.StorageObject; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.FileSystem; +import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; +import java.util.Iterator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Implements the Java NIO {@link Path} API for Google Cloud Storage paths. + * + *

GcsPath uses a slash ('/') as a directory separator. Below is + * a summary of how slashes are treated: + *

    + *
  • A GCS bucket may not contain a slash. An object may contain zero or + * more slashes. + *
  • A trailing slash always indicates a directory, which is compliant + * with POSIX.1-2008. + *
  • Slashes separate components of a path. Empty components are allowed, + * which is represented as repeated slashes. An empty component always + * refers to a directory, and always ends in a slash. + *
  • {@link #getParent()} always returns a path ending in a slash, as the + * parent of a GcsPath is always a directory. + *
  • Use {@link #resolve(String)} to append elements to a GcsPath -- this + * applies the rules consistently and is highly recommended over any + * custom string concatenation. + *
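A short sketch of these rules in practice; the bucket and object names are illustrative, and the factory methods used here are defined later in this file.

    GcsPath path = GcsPath.fromUri("gs://my-bucket/staging/part-00000.txt");
    path.getBucket();   // "my-bucket"
    path.getObject();   // "staging/part-00000.txt"

    // Appending elements with resolve(), as recommended above.  The NIO
    // contract types the result as Path; the GcsPath-typed override appears
    // further down in this file.
    GcsPath dir = GcsPath.fromComponents("my-bucket", "staging/");
    Path file = dir.resolve("part-00000.txt");   // gs://my-bucket/staging/part-00000.txt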
+ * + *

GcsPath treats all GCS objects and buckets as belonging to the same + * filesystem, so the root of a GcsPath is the GcsPath bucket="", object="". + * + *

Relative paths are not associated with any bucket. This matches common + * treatment of Path in which relative paths can be constructed from one + * filesystem and appended to another filesystem. + * + * @see Java Tutorials: Path Operations + */ +public class GcsPath implements Path { + + public static final String SCHEME = "gs"; + + /** + * Creates a GcsPath from a URI. + * + *

The URI must be in the form {@code gs://[bucket]/[path]}, and may not + * contain a port, user info, a query, or a fragment. + */ + public static GcsPath fromUri(URI uri) { + Preconditions.checkArgument(uri.getScheme().equalsIgnoreCase(SCHEME), + "URI: %s is not a GCS URI", uri); + Preconditions.checkArgument(uri.getPort() == -1, + "GCS URI may not specify port: %s (%i)", uri, uri.getPort()); + Preconditions.checkArgument( + Strings.isNullOrEmpty(uri.getUserInfo()), + "GCS URI may not specify userInfo: %s (%s)", uri, uri.getUserInfo()); + Preconditions.checkArgument( + Strings.isNullOrEmpty(uri.getQuery()), + "GCS URI may not specify query: %s (%s)", uri, uri.getQuery()); + Preconditions.checkArgument( + Strings.isNullOrEmpty(uri.getFragment()), + "GCS URI may not specify fragment: %s (%s)", uri, uri.getFragment()); + + return fromUri(uri.toString()); + } + + /** + * Pattern which is used to parse a GCS URL. + * + *

This is used to separate the components. Verification is handled + * separately. + */ + private static final Pattern GCS_URI = + Pattern.compile("(?<SCHEME>[^:]+)://(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?"); + + /** + * Creates a GcsPath from a URI in string form. + * + *

This does not use URI parsing, which means it may accept patterns that + * the URI parser would not accept. + */ + public static GcsPath fromUri(String uri) { + Matcher m = GCS_URI.matcher(uri); + Preconditions.checkArgument(m.matches(), "Invalid GCS URI: %s", uri); + + Preconditions.checkArgument(m.group("SCHEME").equalsIgnoreCase(SCHEME), + "URI: %s is not a GCS URI", uri); + return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT")); + } + + /** + * Pattern which is used to parse a GCS resource name. + */ + private static final Pattern GCS_RESOURCE_NAME = + Pattern.compile("storage.googleapis.com/(?[^/]+)(/(?.*))?"); + + /** + * Creates a GcsPath from a OnePlatform resource name in string form. + */ + public static GcsPath fromResourceName(String name) { + Matcher m = GCS_RESOURCE_NAME.matcher(name); + Preconditions.checkArgument(m.matches(), "Invalid GCS resource name: %s", name); + + return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT")); + } + + /** + * Creates a GcsPath from a {@linkplain StorageObject}. + */ + public static GcsPath fromObject(StorageObject object) { + return new GcsPath(null, object.getBucket(), object.getName()); + } + + /** + * Creates a GcsPath from bucket and object components. + * + *

A GcsPath without a bucket name is treated as a relative path, which + * is a path component with no linkage to the root element. This is similar + * to a Unix path which does not begin with the root marker (a slash). + * GCS has different naming constraints and APIs for working with buckets and + * objects, so these two concepts are kept separate to avoid accidental + * attempts to treat objects as buckets, or vice versa, as much as possible. + * + *

 A GcsPath without an object name is a bucket reference.
 + * A bucket is always a directory; it can be used to look up or add
 + * files to a bucket, but it cannot be opened as a file.
 + *
 + *

 A GcsPath containing neither bucket nor object names is treated as
 + * the root of the GCS filesystem. A listing on the root element would return
 + * the buckets available to the user.
 + *
 + *
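+ * <p> A short sketch of these cases (illustrative only; the names here are
+ * hypothetical):
+ * <pre>{@code
+ * GcsPath.fromComponents("example-bucket", "a/b.txt");  // object gs://example-bucket/a/b.txt
+ * GcsPath.fromComponents("example-bucket", null);       // a bucket reference
+ * GcsPath.fromComponents(null, "a/b.txt");              // a relative path, no bucket
+ * GcsPath.fromComponents(null, null);                   // the root of the GCS filesystem
+ * }</pre>
+ *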

If {@code null} is passed as either parameter, it is converted to an + * empty string internally for consistency. There is no distinction between + * an empty string and a {@code null}, as neither are allowed by GCS. + * + * @param bucket a GCS bucket name, or none ({@code null} or an empty string) + * if the object is not associated with a bucket + * (e.g. relative paths or the root node). + * @param object a GCS object path, or none ({@code null} or an empty string) + * for no object. + */ + public static GcsPath fromComponents(@Nullable String bucket, + @Nullable String object) { + return new GcsPath(null, bucket, object); + } + + @Nullable + private FileSystem fs; + @Nonnull + private final String bucket; + @Nonnull + private final String object; + + /** + * Constructs a GcsPath. + * + * @param fs the associated FileSystem, if any + * @param bucket the associated bucket, or none ({@code null} or an empty + * string) for a relative path component + * @param object the object, which is a fully-qualified object name if bucket + * was also provided, or none ({@code null} or an empty string) + * for no object + * @throws java.lang.IllegalArgumentException if the bucket of object names + * are invalid. + */ + public GcsPath(@Nullable FileSystem fs, + @Nullable String bucket, + @Nullable String object) { + if (bucket == null) { + bucket = ""; + } + Preconditions.checkArgument(!bucket.contains("/"), + "GCS bucket may not contain a slash"); + Preconditions + .checkArgument(bucket.isEmpty() + || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"), + "GCS bucket names must contain only lowercase letters, numbers, " + + "dashes (-), underscores (_), and dots (.). Bucket names " + + "must start and end with a number or letter. " + + "See https://developers.google.com/storage/docs/bucketnaming " + + "for more details. Bucket name: " + bucket); + + if (object == null) { + object = ""; + } + Preconditions.checkArgument( + object.indexOf('\n') < 0 && object.indexOf('\r') < 0, + "GCS object names must not contain Carriage Return or " + + "Line Feed characters."); + + this.fs = fs; + this.bucket = bucket; + this.object = object; + } + + /** + * Returns the bucket name associated with this GCS path, or an empty string + * if this is a relative path component. + */ + public String getBucket() { + return bucket; + } + + /** + * Returns the object name associated with this GCS path, or an empty string + * if no object is specified. + */ + public String getObject() { + return object; + } + + public void setFileSystem(FileSystem fs) { + this.fs = fs; + } + + @Override + public FileSystem getFileSystem() { + return fs; + } + + // Absolute paths are those which have a bucket and the root path. + @Override + public boolean isAbsolute() { + return !bucket.isEmpty() || object.isEmpty(); + } + + @Override + public GcsPath getRoot() { + return new GcsPath(fs, "", ""); + } + + @Override + public GcsPath getFileName() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the parent path, or {@code null} if this path does not + * have a parent. + * + *

Returns a path which ends in '/', as the parent path always refers to + * a directory. + */ + @Override + public GcsPath getParent() { + if (bucket.isEmpty() && object.isEmpty()) { + // The root path has no parent, by definition. + return null; + } + + if (object.isEmpty()) { + // A GCS bucket. All buckets come from a common root. + return getRoot(); + } + + // Skip last character, in case it is a trailing slash. + int i = object.lastIndexOf('/', object.length() - 2); + if (i <= 0) { + if (bucket.isEmpty()) { + // Relative paths are not attached to the root node. + return null; + } + return new GcsPath(fs, bucket, ""); + } + + // Retain trailing slash. + return new GcsPath(fs, bucket, object.substring(0, i + 1)); + } + + @Override + public int getNameCount() { + int count = bucket.isEmpty() ? 0 : 1; + if (object.isEmpty()) { + return count; + } + + // Add another for each separator found. + int index = -1; + while ((index = object.indexOf('/', index + 1)) != -1) { + count++; + } + + return object.endsWith("/") ? count : count + 1; + } + + @Override + public GcsPath getName(int count) { + Preconditions.checkArgument(count >= 0); + + Iterator iterator = iterator(); + for (int i = 0; i < count; ++i) { + Preconditions.checkArgument(iterator.hasNext()); + iterator.next(); + } + + Preconditions.checkArgument(iterator.hasNext()); + return (GcsPath) iterator.next(); + } + + @Override + public GcsPath subpath(int beginIndex, int endIndex) { + Preconditions.checkArgument(beginIndex >= 0); + Preconditions.checkArgument(endIndex > beginIndex); + + Iterator iterator = iterator(); + for (int i = 0; i < beginIndex; ++i) { + Preconditions.checkArgument(iterator.hasNext()); + iterator.next(); + } + + GcsPath path = null; + while (beginIndex < endIndex) { + Preconditions.checkArgument(iterator.hasNext()); + if (path == null) { + path = (GcsPath) iterator.next(); + } else { + path = path.resolve(iterator.next()); + } + ++beginIndex; + } + + return path; + } + + @Override + public boolean startsWith(Path other) { + if (other instanceof GcsPath) { + GcsPath gcsPath = (GcsPath) other; + return startsWith(gcsPath.bucketAndObject()); + } else { + return startsWith(other.toString()); + } + } + + @Override + public boolean startsWith(String prefix) { + return bucketAndObject().startsWith(prefix); + } + + @Override + public boolean endsWith(Path other) { + if (other instanceof GcsPath) { + GcsPath gcsPath = (GcsPath) other; + return endsWith(gcsPath.bucketAndObject()); + } else { + return endsWith(other.toString()); + } + } + + @Override + public boolean endsWith(String suffix) { + return bucketAndObject().endsWith(suffix); + } + + // TODO: support "." and ".." path components? + @Override + public GcsPath normalize() { return this; } + + @Override + public GcsPath resolve(Path other) { + if (other instanceof GcsPath) { + GcsPath path = (GcsPath) other; + if (path.isAbsolute()) { + return path; + } else { + return resolve(path.getObject()); + } + } else { + return resolve(other.toString()); + } + } + + @Override + public GcsPath resolve(String other) { + if (bucket.isEmpty() && object.isEmpty()) { + // Resolve on a root path is equivalent to looking up a bucket and object. + other = SCHEME + "://" + other; + } + + if (other.startsWith(SCHEME + "://")) { + GcsPath path = GcsPath.fromUri(other); + path.setFileSystem(getFileSystem()); + return path; + } + + if (other.isEmpty()) { + // An empty component MUST refer to a directory. 
+ other = "/"; + } + + if (object.isEmpty()) { + return new GcsPath(fs, bucket, other); + } else if (object.endsWith("/")) { + return new GcsPath(fs, bucket, object + other); + } else { + return new GcsPath(fs, bucket, object + "/" + other); + } + } + + @Override + public Path resolveSibling(Path other) { + throw new UnsupportedOperationException(); + } + + @Override + public Path resolveSibling(String other) { + throw new UnsupportedOperationException(); + } + + @Override + public Path relativize(Path other) { + throw new UnsupportedOperationException(); + } + + @Override + public GcsPath toAbsolutePath() { + return this; + } + + @Override + public GcsPath toRealPath(LinkOption... options) throws IOException { + return this; + } + + @Override + public File toFile() { + throw new UnsupportedOperationException(); + } + + @Override + public WatchKey register(WatchService watcher, WatchEvent.Kind[] events, + WatchEvent.Modifier... modifiers) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public WatchKey register(WatchService watcher, WatchEvent.Kind... events) + throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator iterator() { + return new NameIterator(fs, !bucket.isEmpty(), bucketAndObject()); + } + + private static class NameIterator implements Iterator { + private final FileSystem fs; + private boolean fullPath; + private String name; + + NameIterator(FileSystem fs, boolean fullPath, String name) { + this.fs = fs; + this.fullPath = fullPath; + this.name = name; + } + + @Override + public boolean hasNext() { + return !Strings.isNullOrEmpty(name); + } + + @Override + public GcsPath next() { + int i = name.indexOf('/'); + String component; + if (i >= 0) { + component = name.substring(0, i); + name = name.substring(i + 1); + } else { + component = name; + name = null; + } + if (fullPath) { + fullPath = false; + return new GcsPath(fs, component, ""); + } else { + // Relative paths have no bucket. + return new GcsPath(fs, "", component); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + @Override + public int compareTo(Path other) { + if (!(other instanceof GcsPath)) { + throw new ClassCastException(); + } + + GcsPath path = (GcsPath) other; + int b = bucket.compareTo(path.bucket); + if (b != 0) { + return b; + } + + // Compare a component at a time, so that the separator char doesn't + // get compared against component contents. Eg, "a/b" < "a-1/b". + Iterator left = iterator(); + Iterator right = path.iterator(); + + while (left.hasNext() && right.hasNext()) { + String leftStr = left.next().toString(); + String rightStr = right.next().toString(); + int c = leftStr.compareTo(rightStr); + if (c != 0) { + return c; + } + } + + if (!left.hasNext() && !right.hasNext()) { + return 0; + } else { + return left.hasNext() ? 
1 : -1; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + GcsPath paths = (GcsPath) o; + return bucket.equals(paths.bucket) && object.equals(paths.object); + } + + @Override + public int hashCode() { + int result = bucket.hashCode(); + result = 31 * result + object.hashCode(); + return result; + } + + @Override + public String toString() { + if (!isAbsolute()) { + return object; + } + StringBuilder sb = new StringBuilder(); + sb.append(SCHEME) + .append("://"); + if (!bucket.isEmpty()) { + sb.append(bucket) + .append('/'); + } + sb.append(object); + return sb.toString(); + } + + // TODO: Consider using resource names for all GCS paths used by the SDK. + public String toResourceName() { + StringBuilder sb = new StringBuilder(); + sb.append("storage.googleapis.com/"); + if (!bucket.isEmpty()) { + sb.append(bucket).append('/'); + } + sb.append(object); + return sb.toString(); + } + + @Override + public URI toUri() { + try { + return new URI(SCHEME, "//" + bucketAndObject(), null); + } catch (URISyntaxException e) { + throw new RuntimeException("Unable to create URI for GCS path " + this); + } + } + + private String bucketAndObject() { + if (bucket.isEmpty()) { + return object; + } else { + return bucket + "/" + object; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java new file mode 100644 index 0000000000000..6784109e82af8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Defines utilities used to interact with Google Cloud Storage. **/ +package com.google.cloud.dataflow.sdk.util.gcsfs; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/ClientRequestHelper.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/ClientRequestHelper.java new file mode 100644 index 0000000000000..155dd79f795b3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/ClientRequestHelper.java @@ -0,0 +1,40 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; +import com.google.api.client.http.HttpHeaders; + +/** + * ClientRequestHelper provides wrapper methods around final methods of AbstractGoogleClientRequest + * to allow overriding them if necessary. Typically should be used for testing purposes only. + */ +public class ClientRequestHelper { + /** + * Wraps AbstractGoogleClientRequest.getRequestHeaders(). + */ + public HttpHeaders getRequestHeaders(AbstractGoogleClientRequest clientRequest) { + return clientRequest.getRequestHeaders(); + } + + /** + * Wraps AbstractGoogleClientRequest.getMediaHttpUploader(). + */ + public void setChunkSize(AbstractGoogleClientRequest clientRequest, int chunkSize) { + clientRequest.getMediaHttpUploader().setChunkSize(chunkSize); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageExceptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageExceptions.java new file mode 100644 index 0000000000000..5535a90826a9a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageExceptions.java @@ -0,0 +1,82 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.util.Preconditions; +import com.google.common.base.Strings; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.List; + +/** + * Miscellaneous helper methods for standardizing the types of exceptions thrown by the various + * GCS-based FileSystems. + */ +public class GoogleCloudStorageExceptions { + /** + * Creates FileNotFoundException with suitable message for a GCS bucket or object. + */ + public static FileNotFoundException getFileNotFoundException( + String bucketName, String objectName) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), + "bucketName must not be null or empty"); + if (objectName == null) { + objectName = ""; + } + return new FileNotFoundException( + String.format("Item not found: %s/%s", bucketName, objectName)); + } + + /** + * Creates a composite IOException out of multiple IOExceptions. If there is only a single + * {@code innerException}, it will be returned as-is without wrapping into an outer exception. + * it. 
+ */ + public static IOException createCompositeException( + List innerExceptions) { + Preconditions.checkArgument(innerExceptions != null, + "innerExceptions must not be null"); + Preconditions.checkArgument(innerExceptions.size() > 0, + "innerExceptions must contain at least one element"); + + if (innerExceptions.size() == 1) { + return innerExceptions.get(0); + } + + IOException combined = new IOException("Multiple IOExceptions."); + for (IOException inner : innerExceptions) { + combined.addSuppressed(inner); + } + return combined; + } + + /** + * Wraps the given IOException into another IOException, adding the given error message and a + * reference to the supplied bucket and object. It allows one to know which bucket and object + * were being accessed when the exception occurred for an operation. + */ + public static IOException wrapException(IOException e, String message, + String bucketName, String objectName) { + String name = "bucket: " + bucketName; + if (!Strings.isNullOrEmpty(objectName)) { + name += ", object: " + objectName; + } + String fullMessage = String.format("%s: %s", message, name); + return new IOException(fullMessage, e); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java new file mode 100644 index 0000000000000..a3d9b65347b2a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java @@ -0,0 +1,538 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.http.HttpResponse; +import com.google.api.client.util.BackOff; +import com.google.api.client.util.BackOffUtils; +import com.google.api.client.util.ExponentialBackOff; +import com.google.api.client.util.NanoClock; +import com.google.api.client.util.Preconditions; +import com.google.api.client.util.Sleeper; +import com.google.api.services.storage.Storage; +import com.google.cloud.dataflow.sdk.util.ApiErrorExtractor; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.util.regex.Pattern; + +/** + * Provides seekable read access to GCS. + */ +public class GoogleCloudStorageReadChannel implements SeekableByteChannel { + // Logger. + private static final Logger LOG = LoggerFactory.getLogger(GoogleCloudStorageReadChannel.class); + + // Used to separate elements of a Content-Range + private static final Pattern SLASH = Pattern.compile("/"); + + // GCS access instance. 
+ private Storage gcs; + + // Name of the bucket containing the object being read. + private String bucketName; + + // Name of the object being read. + private String objectName; + + // Read channel. + private ReadableByteChannel readChannel; + + // True if this channel is open, false otherwise. + private boolean channelIsOpen; + + // Current read position in the channel. + private long currentPosition = -1; + + // When a caller calls position(long) to set stream position, we record the target position + // and defer the actual seek operation until the caller tries to read from the channel. + // This allows us to avoid an unnecessary seek to position 0 that would take place on creation + // of this instance in cases where caller intends to start reading at some other offset. + // If lazySeekPending is set to true, it indicates that a target position has been set + // but the actual seek operation is still pending. + private boolean lazySeekPending; + + // Size of the object being read. + private long size = -1; + + // Maximum number of automatic retries when reading from the underlying channel without making + // progress; each time at least one byte is successfully read, the counter of attempted retries + // is reset. + // TODO: Wire this setting out to GHFS; it should correspond to adding the wiring for + // setting the equivalent value inside HttpRequest.java which determines the low-level retries + // during "execute()" calls. The default in HttpRequest.java is also 10. + private int maxRetries = 10; + + // Helper delegate for turning IOExceptions from API calls into higher-level semantics. + private final ApiErrorExtractor errorExtractor; + + // Sleeper used for waiting between retries. + private Sleeper sleeper = Sleeper.DEFAULT; + + // The clock used by ExponentialBackOff to determine when the maximum total elapsed time has + // passed doing a series of retries. + private NanoClock clock = NanoClock.SYSTEM; + + // Lazily initialized BackOff for sleeping between retries; only ever initialized if a retry is + // necessary. + private BackOff backOff = null; + + // Settings used for instantiating the default BackOff used for determining wait time between + // retries. TODO: Wire these out to be settable by the Hadoop configs. + // The number of milliseconds to wait before the very first retry in a series of retries. + public static final int DEFAULT_BACKOFF_INITIAL_INTERVAL_MILLIS = 200; + + // The amount of jitter introduced when computing the next retry sleep interval so that when + // many clients are retrying, they don't all retry at the same time. + public static final double DEFAULT_BACKOFF_RANDOMIZATION_FACTOR = 0.5; + + // The base of the exponent used for exponential backoff; each subsequent sleep interval is + // roughly this many times the previous interval. + public static final double DEFAULT_BACKOFF_MULTIPLIER = 1.5; + + // The maximum amount of sleep between retries; at this point, there will be no further + // exponential backoff. This prevents intervals from growing unreasonably large. + public static final int DEFAULT_BACKOFF_MAX_INTERVAL_MILLIS = 10 * 1000; + + // The maximum total time elapsed since the first retry over the course of a series of retries. + // This makes it easier to bound the maximum time it takes to respond to a permanent failure + // without having to calculate the summation of a series of exponentiated intervals while + // accounting for the randomization of backoff intervals. 
+ public static final int DEFAULT_BACKOFF_MAX_ELAPSED_TIME_MILLIS = 2 * 60 * 1000; + + // ClientRequestHelper to be used instead of calling final methods in client requests. + private static ClientRequestHelper clientRequestHelper = new ClientRequestHelper(); + + /** + * Constructs an instance of GoogleCloudStorageReadChannel. + * + * @param gcs storage object instance + * @param bucketName name of the bucket containing the object to read + * @param objectName name of the object to read + * @throws java.io.FileNotFoundException if the given object does not exist + * @throws IOException on IO error + */ + public GoogleCloudStorageReadChannel( + Storage gcs, String bucketName, String objectName, ApiErrorExtractor errorExtractor) + throws IOException { + this.gcs = gcs; + this.bucketName = bucketName; + this.objectName = objectName; + this.errorExtractor = errorExtractor; + channelIsOpen = true; + position(0); + } + + /** + * Constructs an instance of GoogleCloudStorageReadChannel. + * Used for unit testing only. Do not use elsewhere. + * + * @throws IOException on IO error + */ + GoogleCloudStorageReadChannel() + throws IOException { + this.errorExtractor = null; + channelIsOpen = true; + position(0); + } + + /** + * Sets the ClientRequestHelper to be used instead of calling final methods in client requests. + */ + static void setClientRequestHelper(ClientRequestHelper helper) { + clientRequestHelper = helper; + } + + /** + * Sets the Sleeper used for sleeping between retries. + */ + void setSleeper(Sleeper sleeper) { + Preconditions.checkArgument(sleeper != null, "sleeper must not be null!"); + this.sleeper = sleeper; + } + + /** + * Sets the clock to be used for determining when max total time has elapsed doing retries. + */ + void setNanoClock(NanoClock clock) { + Preconditions.checkArgument(clock != null, "clock must not be null!"); + this.clock = clock; + } + + /** + * Sets the backoff for determining sleep duration between retries. + * + * @param backOff May be null to force the next usage to auto-initialize with default settings. + */ + void setBackOff(BackOff backOff) { + this.backOff = backOff; + } + + /** + * Gets the backoff used for determining sleep duration between retries. May be null if it was + * never lazily initialized. + */ + BackOff getBackOff() { + return backOff; + } + + /** + * Helper for initializing the BackOff used for retries. + */ + private BackOff createBackOff() { + return new ExponentialBackOff.Builder() + .setInitialIntervalMillis(DEFAULT_BACKOFF_INITIAL_INTERVAL_MILLIS) + .setRandomizationFactor(DEFAULT_BACKOFF_RANDOMIZATION_FACTOR) + .setMultiplier(DEFAULT_BACKOFF_MULTIPLIER) + .setMaxIntervalMillis(DEFAULT_BACKOFF_MAX_INTERVAL_MILLIS) + .setMaxElapsedTimeMillis(DEFAULT_BACKOFF_MAX_ELAPSED_TIME_MILLIS) + .setNanoClock(clock) + .build(); + } + + /** + * Sets the number of times to automatically retry by re-opening the underlying readChannel + * whenever an exception occurs while reading from it. The count of attempted retries is reset + * whenever at least one byte is successfully read, so this number of retries refers to retries + * made without achieving any forward progress. + */ + public void setMaxRetries(int maxRetries) { + this.maxRetries = maxRetries; + } + + /** + * Reads from this channel and stores read data in the given buffer. 
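+ *
+ * <p> For illustration, a caller typically reads in a loop until end-of-stream
+ * (a sketch only; construction of the channel is assumed to have happened
+ * elsewhere):
+ * <pre>{@code
+ * ByteBuffer buffer = ByteBuffer.allocate(64 * 1024);
+ * while (channel.read(buffer) != -1) {
+ *   buffer.flip();
+ *   // ... consume buffer contents ...
+ *   buffer.clear();
+ * }
+ * }</pre>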
+ * + * @param buffer buffer to read data into + * @return number of bytes read or -1 on end-of-stream + * @throws java.io.IOException on IO error + */ + @Override + public int read(ByteBuffer buffer) + throws IOException { + throwIfNotOpen(); + + // Don't try to read if the buffer has no space. + if (buffer.remaining() == 0) { + return 0; + } + + // Perform a lazy seek if not done already. + performLazySeek(); + + int totalBytesRead = 0; + int retriesAttempted = 0; + + // We read from a streaming source. We may not get all the bytes we asked for + // in the first read. Therefore, loop till we either read the required number of + // bytes or we reach end-of-stream. + do { + int remainingBeforeRead = buffer.remaining(); + try { + int numBytesRead = readChannel.read(buffer); + Preconditions.checkState(numBytesRead != 0, "Read 0 bytes without blocking!"); + if (numBytesRead < 0) { + break; + } + totalBytesRead += numBytesRead; + currentPosition += numBytesRead; + + // The count of retriesAttempted is per low-level readChannel.read call; each time we make + // progress we reset the retry counter. + retriesAttempted = 0; + } catch (IOException ioe) { + // TODO: Refactor any reusable logic for retries into a separate RetryHelper class. + if (retriesAttempted == maxRetries) { + LOG.warn("Already attempted max of {} retries while reading '{}'; throwing exception.", + maxRetries, StorageResourceId.createReadableString(bucketName, objectName)); + throw ioe; + } else { + if (retriesAttempted == 0) { + // If this is the first of a series of retries, we also want to reset the backOff + // to have fresh initial values. + if (backOff == null) { + backOff = createBackOff(); + } else { + backOff.reset(); + } + } + + ++retriesAttempted; + LOG.warn("Got exception while reading '{}'; retry # {}. Sleeping...", + StorageResourceId.createReadableString(bucketName, objectName), + retriesAttempted, ioe); + + try { + boolean backOffSuccessful = BackOffUtils.next(sleeper, backOff); + if (!backOffSuccessful) { + LOG.warn("BackOff returned false; maximum total elapsed time exhausted. Giving up " + + "after {} retries for '{}'", retriesAttempted, + StorageResourceId.createReadableString(bucketName, objectName)); + throw ioe; + } + } catch (InterruptedException ie) { + LOG.warn("Interrupted while sleeping before retry." + + "Giving up after {} retries for '{}'", retriesAttempted, + StorageResourceId.createReadableString(bucketName, objectName)); + ioe.addSuppressed(ie); + throw ioe; + } + LOG.info("Done sleeping before retry for '{}'; retry # {}.", + StorageResourceId.createReadableString(bucketName, objectName), + retriesAttempted); + + if (buffer.remaining() != remainingBeforeRead) { + int partialRead = remainingBeforeRead - buffer.remaining(); + LOG.info("Despite exception, had partial read of {} bytes; resetting retry count.", + partialRead); + retriesAttempted = 0; + totalBytesRead += partialRead; + currentPosition += partialRead; + } + + // Force the stream to be reopened by seeking to the current position. + long newPosition = currentPosition; + currentPosition = -1; + position(newPosition); + performLazySeek(); + } + } + } while (buffer.remaining() > 0); + + // If this method was called when the stream was already at EOF + // (indicated by totalBytesRead == 0) then return EOF else, + // return the number of bytes read. + return (totalBytesRead == 0) ? 
-1 : totalBytesRead; + } + + @Override + public int write(ByteBuffer src) throws IOException { + throw new UnsupportedOperationException("Cannot mutate read-only channel"); + } + + /** + * Tells whether this channel is open. + * + * @return a value indicating whether this channel is open + */ + @Override + public boolean isOpen() { + return channelIsOpen; + } + + /** + * Closes this channel. + * + * @throws IOException on IO error + */ + @Override + public void close() + throws IOException { + throwIfNotOpen(); + channelIsOpen = false; + if (readChannel != null) { + readChannel.close(); + } + } + + /** + * Returns this channel's current position. + * + * @return this channel's current position + */ + @Override + public long position() + throws IOException { + throwIfNotOpen(); + return currentPosition; + } + + /** + * Sets this channel's position. + * + * @param newPosition the new position, counting the number of bytes from the beginning. + * @return this channel instance + * @throws java.io.FileNotFoundException if the underlying object does not exist. + * @throws IOException on IO error + */ + @Override + public SeekableByteChannel position(long newPosition) + throws IOException { + throwIfNotOpen(); + + // If the position has not changed, avoid the expensive operation. + if (newPosition == currentPosition) { + return this; + } + + validatePosition(newPosition); + currentPosition = newPosition; + lazySeekPending = true; + return this; + } + + /** + * Returns size of the object to which this channel is connected. + * + * @return size of the object to which this channel is connected + * @throws IOException on IO error + */ + @Override + public long size() + throws IOException { + throwIfNotOpen(); + // Perform a lazy seek if not done already so that size of this channel is set correctly. + performLazySeek(); + return size; + } + + @Override + public SeekableByteChannel truncate(long size) throws IOException { + throw new UnsupportedOperationException("Cannot mutate read-only channel"); + } + + /** + * Sets size of this channel to the given value. + */ + protected void setSize(long size) { + this.size = size; + } + + /** + * Validates that the given position is valid for this channel. + */ + protected void validatePosition(long newPosition) { + // Validate: 0 <= newPosition + if (newPosition < 0) { + throw new IllegalArgumentException( + String.format("Invalid seek offset: position value (%d) must be >= 0", newPosition)); + } + + // Validate: newPosition < size + // Note that we access this.size directly rather than calling size() to avoid initiating + // lazy seek that leads to recursive error. We validate newPosition < size only when size of + // this channel has been computed by a prior call. This means that position could be + // potentially set to an invalid value (>= size) by position(long). However, that error + // gets caught during lazy seek. + if ((size >= 0) && (newPosition >= size)) { + throw new IllegalArgumentException( + String.format( + "Invalid seek offset: position value (%d) must be between 0 and %d", + newPosition, size)); + } + } + + /** + * Seeks to the given position in the underlying stream. + * + * Note: Seek is an expensive operation because a new stream is opened each time. + * + * @throws java.io.FileNotFoundException if the underlying object does not exist. + * @throws IOException on IO error + */ + private void performLazySeek() + throws IOException { + + // Return quickly if there is no pending seek operation. 
+ if (!lazySeekPending) { + return; + } + + // Close the underlying channel if it is open. + if (readChannel != null) { + readChannel.close(); + } + + InputStream objectContentStream = openStreamAndSetSize(currentPosition); + readChannel = Channels.newChannel(objectContentStream); + lazySeekPending = false; + } + + /** + * Opens the underlying stream, sets its position to the given value and sets size based on + * stream content size. + * + * @param newPosition position to seek into the new stream. + * @throws IOException on IO error + */ + protected InputStream openStreamAndSetSize(long newPosition) + throws IOException { + validatePosition(newPosition); + Storage.Objects.Get getObject = gcs.objects().get(bucketName, objectName); + // Set the range on the existing request headers which may have been initialized with things + // like user-agent already. + clientRequestHelper.getRequestHeaders(getObject) + .setRange(String.format("bytes=%d-", newPosition)); + HttpResponse response; + try { + response = getObject.executeMedia(); + } catch (IOException e) { + if (errorExtractor.itemNotFound(e)) { + throw GoogleCloudStorageExceptions + .getFileNotFoundException(bucketName, objectName); + } else if (errorExtractor.rangeNotSatisfiable(e) + && newPosition == 0 + && size == -1) { + // We don't know the size yet (size == -1) and we're seeking to byte 0, but got 'range + // not satisfiable'; the object must be empty. + LOG.info("Got 'range not satisfiable' for reading {} at position 0; assuming empty.", + StorageResourceId.createReadableString(bucketName, objectName)); + size = 0; + return new ByteArrayInputStream(new byte[0]); + } else { + String msg = String.format("Error reading %s at position %d", + StorageResourceId.createReadableString(bucketName, objectName), newPosition); + throw new IOException(msg, e); + } + } + + String contentRange = response.getHeaders().getContentRange(); + if (response.getHeaders().getContentLength() != null) { + size = response.getHeaders().getContentLength() + newPosition; + } else if (contentRange != null) { + String sizeStr = SLASH.split(contentRange)[1]; + try { + size = Long.parseLong(sizeStr); + } catch (NumberFormatException e) { + throw new IOException( + "Could not determine size from response from Content-Range: " + contentRange, e); + } + } else { + throw new IOException("Could not determine size of response"); + } + return response.getContent(); + } + + /** + * Throws if this channel is not currently open. + */ + private void throwIfNotOpen() + throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java new file mode 100644 index 0000000000000..11113d0367ea5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java @@ -0,0 +1,379 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.http.HttpHeaders; +import com.google.api.client.http.InputStreamContent; +import com.google.api.client.util.Preconditions; +import com.google.api.services.storage.Storage; +import com.google.api.services.storage.model.StorageObject; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; + +/** + * Implements WritableByteChannel to provide write access to GCS. + */ +public class GoogleCloudStorageWriteChannel + implements WritableByteChannel { + + // The minimum logging interval for upload progress. + private static final long MIN_LOGGING_INTERVAL_MS = 60000L; + + // Logger. + private static final Logger LOG = LoggerFactory.getLogger(GoogleCloudStorageWriteChannel.class); + + // Buffering used in the upload path: + // There are a series of buffers used along the upload path. It is important to understand their + // function before tweaking their values. + // + // Note: Most values are already tweaked based on performance measurements. If you want to change + // buffer sizes, you should change only 1 buffer size at a time to make sure you understand + // the correlation between various buffers and their characteristics. + // + // Upload path: + // Uploading a file involves the following steps: + // -- caller creates a write stream. It involves creating a pipe between data writer (controlled + // by the caller) and data uploader. + // The writer and the uploader are on separate threads. That is, pipe operation is asynchronous + // between its + // two ends. + // -- caller puts data in a ByteBuffer and calls write(ByteBuffer). The write() method starts + // writing into sink end of the pipe. It blocks if pipe buffer is full till the other end + // reads data to make space. + // -- MediaHttpUploader code keeps on reading from the source end of the pipe till it has + // uploadBufferSize amount of data. + // + // The following buffers are involved along the above path: + // -- ByteBuffer passed by caller. We have no control over its size. + // + // -- Pipe buffer. + // size = UPLOAD_PIPE_BUFFER_SIZE_DEFAULT (1 MB) + // Increasing size does not have noticeable difference on performance. + // + // -- Buffer used by Java client + // code. + // size = UPLOAD_CHUNK_SIZE_DEFAULT (64 MB) + + // A pipe that connects write channel used by caller to the input stream used by GCS uploader. + // The uploader reads from input stream which blocks till a caller writes some data to the + // write channel (pipeSinkChannel below). The pipe is formed by connecting pipeSink to pipeSource. + private PipedOutputStream pipeSink; + private PipedInputStream pipeSource; + + // Size of buffer used by upload pipe. + private int pipeBufferSize = UPLOAD_PIPE_BUFFER_SIZE_DEFAULT; + + // A channel wrapper over pipeSink. + private WritableByteChannel pipeSinkChannel; + + // Upload operation that takes place on a separate thread. + private UploadOperation uploadOperation; + + // Default GCS upload granularity. 
+ private static final int GCS_UPLOAD_GRANULARITY = 8 * 1024 * 1024; + + // Upper limit on object size. + // We use less than 250GB limit to avoid potential boundary errors + // in scotty/blobstore stack. + private static final long UPLOAD_MAX_SIZE = 249 * 1024 * 1024 * 1024L; + + // Chunk size to use. Limit the amount of memory used in low memory + // environments such as small AppEngine instances. + private static final int UPLOAD_CHUNK_SIZE_DEFAULT = + Runtime.getRuntime().totalMemory() < 512 * 1024 * 1024 + ? GCS_UPLOAD_GRANULARITY : 8 * GCS_UPLOAD_GRANULARITY; + + // If true, we get very high write throughput but writing files larger than UPLOAD_MAX_SIZE + // will not succeed. Set it to false to allow larger files at lower throughput. + private static boolean limitFileSizeTo250Gb = true; + + // Chunk size to use. + static int uploadBufferSize = UPLOAD_CHUNK_SIZE_DEFAULT; + + // Default size of upload buffer. + public static final int UPLOAD_PIPE_BUFFER_SIZE_DEFAULT = 1 * 1024 * 1024; + + // ClientRequestHelper to be used instead of calling final methods in client requests. + private static ClientRequestHelper clientRequestHelper = new ClientRequestHelper(); + + /** + * Allows running upload operation on a background thread. + */ + static class UploadOperation + implements Runnable { + + // Object to be uploaded. This object declared final for safe object publishing. + private final Storage.Objects.Insert insertObject; + + // Exception encountered during upload. + Throwable exception; + + // Allows other threads to wait for this operation to be complete. This object declared final + // for safe object publishing. + final CountDownLatch uploadDone = new CountDownLatch(1); + + // Read end of the pipe. This object declared final for safe object publishing. + private final InputStream pipeSource; + + /** + * Constructs an instance of UploadOperation. + * + * @param insertObject object to be uploaded + */ + public UploadOperation(Storage.Objects.Insert insertObject, InputStream pipeSource) { + this.insertObject = insertObject; + this.pipeSource = pipeSource; + } + + /** + * Gets exception/error encountered during upload or null. + */ + public Throwable exception() { + return exception; + } + + /** + * Runs the upload operation. + */ + @Override + public void run() { + try { + insertObject.execute(); + } catch (Throwable t) { + exception = t; + LOG.error("Upload failure", t); + } finally { + uploadDone.countDown(); + try { + // Close this end of the pipe so that the writer at the other end + // will not hang indefinitely. + pipeSource.close(); + } catch (IOException ioe) { + LOG.error("Error trying to close pipe.source()", ioe); + // Log and ignore IOException while trying to close the channel, + // as there is not much we can do about it. + } + } + } + + public void waitForCompletion() { + do { + try { + uploadDone.await(); + } catch (InterruptedException e) { + // Ignore it and continue to wait. + } + } while(uploadDone.getCount() > 0); + } + } + + /** + * Constructs an instance of GoogleCloudStorageWriteChannel. 
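+ *
+ * <p> For illustration, a caller might upload a small object like this (a
+ * sketch only; the thread pool, Storage client, byte array, and names are
+ * assumed):
+ * <pre>{@code
+ * GoogleCloudStorageWriteChannel channel = new GoogleCloudStorageWriteChannel(
+ *     threadPool, gcs, "example-bucket", "output/part-00000.txt", "text/plain");
+ * channel.write(ByteBuffer.wrap(bytes));
+ * channel.close();  // returns only after the upload completes or fails
+ * }</pre>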
+ * + * @param threadPool thread pool to use for running the upload operation + * @param gcs storage object instance + * @param bucketName name of the bucket to create object in + * @param objectName name of the object to create + * @throws IOException on IO error + */ + public GoogleCloudStorageWriteChannel( + ExecutorService threadPool, Storage gcs, String bucketName, + String objectName, String contentType) + throws IOException { + init(threadPool, gcs, bucketName, objectName, contentType); + } + + /** + * Sets the ClientRequestHelper to be used instead of calling final methods in client requests. + */ + static void setClientRequestHelper(ClientRequestHelper helper) { + clientRequestHelper = helper; + } + + /** + * Writes contents of the given buffer to this channel. + * + * Note: The data that one writes gets written to a pipe which may not block + * if the pipe has sufficient buffer space. A success code returned from this method + * does not mean that the specific data was successfully written to the underlying + * storage. It simply means that there is no error at present. The data upload + * may encounter an error on a separate thread. Such error is not ignored; + * it shows up as an exception during a subsequent call to write() or close(). + * The only way to be sure of successful upload is when the close() method + * returns successfully. + * + * @param buffer buffer to write + * @throws IOException on IO error + */ + @Override + public int write(ByteBuffer buffer) + throws IOException { + throwIfNotOpen(); + + // No point in writing further if upload failed on another thread. + throwIfUploadFailed(); + + return pipeSinkChannel.write(buffer); + } + + /** + * Tells whether this channel is open. + * + * @return a value indicating whether this channel is open + */ + @Override + public boolean isOpen() { + return (pipeSinkChannel != null) && pipeSinkChannel.isOpen(); + } + + /** + * Closes this channel. + * + * Note: + * The method returns only after all data has been successfully written to GCS + * or if there is a non-retry-able error. + * + * @throws IOException on IO error + */ + @Override + public void close() + throws IOException { + throwIfNotOpen(); + try { + pipeSinkChannel.close(); + uploadOperation.waitForCompletion(); + throwIfUploadFailed(); + } finally { + pipeSinkChannel = null; + pipeSink = null; + pipeSource = null; + uploadOperation = null; + } + } + + /** + * Sets size of upload buffer used. + */ + public static void setUploadBufferSize(int bufferSize) { + Preconditions.checkArgument(bufferSize > 0, + "Upload buffer size must be great than 0."); + if (bufferSize % GCS_UPLOAD_GRANULARITY != 0) { + LOG.warn("Upload buffer size should be a multiple of {} for best performance, got {}", + GCS_UPLOAD_GRANULARITY, bufferSize); + } + GoogleCloudStorageWriteChannel.uploadBufferSize = bufferSize; + } + + /** + * Enables or disables hard limit of 250GB on size of uploaded files. + * + * If enabled, we get very high write throughput but writing files larger than UPLOAD_MAX_SIZE + * will not succeed. Set it to false to allow larger files at lower throughput. + */ + public static void enableFileSizeLimit250Gb(boolean enableLimit) { + GoogleCloudStorageWriteChannel.limitFileSizeTo250Gb = enableLimit; + } + + /** + * Initializes an instance of GoogleCloudStorageWriteChannel. 
+ * + * @param threadPool thread pool to use for running the upload operation + * @param gcs storage object instance + * @param bucketName name of the bucket in which to create object + * @param objectName name of the object to create + * @throws IOException on IO error + */ + private void init( + ExecutorService threadPool, Storage gcs, String bucketName, + String objectName, String contentType) + throws IOException { + + // Create object with the given name. + StorageObject object = (new StorageObject()).setName(objectName); + + // Create a pipe such that its one end is connected to the input stream used by + // the uploader and the other end is the write channel used by the caller. + pipeSource = new PipedInputStream(pipeBufferSize); + pipeSink = new PipedOutputStream(pipeSource); + pipeSinkChannel = Channels.newChannel(pipeSink); + + // Connect pipe-source to the stream used by uploader. + InputStreamContent objectContentStream = + new InputStreamContent(contentType, pipeSource); + // Indicate that we do not know length of file in advance. + objectContentStream.setLength(-1); + objectContentStream.setCloseInputStream(false); + Storage.Objects.Insert insertObject = + gcs.objects().insert(bucketName, object, objectContentStream); + insertObject.setDisableGZipContent(true); + insertObject.getMediaHttpUploader().setProgressListener( + new LoggingMediaHttpUploaderProgressListener(objectName, MIN_LOGGING_INTERVAL_MS)); + + // Insert necessary http headers to enable 250GB limit+high throughput if so configured. + if (limitFileSizeTo250Gb) { + HttpHeaders headers = clientRequestHelper.getRequestHeaders(insertObject); + headers.set("X-Goog-Upload-Desired-Chunk-Granularity", GCS_UPLOAD_GRANULARITY); + headers.set("X-Goog-Upload-Max-Raw-Size", UPLOAD_MAX_SIZE); + } + // Change chunk size from default value (10MB) to one that yields higher performance. + clientRequestHelper.setChunkSize(insertObject, uploadBufferSize); + + // Given that the two ends of the pipe must operate asynchronous relative + // to each other, we need to start the upload operation on a separate thread. + uploadOperation = new UploadOperation(insertObject, pipeSource); + threadPool.execute(uploadOperation); + } + + /** + * Throws if this channel is not currently open. + * + * @throws IOException on IO error + */ + private void throwIfNotOpen() + throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + } + + /** + * Throws if upload operation failed. Propagates any errors. + * + * @throws IOException on IO error + */ + private void throwIfUploadFailed() + throws IOException { + if ((uploadOperation != null) && (uploadOperation.exception() != null)) { + if (uploadOperation.exception() instanceof Error) { + throw (Error) uploadOperation.exception(); + } + throw new IOException(uploadOperation.exception()); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java new file mode 100644 index 0000000000000..c215f4aeafafc --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java @@ -0,0 +1,91 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.googleapis.media.MediaHttpUploader; +import com.google.api.client.googleapis.media.MediaHttpUploader.UploadState; +import com.google.api.client.googleapis.media.MediaHttpUploaderProgressListener; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Logs the status of uploads. At the beginning, during, and + * at the end of the upload, emits relevant statistics such as how many bytes + * uploaded and the rate at which the upload is progressing. + *
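+ * <p> For illustration, mirroring how the write channel in this patch attaches
+ * the listener (the uploader instance and names here are assumed):
+ * <pre>{@code
+ * uploader.setProgressListener(
+ *     new LoggingMediaHttpUploaderProgressListener("output/part-00000.txt", 60000L));
+ * }</pre>
+ *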

+ * A new instance of this progress listener should be used for each MediaHttpUploader. + */ +class LoggingMediaHttpUploaderProgressListener implements MediaHttpUploaderProgressListener { + private static final Logger LOG = + LoggerFactory.getLogger(MediaHttpUploaderProgressListener.class); + private static final double BYTES_IN_MB = 1048576.0; + private final long minLoggingInterval; + private final String name; + private long startTime; + private long prevTime; + private long prevUploadedBytes; + + /** + * Creates a upload progress listener which emits relevant statistics about the + * progress of the upload. + * @param name The name of the resource being uploaded. + * @param minLoggingInterval The minimum amount of time (millis) between logging upload progress. + */ + LoggingMediaHttpUploaderProgressListener(String name, long minLoggingInterval) { + this.name = name; + this.minLoggingInterval = minLoggingInterval; + } + + @Override + public void progressChanged(MediaHttpUploader uploader) throws IOException { + progressChanged(LOG, + uploader.getUploadState(), + uploader.getNumBytesUploaded(), + System.currentTimeMillis()); + } + + void progressChanged(Logger log, UploadState uploadState, long bytesUploaded, long currentTime) { + switch (uploadState) { + case INITIATION_STARTED: + startTime = currentTime; + prevTime = currentTime; + log.info("Uploading: {}", name); + break; + case MEDIA_IN_PROGRESS: + // Limit messages to be emitted for in progress uploads. + if (currentTime > prevTime + minLoggingInterval) { + double averageRate = (bytesUploaded / BYTES_IN_MB) + / ((currentTime - startTime) / 1000.0); + double currentRate = ((bytesUploaded - prevUploadedBytes) / BYTES_IN_MB) + / ((currentTime - prevTime) / 1000.0); + log.info(String.format( + "Uploading: %s Average Rate: %.3f MiB/s, Current Rate: %.3f MiB/s, Total: %.3f MiB", + name, averageRate, currentRate, bytesUploaded / BYTES_IN_MB)); + prevTime = currentTime; + prevUploadedBytes = bytesUploaded; + } + break; + case MEDIA_COMPLETE: + log.info("Finished Uploading: {}", name); + break; + default: + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/StorageResourceId.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/StorageResourceId.java new file mode 100644 index 0000000000000..b6051a5147d3e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/StorageResourceId.java @@ -0,0 +1,165 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import com.google.api.client.util.Preconditions; +import com.google.common.base.Strings; + +import java.util.Objects; + +/** + * Data struct representing either a GCS StorageObject, a GCS Bucket or the GCS root (gs://). + * If both bucketName and objectName are null, the StorageResourceId refers to GCS root (gs://). + * If bucketName is non-null, and objectName is null, then this refers to a GCS Bucket. 
Otherwise, + * if bucketName and objectName are both non-null, this refers to a GCS StorageObject. + */ +public class StorageResourceId { + // The singleton instance identifying the GCS root (gs://). Both getObjectName() and + // getBucketName() will return null. + public static final StorageResourceId ROOT = new StorageResourceId(); + + // Bucket name of this storage resource to be used with the Google Cloud Storage API. + private final String bucketName; + + // Object name of this storage resource to be used with the Google Cloud Storage API. + private final String objectName; + + // Human-readable String to be returned by toString(); kept as 'final' member for efficiency. + private final String readableString; + + /** + * Constructor for a StorageResourceId which refers to the GCS root (gs://). Private because + * all external users should just use the singleton StorageResourceId.ROOT. + */ + private StorageResourceId() { + this.bucketName = null; + this.objectName = null; + this.readableString = createReadableString(bucketName, objectName); + } + + /** + * Constructor for a StorageResourceId representing a Bucket; {@code getObjectName()} will return + * null for a StorageResourceId which represents a Bucket. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. + */ + public StorageResourceId(String bucketName) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), + "bucketName must not be null or empty"); + + this.bucketName = bucketName; + this.objectName = null; + this.readableString = createReadableString(bucketName, objectName); + } + + /** + * Constructor for a StorageResourceId representing a full StorageObject, including bucketName + * and objectName. + * + * @param bucketName The bucket name of the resource. Must be non-empty and non-null. + * @param objectName The object name of the resource. Must be non-empty and non-null. + */ + public StorageResourceId(String bucketName, String objectName) { + Preconditions.checkArgument(!Strings.isNullOrEmpty(bucketName), + "bucketName must not be null or empty"); + Preconditions.checkArgument(!Strings.isNullOrEmpty(objectName), + "objectName must not be null or empty"); + + this.bucketName = bucketName; + this.objectName = objectName; + this.readableString = createReadableString(bucketName, objectName); + } + + /** + * Returns true if this StorageResourceId represents a GCS StorageObject; if true, both + * {@code getBucketName} and {@code getObjectName} will be non-empty and non-null. + */ + public boolean isStorageObject() { + return bucketName != null && objectName != null; + } + + /** + * Returns true if this StorageResourceId represents a GCS Bucket; if true, then {@code + * getObjectName} will return null. + */ + public boolean isBucket() { + return bucketName != null && objectName == null; + } + + /** + * Returns true if this StorageResourceId represents the GCS root (gs://); if true, then + * both {@code getBucketName} and {@code getObjectName} will be null. + */ + public boolean isRoot() { + return bucketName == null && objectName == null; + } + + /** + * Gets the bucket name component of this resource identifier. + */ + public String getBucketName() { + return bucketName; + } + + /** + * Gets the object name component of this resource identifier. + */ + public String getObjectName() { + return objectName; + } + + /** + * Returns a string of the form gs:///. 
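+ * For the root this is {@code "gs://"}, for a bucket {@code "gs://bucket-name"},
+ * and for an object {@code "gs://bucket-name/object/name"} (names here are
+ * illustrative).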
+ */ + @Override + public String toString() { + return readableString; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof StorageResourceId) { + StorageResourceId other = (StorageResourceId) obj; + return Objects.equals(bucketName, other.bucketName) + && Objects.equals(objectName, other.objectName); + } + return false; + } + + @Override + public int hashCode() { + return readableString.hashCode(); + } + + /** + * Helper for standardizing the way various human-readable messages in logs/exceptions which refer + * to a bucket/object pair. + */ + public static String createReadableString(String bucketName, String objectName) { + if (bucketName == null && objectName == null) { + // TODO: Unify this method with other methods which convert bucketName/objectName + // to a URI; maybe use the single slash for compatibility. + return "gs://"; + } else if (bucketName != null && objectName == null) { + return String.format("gs://%s", bucketName); + } else if (bucketName != null && objectName != null) { + return String.format("gs://%s/%s", bucketName, objectName); + } + throw new IllegalArgumentException( + String.format("Invalid bucketName/objectName pair: gs://%s/%s", bucketName, objectName)); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java new file mode 100644 index 0000000000000..98fdc44113a34 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** Defines utilities used by the Dataflow SDK. **/ +package com.google.cloud.dataflow.sdk.util; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java new file mode 100644 index 0000000000000..3caed1a8bcce9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.coders.Coder; + +/** + * A {@link TupleTag} combined with the {@link Coder} to use for + * values associated with the tag. + * + *

Used as tags in + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.KeyedState}. + * + * @param the type of the values associated with this tag + */ +public class CodedTupleTag extends TupleTag { + /** + * Returns a {@code CodedTupleTag} with the given id which uses the + * given {@code Coder} whenever a value associated with the tag + * needs to be serialized. + * + *

It is up to the user to ensure that two + * {@code CodedTupleTag}s with the same id actually mean the same + * tag and carry the same generic type parameter. Violating this + * invariant can lead to hard-to-diagnose runtime type errors. + * + *
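A minimal sketch of tags that respect this invariant (illustrative; the ids are invented, and StringUtf8Coder and BigEndianLongCoder are coders from this SDK):

    // Each id is paired with exactly one value type and coder, everywhere it is used.
    CodedTupleTag<String> lastSeenTag = CodedTupleTag.of("last-seen", StringUtf8Coder.of());
    CodedTupleTag<Long> countTag = CodedTupleTag.of("count", BigEndianLongCoder.of());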

(An explicit id is required so that persistent keyed state + * saved by one run of a streaming program can be reused if that + * streaming program is upgraded to a new version.) + * + * @param the type of the values associated with the tag + */ + public static CodedTupleTag of(String id, Coder coder) { + return new CodedTupleTag(id, coder); + } + + /** + * Returns the {@code Coder} used for values associated with this tag. + */ + public Coder getCoder() { + return coder; + } + + + /////////////////////////////////////////////// + + private final Coder coder; + + CodedTupleTag(String id, Coder coder) { + super(id); + this.coder = coder; + } + + @Override + public String toString() { + return "CodedTupleTag<" + getId() + ", " + coder + ">"; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java new file mode 100644 index 0000000000000..6f96c694ea2e6 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import java.util.Map; + +/** + * A mapping of {@link CodedTupleTag}s to associated values. + * + *

Returned by + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.KeyedState#lookup(java.util.List)}. + */ +public class CodedTupleTagMap { + /** + * Returns a {@code CodedTupleTagMap} containing the given mappings. + * + *

It is up to the caller to ensure that the value associated + * with each CodedTupleTag in the map has the static type specified + * by that tag. + * + *
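A small sketch of that caller-side contract (illustrative; the tag, map, and values are invented):

    CodedTupleTag<Long> countTag = CodedTupleTag.of("count", BigEndianLongCoder.of());
    Map<CodedTupleTag<?>, Object> entries = new LinkedHashMap<>();
    entries.put(countTag, 42L);                    // the value's type matches the tag's type
    CodedTupleTagMap tagMap = CodedTupleTagMap.of(entries);
    Long count = tagMap.get(countTag);             // 42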

Intended for internal use only. + */ + public static CodedTupleTagMap of(Map, Object> map) { + // TODO: Should we copy the Map here, to insulate this + // map from any changes to the original argument? + return new CodedTupleTagMap(map); + } + + /** + * Returns the value associated with the given tag in this + * {@code CodedTupleTagMap}, or {@code null} if the tag has no + * asssociated value. + */ + public T get(CodedTupleTag tag) { + return (T) map.get(tag); + } + + ////////////////////////////////////////////// + + private Map, Object> map; + + CodedTupleTagMap(Map, Object> map) { + this.map = map; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java new file mode 100644 index 0000000000000..d354707ebb0c0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import java.io.Serializable; +import java.util.Comparator; + +/** + * An immutable key/value pair. + * + *
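For example (an illustrative fragment in the style of the other javadoc snippets in this patch):

    KV<String, Integer> pair = KV.of("apples", 3);
    String key = pair.getKey();        // "apples"
    Integer value = pair.getValue();   // 3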

Various + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s like + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} and + * {@link com.google.cloud.dataflow.sdk.transforms.Combine#perKey} + * work on {@link PCollection}s of KVs. + * + * @param the type of the key + * @param the type of the value + */ +public class KV implements Serializable { + /** Returns a KV with the given key and value. */ + public static KV of(K key, V value) { + return new KV<>(key, value); + } + + /** Returns the key of this KV. */ + public K getKey() { + return key; + } + + /** Returns the value of this KV. */ + public V getValue() { + return value; + } + + + ///////////////////////////////////////////////////////////////////////////// + + final K key; + final V value; + + private KV(K key, V value) { + this.key = key; + this.value = value; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof KV) { + KV that = (KV) o; + return (this.key == null ? that.key == null + : this.key.equals(that.key)) + && (this.value == null ? that.value == null + : this.value.equals(that.value)); + } + return false; + } + + /** Orders the KV by the key. A null key is less than any non-null key. */ + public static class OrderByKey, V> implements + Comparator>, Serializable { + @Override + public int compare(KV a, KV b) { + if (a.key == null) { + return b.key == null ? 0 : -1; + } else if (b.key == null) { + return 1; + } else { + return a.key.compareTo(b.key); + } + } + } + + /** Orders the KV by the value. A null value is less than any non-null value. */ + public static class OrderByValue> + implements Comparator>, Serializable { + @Override + public int compare(KV a, KV b) { + if (a.value == null) { + return b.value == null ? 0 : -1; + } else if (b.value == null) { + return 1; + } else { + return a.value.compareTo(b.value); + } + } + } + + @Override + public int hashCode() { + return getClass().hashCode() + + (key == null ? 0 : key.hashCode()) + + (value == null ? 0 : value.hashCode()); + } + + @Override + public String toString() { + return "KV(" + key + ", " + value + ")"; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java new file mode 100644 index 0000000000000..fc3f179fc1765 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; + +import java.util.Collection; +import java.util.Collections; + +/** + * {@code PBegin} is used as the "input" to a root + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} which + * is the first operation in a {@link Pipeline}, such as + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read} or + * {@link com.google.cloud.dataflow.sdk.transforms.Create}. + * + *

Typically created by calling {@link Pipeline#begin} on a Pipeline. + */ +public class PBegin implements PInput { + /** + * Returns a {@code PBegin} in the given {@code Pipeline}. + */ + public static PBegin in(Pipeline pipeline) { + return new PBegin(pipeline); + } + + /** + * Applies the given PTransform to this input PBegin, and + * returns the PTransform's Output. + */ + public Output apply( + PTransform t) { + return Pipeline.applyTransform(this, t); + } + + @Override + public Pipeline getPipeline() { + return pipeline; + } + + @Override + public Collection expand() { + // A PBegin contains no PValues. + return Collections.emptyList(); + } + + @Override + public void finishSpecifying() { + // Nothing more to be done. + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Constructs a {@code PBegin} in the given {@code Pipeline}. + */ + protected PBegin(Pipeline pipeline) { + this.pipeline = pipeline; + } + + private Pipeline pipeline; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java new file mode 100644 index 0000000000000..fc4b0886b7d5b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.common.reflect.TypeToken; + +/** + * A {@code PCollection} is an immutable collection of values of type + * {@code T}. A {@code PCollection} can contain either a bounded or unbounded + * number of elements. Bounded and unbounded {@code PCollection}s are produced + * as the output of {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s + * (including root PTransforms like + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read}, + * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read} and + * {@link com.google.cloud.dataflow.sdk.transforms.Create}), and can + * be passed as the inputs of other PTransforms. + * + *

Some root transforms produce bounded {@code PCollections} and others + * produce unbounded ones. For example, + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read} reads a static set + * of files, so it produces a bounded {@code PCollection}. + * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read}, on the other hand, + * receives a potentially infinite stream of Pubsub messages, so it produces + * an unbounded {@code PCollection}. + * + *
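A sketch of both cases (illustrative; the file pattern and topic are placeholders, and the TextIO.Read.from / PubsubIO.Read.topic factory methods are assumed from the io package referenced above):

    Pipeline p = ...;
    // Bounded: a fixed set of input files.
    PCollection<String> lines = p.apply(TextIO.Read.from("gs://my-bucket/input-*.txt"));
    // Unbounded: a potentially infinite stream of Pubsub messages.
    PCollection<String> messages = p.apply(PubsubIO.Read.topic("/topics/my-project/my-topic"));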

Each element in a {@code PCollection} may have an associated implicit + * timestamp. Sources assign timestamps to elements when they create + * {@code PCollection}s, and other {@code PTransform}s propagate these + * timestamps from their input to their output. For example, PubsubIO.Read + * assigns pubsub message timestamps to elements, and TextIO.Read assigns + * the default value {@code Long.MIN_VALUE} to elements. User code can + * explicitly assign timestamps to elements with + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp}. + * + *
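A hedged sketch of explicit timestamp assignment inside a DoFn (the LogEntry type and its getEventTime() accessor are invented for illustration):

    PCollection<LogEntry> stamped = entries.apply(ParDo.of(new DoFn<LogEntry, LogEntry>() {
      @Override
      public void processElement(ProcessContext c) {
        LogEntry entry = c.element();
        // Attach the event time carried by the record itself (an org.joda.time.Instant).
        c.outputWithTimestamp(entry, entry.getEventTime());
      }
    }));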

Additionally, a {@code PCollection} has an associated + * {@link WindowingFn} and each element is assigned to a set of windows. + * By default, the windowing function is + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow} + * and all elements are assigned into a single default window. + * This default can be overridden with the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} + * {@code PTransform}. Dataflow pipelines run in classic batch MapReduce style + * with the default GlobalWindow strategy if timestamps are ignored. + * + *
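A sketch of overriding the default windowing (illustrative; assumes a FixedWindows windowing function and the Window.into idiom from the windowing package):

    PCollection<String> windowed =
        input.apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(10))));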

See the individual {@code PTransform} subclasses for specific information + * on how they propagate timestamps and windowing. + * + * @param the type of the elements of this PCollection + */ +public class PCollection extends TypedPValue { + /** + * Returns the name of this PCollection. + * + *

By default, the name of a PCollection is based on the name of the + * PTransform that produces it. It can be specified explicitly by + * calling {@link #setName}. + * + * @throws IllegalStateException if the name hasn't been set yet + */ + @Override + public String getName() { + return super.getName(); + } + + /** + * Sets the name of this PCollection. Returns {@code this}. + * + * @throws IllegalStateException if this PCollection has already been + * finalized and is no longer settable, e.g., by having + * {@code apply()} called on it + */ + @Override + public PCollection setName(String name) { + super.setName(name); + return this; + } + + /** + * Returns the Coder used by this PCollection to encode and decode + * the values stored in it. + * + * @throws IllegalStateException if the Coder hasn't been set, and + * couldn't be inferred + */ + @Override + public Coder getCoder() { + return super.getCoder(); + } + + /** + * Sets the Coder used by this PCollection to encode and decode the + * values stored in it. Returns {@code this}. + * + * @throws IllegalStateException if this PCollection has already + * been finalized and is no longer settable, e.g., by having + * {@code apply()} called on it + */ + @Override + public PCollection setCoder(Coder coder) { + super.setCoder(coder); + return this; + } + + /** + * Returns whether or not the elements of this PCollection have a + * well-defined and fixed order, such that subsequent reading of the + * PCollection is guaranteed to process the elements in order. + * + *

Requiring a fixed order can limit optimization opportunities. + * + *

By default, PCollections do not have a well-defined or fixed order. + */ + public boolean isOrdered() { + return isOrdered; + } + + /** + * Sets whether or not this PCollection should preserve the order in + * which elements are put in it, such that subsequent parallel + * reading of the PCollection is guaranteed to process the elements + * in order. + * + *

Requiring a fixed order can limit optimization opportunities. + * + *

Returns {@code this}. + * + * @throws IllegalStateException if this PCollection has already + * been finalized and is no longer settable, e.g., by having + * {@code apply()} called on it + */ + public PCollection setOrdered(boolean isOrdered) { + if (this.isOrdered != isOrdered) { + if (isFinishedSpecifyingInternal()) { + throw new IllegalStateException( + "cannot change the orderedness of " + this + + " once it's been used"); + } + this.isOrdered = isOrdered; + } + return this; + } + + /** + * Applies the given PTransform to this input PCollection, and + * returns the PTransform's Output. + */ + public Output apply( + PTransform, Output> t) { + return Pipeline.applyTransform(this, t); + } + + /** + * Returns the {@link WindowingFn} of this {@code PCollection}. + */ + public WindowingFn getWindowingFn() { + return windowingFn; + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + /** + * Whether or not the elements of this PCollection have a + * well-defined and fixed order, such that subsequent reading of the + * PCollection is guaranteed to process the elements in order. + */ + private boolean isOrdered = false; + + /** + * {@link WindowingFn} that will be used to merge windows in + * this {@code PCollection} and subsequent {@code PCollection}s produced + * from this one. + * + *

By default, no merging is performed. + */ + private WindowingFn windowingFn; + + private PCollection() {} + + /** + * Sets the {@code TypeToken} for this {@code PCollection}, so that + * the enclosing {@code PCollectionTuple}, {@code PCollectionList}, + * or {@code PTransform>}, etc., can provide + * more detailed reflective information. + */ + @Override + public PCollection setTypeTokenInternal(TypeToken typeToken) { + super.setTypeTokenInternal(typeToken); + return this; + } + + /** + * Sets the {@link WindowingFn} of this {@code PCollection}. + * + *

For use by primitive transformations only. + */ + public PCollection setWindowingFnInternal(WindowingFn windowingFn) { + this.windowingFn = windowingFn; + return this; + } + + /** + * Sets the {@link Pipeline} for this {@code PCollection}. + * + *

For use by primitive transformations only. + */ + @Override + public PCollection setPipelineInternal(Pipeline pipeline) { + super.setPipelineInternal(pipeline); + return this; + } + + /** + * Creates and returns a new PCollection for a primitive output. + * + *

For use by primitive transformations only. + */ + public static PCollection createPrimitiveOutputInternal( + WindowingFn windowingFn) { + return new PCollection().setWindowingFnInternal(windowingFn); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java new file mode 100644 index 0000000000000..26b7300a9341a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * A {@code PCollectionList} is an immutable list of homogeneously + * typed {@code PCollection}s. A PCollectionList is used, for + * instance, as the input to + * {@link com.google.cloud.dataflow.sdk.transforms.Flatten} or the + * output of + * {@link com.google.cloud.dataflow.sdk.transforms.Partition}. + * + *

PCollectionLists can be created and accessed as follows: + *

 {@code
+ * PCollection<String> pc1 = ...;
+ * PCollection<String> pc2 = ...;
+ * PCollection<String> pc3 = ...;
+ *
+ * // Create a PCollectionList with three PCollections:
+ * PCollectionList<String> pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
+ *
+ * // Create an empty PCollectionList:
+ * Pipeline p = ...;
+ * PCollectionList<String> pcs2 = PCollectionList.empty(p);
+ *
+ * // Get PCollections out of a PCollectionList, by index (origin 0):
+ * PCollection<String> pcX = pcs.get(1);
+ * PCollection<String> pcY = pcs.get(0);
+ * PCollection<String> pcZ = pcs.get(2);
+ *
+ * // Get a list of all PCollections in a PCollectionList:
+ * List<PCollection<String>> allPcs = pcs.getAll();
+ * } 
+ * + * @param the type of the elements of all the PCollections in this list + */ +public class PCollectionList implements PInput, POutput { + /** + * Returns an empty PCollectionList that is part of the given Pipeline. + * + *

Longer PCollectionLists can be created by calling + * {@link #and} on the result. + */ + public static PCollectionList empty(Pipeline pipeline) { + return new PCollectionList<>(pipeline); + } + + /** + * Returns a singleton PCollectionList containing the given PCollection. + * + *

Longer PCollectionLists can be created by calling + * {@link #and} on the result. + */ + public static PCollectionList of(PCollection pc) { + return new PCollectionList(pc.getPipeline()).and(pc); + } + + /** + * Returns a PCollectionList containing the given PCollections, in order. + * + *

The argument list cannot be empty. + * + *

All the PCollections in the resulting PCollectionList must be + * part of the same Pipeline. + * + *

Longer PCollectionLists can be created by calling + * {@link #and} on the result. + */ + public static PCollectionList of(Iterable> pcs) { + Iterator> pcsIter = pcs.iterator(); + if (!pcsIter.hasNext()) { + throw new IllegalArgumentException( + "must either have a non-empty list of PCollections, " + + "or must first call empty(Pipeline)"); + } + return new PCollectionList(pcsIter.next().getPipeline()).and(pcs); + } + + /** + * Returns a new PCollectionList that has all the PCollections of + * this PCollectionList plus the given PCollection appended to the end. + * + *

All the PCollections in the resulting PCollectionList must be + * part of the same Pipeline. + */ + public PCollectionList and(PCollection pc) { + if (pc.getPipeline() != pipeline) { + throw new IllegalArgumentException( + "PCollections come from different Pipelines"); + } + return new PCollectionList<>(pipeline, + new ImmutableList.Builder>() + .addAll(pcollections) + .add(pc) + .build()); + } + + /** + * Returns a new PCollectionList that has all the PCollections of + * this PCollectionList plus the given PCollections appended to the end, + * in order. + * + *

All the PCollections in the resulting PCollectionList must be + * part of the same Pipeline. + */ + public PCollectionList and(Iterable> pcs) { + List> copy = new ArrayList<>(pcollections); + for (PCollection pc : pcs) { + if (pc.getPipeline() != pipeline) { + throw new IllegalArgumentException( + "PCollections come from different Pipelines"); + } + copy.add(pc); + } + return new PCollectionList<>(pipeline, copy); + } + + /** + * Returns the number of PCollections in this PCollectionList. + */ + public int size() { + return pcollections.size(); + } + + /** + * Returns the PCollection at the given index (origin zero). Throws + * IndexOutOfBounds if the index is out of the range + * {@code [0..size()-1]}. + */ + public PCollection get(int index) { + return pcollections.get(index); + } + + /** + * Returns an immutable List of all the PCollections in this PCollectionList. + */ + public List> getAll() { + return pcollections; + } + + /** + * Applies the given PTransform to this input {@code PCollectionList}, + * and returns the PTransform's Output. + */ + public Output apply( + PTransform, Output> t) { + return Pipeline.applyTransform(this, t); + } + + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + final Pipeline pipeline; + final List> pcollections; + + PCollectionList(Pipeline pipeline) { + this(pipeline, new ArrayList>()); + } + + PCollectionList(Pipeline pipeline, List> pcollections) { + this.pipeline = pipeline; + this.pcollections = Collections.unmodifiableList(pcollections); + } + + @Override + public Pipeline getPipeline() { + return pipeline; + } + + @Override + public Collection expand() { + return pcollections; + } + + @Override + public void recordAsOutput(Pipeline pipeline, + PTransform transform) { + if (this.pipeline != null && this.pipeline != pipeline) { + throw new AssertionError( + "not expecting to change the Pipeline owning a PCollectionList"); + } + int i = 0; + for (PCollection pc : pcollections) { + pc.recordAsOutput(pipeline, transform, "out" + i); + i++; + } + } + + @Override + public void finishSpecifying() { + for (PCollection pc : pcollections) { + pc.finishSpecifying(); + } + } + + @Override + public void finishSpecifyingOutput() { + for (PCollection pc : pcollections) { + pc.finishSpecifyingOutput(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java new file mode 100644 index 0000000000000..fecc175f4d3cc --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.common.collect.ImmutableMap; +import com.google.common.reflect.TypeToken; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A {@code PCollectionTuple} is an immutable tuple of + * heterogeneously-typed {@link PCollection}s, "keyed" by + * {@link TupleTag}s. A PCollectionTuple can be used as the input or + * output of a + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} taking + * or producing multiple PCollection inputs or outputs that can be of + * different types, for instance a + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} with side + * outputs. + * + *

PCollectionTuples can be created and accessed as follows: + *

 {@code
+ * PCollection pc1 = ...;
+ * PCollection pc2 = ...;
+ * PCollection> pc3 = ...;
+ *
+ * // Create TupleTags for each of the PCollections to put in the
+ * // PCollectionTuple (the type of the TupleTag enables tracking the
+ * // static type of each of the PCollections in the PCollectionTuple):
+ * TupleTag tag1 = new TupleTag<>();
+ * TupleTag tag2 = new TupleTag<>();
+ * TupleTag> tag3 = new TupleTag<>();
+ *
+ * // Create a PCollectionTuple with three PCollections:
+ * PCollectionTuple pcs =
+ *     PCollectionTuple.of(tag1, pc1)
+ *                     .and(tag2, pc2)
+ *                     .and(tag3, pc3);
+ *
+ * // Create an empty PCollectionTuple:
+ * Pipeline p = ...;
+ * PCollectionTuple pcs2 = PCollectionTuple.empty(p);
+ *
+ * // Get PCollections out of a PCollectionTuple, using the same tags
+ * // that were used to put them in:
+ * PCollection pcX = pcs.get(tag2);
+ * PCollection pcY = pcs.get(tag1);
+ * PCollection> pcZ = pcs.get(tag3);
+ *
+ * // Get a map of all PCollections in a PCollectionTuple:
+ * Map, PCollection> allPcs = pcs.getAll();
+ * } 
+ */ +public class PCollectionTuple implements PInput, POutput { + /** + * Returns an empty PCollectionTuple that is part of the given Pipeline. + * + *

Longer PCollectionTuples can be created by calling + * {@link #and} on the result. + */ + public static PCollectionTuple empty(Pipeline pipeline) { + return new PCollectionTuple(pipeline); + } + + /** + * Returns a singleton PCollectionTuple containing the given + * PCollection keyed by the given TupleTag. + * + *

Longer PCollectionTuples can be created by calling + * {@link #and} on the result. + */ + public static PCollectionTuple of(TupleTag tag, PCollection pc) { + return empty(pc.getPipeline()).and(tag, pc); + } + + /** + * Returns a new PCollectionTuple that has all the PCollections and + * tags of this PCollectionTuple plus the given PCollection and tag. + * + *

The given TupleTag should not already be mapped to a + * PCollection in this PCollectionTuple. + * + *

All the PCollections in the resulting PCollectionTuple must be + * part of the same Pipeline. + */ + public PCollectionTuple and(TupleTag tag, PCollection pc) { + if (pc.getPipeline() != pipeline) { + throw new IllegalArgumentException( + "PCollections come from different Pipelines"); + } + + // The TypeToken in tag will often have good + // reflective information about T + pc.setTypeTokenInternal(tag.getTypeToken()); + return new PCollectionTuple(pipeline, + new ImmutableMap.Builder, PCollection>() + .putAll(pcollectionMap) + .put(tag, pc) + .build()); + } + + /** + * Returns whether this PCollectionTuple contains a PCollection with + * the given tag. + */ + public boolean has(TupleTag tag) { + return pcollectionMap.containsKey(tag); + } + + /** + * Returns the PCollection with the given tag in this + * PCollectionTuple. Throws IllegalArgumentException if there is no + * such PCollection, i.e., {@code !has(tag)}. + */ + public PCollection get(TupleTag tag) { + @SuppressWarnings("unchecked") + PCollection pcollection = (PCollection) pcollectionMap.get(tag); + if (pcollection == null) { + throw new IllegalArgumentException( + "TupleTag not found in this PCollectionTuple tuple"); + } + return pcollection; + } + + /** + * Returns an immutable Map from TupleTag to corresponding + * PCollection, for all the members of this PCollectionTuple. + */ + public Map, PCollection> getAll() { + return pcollectionMap; + } + + /** + * Applies the given PTransform to this input PCollectionTuple, and + * returns the PTransform's Output. + */ + public Output apply( + PTransform t) { + return Pipeline.applyTransform(this, t); + } + + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + Pipeline pipeline; + final Map, PCollection> pcollectionMap; + + PCollectionTuple(Pipeline pipeline) { + this(pipeline, new LinkedHashMap, PCollection>()); + } + + PCollectionTuple(Pipeline pipeline, + Map, PCollection> pcollectionMap) { + this.pipeline = pipeline; + this.pcollectionMap = Collections.unmodifiableMap(pcollectionMap); + } + + /** + * Returns a PCollectionTuple with each of the given tags mapping to a new + * output PCollection. + * + *

For use by primitive transformations only. + */ + public static PCollectionTuple ofPrimitiveOutputsInternal( + TupleTagList outputTags, WindowingFn windowingFn) { + Map, PCollection> pcollectionMap = new LinkedHashMap<>(); + for (TupleTag outputTag : outputTags.tupleTags) { + if (pcollectionMap.containsKey(outputTag)) { + throw new IllegalArgumentException( + "TupleTag already present in this tuple"); + } + + // In fact, `token` and `outputCollection` should have + // types TypeToken and PCollection for some + // unknown T. It is safe to create `outputCollection` + // with type PCollection because it has the same + // erasure as the correct type. When a transform adds + // elements to `outputCollection` they will be of type T. + @SuppressWarnings("unchecked") + TypeToken token = (TypeToken) outputTag.getTypeToken(); + PCollection outputCollection = PCollection + .createPrimitiveOutputInternal(windowingFn) + .setTypeTokenInternal(token); + + pcollectionMap.put(outputTag, outputCollection); + } + return new PCollectionTuple(null, pcollectionMap); + } + + @Override + public Pipeline getPipeline() { + return pipeline; + } + + @Override + public Collection expand() { + return pcollectionMap.values(); + } + + @Override + public void recordAsOutput(Pipeline pipeline, + PTransform transform) { + if (this.pipeline != null && this.pipeline != pipeline) { + throw new AssertionError( + "not expecting to change the Pipeline owning a PCollectionTuple"); + } + this.pipeline = pipeline; + int i = 0; + for (Map.Entry, PCollection> entry + : pcollectionMap.entrySet()) { + TupleTag tag = entry.getKey(); + PCollection pc = entry.getValue(); + pc.recordAsOutput(pipeline, transform, tag.getOutName(i)); + i++; + } + } + + @Override + public void finishSpecifying() { + for (PCollection pc : pcollectionMap.values()) { + pc.finishSpecifying(); + } + } + + @Override + public void finishSpecifyingOutput() { + for (PCollection pc : pcollectionMap.values()) { + pc.finishSpecifyingOutput(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java new file mode 100644 index 0000000000000..d19854ccc588b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.util.WindowedValue; + +import java.io.Serializable; + +/** + * A {@code PCollectionView} is an immutable view of a + * {@link PCollection} that can be accessed e.g. as a + * side input to a {@link DoFn}. + * + *

A {@PCollectionView} should always be the output of a {@link PTransform}. It is + * the joint responsibility of this transform and each {@link PipelineRunner} to + * implement the view in a runner-specific manner. + * + * @param the type of the value(s) accessible via this {@code PCollectionView} + * @param the type of the windowed value(s) accessible via this {@code PCollectionView} + */ +public interface PCollectionView extends PValue, Serializable { + /** + * A unique identifier, for internal use. + */ + public TupleTag>> getTagInternal(); + + /** + * For internal use only. + */ + public T fromIterableInternal(Iterable> contents); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java new file mode 100644 index 0000000000000..dda48fc530a8c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import java.util.Collection; +import java.util.Collections; + +/** + * {@code PDone} is the output of a + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} that + * doesn't have a non-trival result, e.g., a Write. No more + * transforms can be applied to it. + */ +public class PDone extends POutputValueBase { + public PDone() {} + + @Override + public Collection expand() { + // A PDone contains no PValues. + return Collections.emptyList(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java new file mode 100644 index 0000000000000..6d86fb069535e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; + +import java.util.Collection; + +/** + * The abstract interface of things that might be input to a + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}. + */ +public interface PInput { + /** + * Returns the owning Pipeline of this PInput. + * + * @throws IllegalStateException if the owning Pipeline hasn't been + * set yet + */ + public Pipeline getPipeline(); + + /** + * Expands this PInput into a list of its component input PValues. + * + *

A PValue expands to itself. + * + *

A tuple or list of PValues (e.g., + * PCollectionTuple, and PCollectionList) expands to its component + * PValues. + * + *

Not intended to be invoked directly by user code. + */ + public Collection expand(); + + /** + *

After building, finalizes this PInput to make it ready for + * being used as an input to a PTransform. + * + *

Automatically invoked whenever {@code apply()} is invoked on + * this PInput, so users do not normally call this explicitly. + */ + public void finishSpecifying(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java new file mode 100644 index 0000000000000..3b3264985d559 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; + +import java.util.Collection; + +/** + * The abstract interface of things that might be output from a + * {@link PTransform}. + */ +public interface POutput { + /** + * Expands this {@code POutput} into a list of its component output + * {@code PValue}s. + * + *

A {@link PValue} expands to itself. + * + *

A tuple or list of {@code PValue}s (e.g., + * {@link PCollectionTuple}, and + * {@link PCollectionList}) expands to its component {@code PValue}s. + * + *

Not intended to be invoked directly by user code. + */ + public Collection expand(); + + /** + * Records that this {@code POutput} is an output of the given + * {@code PTransform} in the given {@code Pipeline}. + * + *

Should expand this {@code POutput} and invoke + * {@link PValue#recordAsOutput(Pipeline, + * com.google.cloud.dataflow.sdk.transforms.PTransform, + * String)} on each component output {@code PValue}. + * + *

Automatically invoked as part of applying a + * {@code PTransform}. Not to be invoked directly by user code. + */ + public void recordAsOutput(Pipeline pipeline, + PTransform transform); + + /** + * As part of finishing the producing {@code PTransform}, finalizes this + * {@code PTransform} output to make it ready for being used as an input and + * for running. + * + *

This includes ensuring that all {@code PCollection}s + * have {@code Coder}s specified or defaulted. + * + *

Automatically invoked whenever this {@code POutput} is used + * as a {@code PInput} to another {@code PTransform}, or if never + * used as a {@code PInput}, when {@link Pipeline#run} is called, so + * users do not normally call this explicitly. + */ + public void finishSpecifyingOutput(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java new file mode 100644 index 0000000000000..0401393f142b8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; + +/** + * A {@code POutputValueBase} is the abstract base class of + * {@code PTransform} outputs. + * + *

A {@code PValueBase} that adds tracking of its producing + * {@code PTransform}. + * + *

For internal use. + */ +public abstract class POutputValueBase implements POutput { + + protected POutputValueBase() { } + + /** + * Returns the {@code PTransform} that this {@code POutputValueBase} + * is an output of. + * + *

For internal use only. + */ + public PTransform getProducingTransformInternal() { + return producingTransform; + } + + /** + * Records that this {@code POutputValueBase} is an output with the + * given name of the given {@code PTransform} in the given + * {@code Pipeline}. + * + *

To be invoked only by {@link POutput#recordAsOutput} + * implementations. Not to be invoked directly by user code. + */ + public void recordAsOutput(Pipeline pipeline, + PTransform transform) { + if (producingTransform != null) { + // Already used this POutput as a PTransform output. This can + // happen if the POutput is an output of a transform within a + // composite transform, and is also the result of the composite. + // We want to record the "immediate" atomic transform producing + // this output, and ignore all later composite transforms that + // also produce this output. + // + // Pipeline.applyInternal() uses !hasProducingTransform() to + // avoid calling this operation redundantly, but + // hasProducingTransform() doesn't apply to POutputValueBases + // that aren't PValues or composites of PValues, e.g., PDone. + return; + } + producingTransform = transform; + } + + /** + * Default behavior for {@code finishSpecifyingOutput()} is + * to do nothing. Override if your {@link PValue} requires + * finalization. + */ + public void finishSpecifyingOutput() { } + + /** + * The {@code PTransform} that produces this {@code POutputValueBase}. + */ + private PTransform producingTransform; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java new file mode 100644 index 0000000000000..7e45196af813c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; + +/** + * A {@code PValue} is the interface to values that can be + * input and output from {@link PTransform}s. + */ +public interface PValue extends POutput, PInput { + public String getName(); + + public PValue setPipelineInternal(Pipeline pipeline); + + /** + * Returns the {@code PTransform} that this {@code PValue} is an output of. + * + *

For internal use only. + */ + public PTransform getProducingTransformInternal(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java new file mode 100644 index 0000000000000..25b1fd6fd9a13 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.StringUtils; + +import java.util.Collection; +import java.util.Collections; + +/** + * A {@code PValueBase} is an abstract base class that provides + * sensible default implementations for methods of {@link PValue}. + * In particular, this includes functionality for getting/setting: + * + *

    + *
  • The {@code Pipeline} that the {@code PValue} is + * part of. + *
  • Whether the {@code PValue} has been finalized (as an input + * or an output), after which its properties can + * no longer be changed. + *
+ * + *

For internal use. + */ +public abstract class PValueBase extends POutputValueBase implements PValue { + /** + * Returns the name of this {@code PValueBase}. + * + *

By default, the name of a {@code PValueBase} is based on the + * name of the {@code PTransform} that produces it. It can be + * specified explicitly by calling {@link #setName}. + * + * @throws IllegalStateException if the name hasn't been set yet + */ + public String getName() { + if (name == null) { + throw new IllegalStateException("name not set"); + } + return name; + } + + /** + * Sets the name of this {@code PValueBase}. Returns {@code this}. + * + * @throws IllegalStateException if this {@code PValueBase} has + * already been finalized and is no longer settable, e.g., by having + * {@code apply()} called on it + */ + public PValueBase setName(String name) { + if (finishedSpecifying) { + throw new IllegalStateException( + "cannot change the name of " + this + " once it's been used"); + } + this.name = name; + return this; + } + + ///////////////////////////////////////////////////////////////////////////// + + protected PValueBase() {} + + /** + * The name of this {@code PValueBase}, or null if not yet set. + */ + private String name; + + /** + * The {@code Pipeline} that owns this {@code PValueBase}, or null + * if not yet set. + */ + private Pipeline pipeline; + + /** + * Whether this {@code PValueBase} has been finalized, and its core + * properties, e.g., name, can no longer be changed. + */ + private boolean finishedSpecifying = false; + + + /** + * Returns the owning {@code Pipeline} of this {@code PValueBase}. + * + * @throws IllegalStateException if the owning {@code Pipeline} + * hasn't been set yet + */ + @Override + public Pipeline getPipeline() { + if (pipeline == null) { + throw new IllegalStateException("owning pipeline not set"); + } + return pipeline; + } + + /** + * Sets the owning {@code Pipeline} of this {@code PValueBase}. + * Returns {@code this}. + * + *

For internal use only. + * + * @throws IllegalArgumentException if the owner has already been set + * differently + */ + @Override + public PValue setPipelineInternal(Pipeline pipeline) { + if (this.pipeline != null + && this.pipeline != pipeline) { + throw new IllegalArgumentException( + "owning pipeline cannot be changed once set"); + } + this.pipeline = pipeline; + return this; + } + + @Override + public void recordAsOutput(Pipeline pipeline, + PTransform transform) { + recordAsOutput(pipeline, transform, "out"); + } + + /** + * Records that this {@code POutputValueBase} is an output with the + * given name of the given {@code PTransform} in the given + * {@code Pipeline}. + * + *

To be invoked only by {@link POutput#recordAsOutput} + * implementations. Not to be invoked directly by user code. + */ + protected void recordAsOutput(Pipeline pipeline, + PTransform transform, + String outName) { + super.recordAsOutput(pipeline, transform); + if (name == null) { + name = pipeline.getFullName(transform) + "." + outName; + } + } + + /** + * Returns whether this {@code PValueBase} has been finalized, and + * its core properties, e.g., name, can no longer be changed. + * + *

For internal use only. + */ + public boolean isFinishedSpecifyingInternal() { + return finishedSpecifying; + } + + @Override + public Collection expand() { + return Collections.singletonList(this); + } + + @Override + public void finishSpecifying() { + getProducingTransformInternal().finishSpecifying(); + finishedSpecifying = true; + } + + @Override + public String toString() { + return (name == null ? "" : getName()) + + " [" + getKindString() + "]"; + } + + /** + * Returns a {@code String} capturing the kind of this + * {@code PValueBase}. + * + *

By default, uses the base name of this {@code PValueBase}'s + * class as its kind string. + */ + protected String getKindString() { + return StringUtils.approximateSimpleName(getClass()); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java new file mode 100644 index 0000000000000..9d91a18cb3cf0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.util.PropertyNames; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.List; + +/** + * An immutable (value, timestamp) pair. + * + *
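For example (illustrative only; the value and epoch-millis timestamp are arbitrary):

    TimestampedValue<String> tv = TimestampedValue.of("click", new Instant(1418428800000L));
    String value = tv.getValue();          // "click"
    Instant timestamp = tv.getTimestamp(); // 2014-12-13T00:00:00.000Z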

Used for assigning initial timestamps to values inserted into a pipeline + * with {@link com.google.cloud.dataflow.sdk.transforms.Create#timestamped}. + * + * @param the type of the value + */ +public class TimestampedValue { + + /** + * Returns a new {@code TimestampedValue} with the given value and timestamp. + */ + public static TimestampedValue of(V value, Instant timestamp) { + return new TimestampedValue<>(value, timestamp); + } + + public V getValue() { + return value; + } + + public Instant getTimestamp() { + return timestamp; + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Coder for {@code TimestampedValue}. + */ + public static class TimestampedValueCoder + extends StandardCoder> { + + private final Coder valueCoder; + + public static TimestampedValueCoder of(Coder valueCoder) { + return new TimestampedValueCoder<>(valueCoder); + } + + @JsonCreator + public static TimestampedValueCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List components) { + checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of((Coder) components.get(0)); + } + + @SuppressWarnings("unchecked") + TimestampedValueCoder(Coder valueCoder) { + this.valueCoder = checkNotNull(valueCoder); + } + + @Override + public void encode(TimestampedValue windowedElem, + OutputStream outStream, + Context context) + throws IOException { + valueCoder.encode(windowedElem.getValue(), outStream, context.nested()); + InstantCoder.of().encode( + windowedElem.getTimestamp(), outStream, context); + } + + @Override + public TimestampedValue decode(InputStream inStream, Context context) + throws IOException { + T value = valueCoder.decode(inStream, context.nested()); + Instant timestamp = InstantCoder.of().decode(inStream, context); + return TimestampedValue.of(value, timestamp); + } + + @Override + public boolean isDeterministic() { + return valueCoder.isDeterministic(); + } + + @Override + public List> getCoderArguments() { + return Arrays.>asList(valueCoder); + } + + public static List getInstanceComponents(TimestampedValue exampleValue) { + return Arrays.asList(exampleValue.getValue()); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + private final V value; + private final Instant timestamp; + + protected TimestampedValue(V value, Instant timestamp) { + this.value = value; + this.timestamp = timestamp; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java new file mode 100644 index 0000000000000..58562163f4a85 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.values; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.common.reflect.TypeToken; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.Serializable; +import java.util.Random; + +/** + * A {@code TupleTag} is a typed tag to use as the key of a + * heterogeneously typed tuple, like {@link PCollectionTuple} or + * Its generic type parameter allows tracking + * the static type of things stored in tuples. + * + *

To aid in assigning default {@code Coder}s for results of + * side outputs of {@code ParDo}, an output + * {@code TupleTag} should be instantiated with an extra {@code {}} so + * it is an instance of an anonymous subclass without generic type + * parameters. Input {@code TupleTag}s require no such extra + * instantiation (although it doesn't hurt). For example: + * + *

 {@code
+ * TupleTag<String> inputTag = new TupleTag<>();
+ * TupleTag<String> outputTag = new TupleTag<String>(){};
+ * } </pre>
+ * + * @param the type of the elements or values of the tagged thing, + * e.g., a {@code PCollection}. + */ +public class TupleTag implements Serializable { + /** + * Constructs a new {@code TupleTag}, with a fresh unique id. + * + *

This is the normal way {@code TupleTag}s are constructed. + */ + public TupleTag() { + this.id = genId(); + this.generated = true; + } + + /** + * Constructs a new {@code TupleTag} with the given id. + * + *

It is up to the user to ensure that two {@code TupleTag}s + * with the same id actually mean the same tag and carry the same + * generic type parameter. Violating this invariant can lead to + * hard-to-diagnose runtime type errors. Consequently, this + * operation should be used very sparingly, such as when the + * producer and consumer of {@code TupleTag}s are written in + * separate modules and can only coordinate via ids rather than + * shared {@code TupleTag} instances. Most of the time, + * {@link #TupleTag()} should be preferred. + */ + public TupleTag(String id) { + this.id = id; + this.generated = false; + } + + /** + * Returns the id of this {@code TupleTag}. + * + *

Two {@code TupleTag}s with the same id are considered equal. + * + *
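+   * <p> For instance (a sketch), two tags constructed with the same explicit
+   * id compare equal even though their static type parameters differ:
+   *
+   * <pre> {@code
+   * new TupleTag<String>("results").equals(new TupleTag<Integer>("results"));  // true
+   * } </pre>
+   *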

{@code TupleTag}s are not ordered, i.e., the class does not implement + * Comparable interface. TupleTags implement equals and hashCode, making them + * suitable for use as keys in HashMap and HashSet. + */ + public String getId() { return id; } + + /** + * If this {@code TupleTag} is tagging output {@code outputIndex} of + * a {@code PTransform}, returns the name that should be used by + * default for the output. + */ + public String getOutName(int outIndex) { + if (generated) { + return "out" + outIndex; + } else { + return id; + } + } + + /** + * Returns a {@code TypeToken} capturing what is known statically + * about the type of this {@code TupleTag} instance's most-derived + * class. + * + *

This is useful for a {@code TupleTag} constructed as an + * instance of an anonymous subclass with a trailing {@code {}}, + * e.g., {@code new TupleTag(){}}. + */ + public TypeToken getTypeToken() { + return new TypeToken(getClass()) {}; + } + + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + static final Random RANDOM = new Random(0); + + final String id; + final boolean generated; + + /** Generates and returns a fresh unique id for a TupleTag's id. */ + static String genId() { + long randomLong; + synchronized (RANDOM) { + randomLong = RANDOM.nextLong(); + } + return Long.toHexString(randomLong); + } + + @JsonCreator + private static TupleTag fromJson( + @JsonProperty(PropertyNames.VALUE) String id, + @JsonProperty(PropertyNames.IS_GENERATED) boolean generated) { + return new TupleTag(id, generated); + } + + private TupleTag(String id, boolean generated) { + this.id = id; + this.generated = generated; + } + + public CloudObject asCloudObject() { + CloudObject result = CloudObject.forClass(getClass()); + addString(result, PropertyNames.VALUE, id); + addBoolean(result, PropertyNames.IS_GENERATED, generated); + return result; + } + + @Override + public boolean equals(Object that) { + if (that instanceof TupleTag) { + return this.id.equals(((TupleTag) that).id); + } else { + return false; + } + } + + @Override + public int hashCode() { return id.hashCode(); } + + @Override + public String toString() { return "Tag<" + id + ">"; } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java new file mode 100644 index 0000000000000..27a0683bab5aa --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.common.collect.ImmutableList; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * A {@code TupleTagList} is an immutable list of heterogeneously + * typed {@link TupleTag}s. A TupleTagList is used, for instance, to + * specify the tags of the side outputs of a + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo}. + * + *

TupleTagLists can be created and accessed as follows: + *

 {@code
+ * TupleTag<String> tag1 = ...;
+ * TupleTag<Integer> tag2 = ...;
+ * TupleTag<Iterable<String>> tag3 = ...;
+ *
+ * // Create a TupleTagList with three TupleTags:
+ * TupleTagList tags = TupleTagList.of(tag1).and(tag2).and(tag3);
+ *
+ * // Create an empty TupleTagList:
+ * TupleTagList tags2 = TupleTagList.empty();
+ *
+ * // Get TupleTags out of a TupleTagList, by index (origin 0):
+ * TupleTag<?> tagX = tags.get(1);
+ * TupleTag<?> tagY = tags.get(0);
+ * TupleTag<?> tagZ = tags.get(2);
+ *
+ * // Get a list of all TupleTags in a TupleTagList:
+ * List<TupleTag<?>> allTags = tags.getAll();
+ * } </pre>
+ */ +public class TupleTagList implements Serializable { + /** + * Returns an empty TupleTagList. + * + *

Longer TupleTagLists can be created by calling + * {@link #and} on the result. + */ + public static TupleTagList empty() { + return new TupleTagList(); + } + + /** + * Returns a singleton TupleTagList containing the given TupleTag. + * + *

Longer TupleTagLists can be created by calling + * {@link #and} on the result. + */ + public static TupleTagList of(TupleTag tag) { + return empty().and(tag); + } + + /** + * Returns a TupleTagList containing the given TupleTags, in order. + * + *

Longer TupleTagLists can be created by calling + * {@link #and} on the result. + */ + public static TupleTagList of(List> tags) { + return empty().and(tags); + } + + /** + * Returns a new TupleTagList that has all the TupleTags of + * this TupleTagList plus the given TupleTag appended to the end. + */ + public TupleTagList and(TupleTag tag) { + return new TupleTagList( + new ImmutableList.Builder>() + .addAll(tupleTags) + .add(tag) + .build()); + } + + /** + * Returns a new TupleTagList that has all the TupleTags of + * this TupleTagList plus the given TupleTags appended to the end, + * in order. + */ + public TupleTagList and(List> tags) { + return new TupleTagList( + new ImmutableList.Builder>() + .addAll(tupleTags) + .addAll(tags) + .build()); + } + + /** + * Returns the number of TupleTags in this TupleTagList. + */ + public int size() { + return tupleTags.size(); + } + + /** + * Returns the TupleTag at the given index (origin zero). Throws + * IndexOutOfBounds if the index is out of the range + * {@code [0..size()-1]}. + */ + public TupleTag get(int index) { + return tupleTags.get(index); + } + + /** + * Returns an immutable List of all the TupleTags in this TupleTagList. + */ + public List> getAll() { + return tupleTags; + } + + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + final List> tupleTags; + + TupleTagList() { + this(new ArrayList>()); + } + + TupleTagList(List> tupleTags) { + this.tupleTags = Collections.unmodifiableList(tupleTags); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java new file mode 100644 index 0000000000000..95b9b45f53770 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.common.reflect.TypeToken; + +/** + * A {@code TypedPValue} is the abstract base class of things that + * store some number of values of type {@code T}. Because we know + * the type {@code T}, this is the layer of the inheritance hierarchy where + * we store a coder for objects of type {@code T} + * + * @param the type of the values stored in this {@code TypedPValue} + */ +public abstract class TypedPValue extends PValueBase implements PValue { + + /** + * Returns the Coder used by this TypedPValue to encode and decode + * the values stored in it. 
+ * + * @throws IllegalStateException if the Coder hasn't been set, and + * couldn't be inferred + */ + public Coder getCoder() { + if (coder == null) { + throw new IllegalStateException( + "coder for " + this + " not set, and couldn't be inferred; " + + "either register a default Coder for its element type, " + + "or use setCoder() to specify one explicitly"); + } + return coder; + } + + /** + * Sets the Coder used by this TypedPValue to encode and decode the + * values stored in it. Returns {@code this}. + * + * @throws IllegalStateException if this TypedPValue has already + * been finalized and is no longer settable, e.g., by having + * {@code apply()} called on it + */ + public TypedPValue setCoder(Coder coder) { + if (isFinishedSpecifyingInternal()) { + throw new IllegalStateException( + "cannot change the Coder of " + this + " once it's been used"); + } + if (coder == null) { + throw new IllegalArgumentException( + "Cannot setCoder(null)"); + } + this.coder = coder; + return this; + } + + @Override + public void recordAsOutput(Pipeline pipeline, + PTransform transform, + String outName) { + super.recordAsOutput(pipeline, transform, outName); + pipeline.addValueInternal(this); + } + + @Override + public TypedPValue setPipelineInternal(Pipeline pipeline) { + super.setPipelineInternal(pipeline); + return this; + } + + /** + * After building, finalizes this PValue to make it ready for + * running. Automatically invoked whenever the PValue is "used" + * (e.g., when apply() is called on it) and when the Pipeline is + * run (useful if this is a PValue with no consumers). + */ + @Override + public void finishSpecifying() { + if (isFinishedSpecifyingInternal()) { + return; + } + super.finishSpecifying(); + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal details below here. + + /** + * The Coder used by this TypedPValue to encode and decode the + * values stored in it, or null if not specified nor inferred yet. + */ + private Coder coder; + + protected TypedPValue() {} + + private TypeToken typeToken; + + /** + * Returns a {@code TypeToken} with some reflective information + * about {@code T}, if possible. May return {@code null} if no information + * is available. Subclasses may override this to enable better + * {@code Coder} inference. + */ + public TypeToken getTypeToken() { + return typeToken; + } + + /** + * Sets the {@code TypeToken} associated with this class. Better + * reflective type information will lead to better {@code Coder} + * inference. + */ + public TypedPValue setTypeTokenInternal(TypeToken typeToken) { + this.typeToken = typeToken; + return this; + } + + + /** + * If the coder is not explicitly set, this sets the coder for + * this {@code TypedPValue} to the best coder that can be inferred + * based upon the known {@code TypeToken}. By default, this is null, + * but can and should be improved by subclasses. + */ + @Override + public void finishSpecifyingOutput() { + if (coder == null) { + TypeToken token = getTypeToken(); + CoderRegistry registry = getProducingTransformInternal() + .getPipeline() + .getCoderRegistry(); + + if (token != null) { + coder = registry.getDefaultCoder(token); + } + + if (coder == null) { + coder = getProducingTransformInternal().getDefaultOutputCoder(this); + } + + if (coder == null) { + throw new IllegalStateException( + "unable to infer a default Coder for " + this + + "; either register a default Coder for its element type, " + + "or use setCoder() to specify one explicitly. 
" + + "If a default coder is registered, it may not be found " + + "due to type erasure; again, use setCoder() to specify " + + "a Coder explicitly"); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java new file mode 100644 index 0000000000000..ba6e927e0996b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines {@link com.google.cloud.dataflow.sdk.values.PCollection} and other classes for + * representing data in a {@link com.google.cloud.dataflow.sdk.Pipeline}. + * + *

A {@link com.google.cloud.dataflow.sdk.values.PCollection} is an immutable collection of + * values of type {@code T} and is the main representation for data. + * A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is a tuple of PCollections + * used in cases where PTransforms take or return multiple PCollections. + * + *
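+ * <p> As a small sketch (assuming the SDK's {@code Create} transform and
+ * {@code StringUtf8Coder}, used elsewhere in this SDK), a PCollection is
+ * produced by applying a transform to a Pipeline:
+ *
+ * <pre> {@code
+ * Pipeline p = ...;
+ * PCollection<String> words =
+ *     p.apply(Create.of("hello", "world")).setCoder(StringUtf8Coder.of());
+ * } </pre>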

A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is an immutable tuple of + * heterogeneously-typed {@link com.google.cloud.dataflow.sdk.values.PCollection}s, "keyed" by + * {@link com.google.cloud.dataflow.sdk.values.TupleTag}s. + * A PCollectionTuple can be used as the input or + * output of a + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} taking + * or producing multiple PCollection inputs or outputs that can be of + * different types, for instance a + * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} with side + * outputs. + * + *
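+ * <p> A sketch of retrieving one PCollection from a PCollectionTuple by its
+ * tag (the {@code get} accessor named here is assumed for illustration):
+ *
+ * <pre> {@code
+ * TupleTag<String> mainTag = new TupleTag<String>(){};
+ * PCollectionTuple results = ...;
+ * PCollection<String> main = results.get(mainTag);
+ * } </pre>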

A {@link com.google.cloud.dataflow.sdk.values.PCollectionView} is an immutable view of a + * PCollection that can be accessed from a DoFn and other user Fns + * as a side input. + * + */ +package com.google.cloud.dataflow.sdk.values; diff --git a/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties b/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties new file mode 100644 index 0000000000000..5b0a720b215d8 --- /dev/null +++ b/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties @@ -0,0 +1,5 @@ +# SDK source version. +version=${pom.version} + +build.date=${timestamp} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java new file mode 100644 index 0000000000000..13d2b2996cfd1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.core.IsInstanceOf.instanceOf; +import static org.hamcrest.core.IsNot.not; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; +import com.google.cloud.dataflow.sdk.util.UserCodeException; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for Pipeline. + */ +@RunWith(JUnit4.class) +public class PipelineTest { + + static class PipelineWrapper extends Pipeline { + protected PipelineWrapper(PipelineRunner runner) { + super(runner, PipelineOptionsFactory.create()); + } + } + + // Mock class that throws a user code exception during the call to + // Pipeline.run(). + static class TestPipelineRunnerThrowingUserException + extends PipelineRunner { + @Override + public PipelineResult run(Pipeline pipeline) { + Throwable t = new IllegalStateException("user code exception"); + throw new UserCodeException(t); + } + } + + // Mock class that throws an SDK or API client code exception during + // the call to Pipeline.run(). + static class TestPipelineRunnerThrowingSDKException + extends PipelineRunner { + @Override + public PipelineResult run(Pipeline pipeline) { + throw new IllegalStateException("SDK exception"); + } + } + + @Test + public void testPipelineUserExceptionHandling() { + Pipeline p = new PipelineWrapper( + new TestPipelineRunnerThrowingUserException()); + + // Check pipeline runner correctly catches user errors. + try { + Object results = p.run(); + fail("Should have thrown an exception."); + } catch (RuntimeException exn) { + // Make sure users don't have to worry about the + // UserCodeException wrapper. + Assert.assertThat(exn, not(instanceOf(UserCodeException.class))); + // Assert that the message is correct. 
+ Assert.assertThat( + exn.getMessage(), containsString("user code exception")); + // Cause should be IllegalStateException. + Assert.assertThat( + exn.getCause(), instanceOf(IllegalStateException.class)); + } + } + + @Test + public void testPipelineSDKExceptionHandling() { + Pipeline p = new PipelineWrapper(new TestPipelineRunnerThrowingSDKException()); + + // Check pipeline runner correctly catches SDK errors. + try { + Object results = p.run(); + fail("Should have thrown an exception."); + } catch (RuntimeException exn) { + // Make sure the exception isn't a UserCodeException. + Assert.assertThat(exn, not(instanceOf(UserCodeException.class))); + // Assert that the message is correct. + Assert.assertThat(exn.getMessage(), containsString("SDK exception")); + // RuntimeException should be IllegalStateException. + Assert.assertThat(exn, instanceOf(IllegalStateException.class)); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java new file mode 100644 index 0000000000000..9a92ba0167c4e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk; + +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Utilities for tests. + */ +public class TestUtils { + // Do not instantiate. + private TestUtils() {} + + public static final String[] NO_LINES_ARRAY = new String[] { }; + + public static final List NO_LINES = Arrays.asList(NO_LINES_ARRAY); + + public static final String[] LINES_ARRAY = new String[] { + "To be, or not to be: that is the question: ", + "Whether 'tis nobler in the mind to suffer ", + "The slings and arrows of outrageous fortune, ", + "Or to take arms against a sea of troubles, ", + "And by opposing end them? To die: to sleep; ", + "No more; and by a sleep to say we end ", + "The heart-ache and the thousand natural shocks ", + "That flesh is heir to, 'tis a consummation ", + "Devoutly to be wish'd. 
To die, to sleep; ", + "To sleep: perchance to dream: ay, there's the rub; ", + "For in that sleep of death what dreams may come ", + "When we have shuffled off this mortal coil, ", + "Must give us pause: there's the respect ", + "That makes calamity of so long life; ", + "For who would bear the whips and scorns of time, ", + "The oppressor's wrong, the proud man's contumely, ", + "The pangs of despised love, the law's delay, ", + "The insolence of office and the spurns ", + "That patient merit of the unworthy takes, ", + "When he himself might his quietus make ", + "With a bare bodkin? who would fardels bear, ", + "To grunt and sweat under a weary life, ", + "But that the dread of something after death, ", + "The undiscover'd country from whose bourn ", + "No traveller returns, puzzles the will ", + "And makes us rather bear those ills we have ", + "Than fly to others that we know not of? ", + "Thus conscience does make cowards of us all; ", + "And thus the native hue of resolution ", + "Is sicklied o'er with the pale cast of thought, ", + "And enterprises of great pith and moment ", + "With this regard their currents turn awry, ", + "And lose the name of action.--Soft you now! ", + "The fair Ophelia! Nymph, in thy orisons ", + "Be all my sins remember'd." }; + + public static final List LINES = Arrays.asList(LINES_ARRAY); + + public static final String[] LINES2_ARRAY = new String[] { + "hi", "there", "bob!" }; + + public static final List LINES2 = Arrays.asList(LINES2_ARRAY); + + public static final Integer[] NO_INTS_ARRAY = new Integer[] { }; + + public static final List NO_INTS = Arrays.asList(NO_INTS_ARRAY); + + public static final Integer[] INTS_ARRAY = new Integer[] { + 3, 42, Integer.MAX_VALUE, 0, -1, Integer.MIN_VALUE, 666 }; + + public static final List INTS = Arrays.asList(INTS_ARRAY); + + /** + * Matcher for KVs. + */ + public static class KvMatcher + extends TypeSafeMatcher> { + final Matcher keyMatcher; + final Matcher valueMatcher; + + public static KvMatcher isKv(Matcher keyMatcher, + Matcher valueMatcher) { + return new KvMatcher<>(keyMatcher, valueMatcher); + } + + public KvMatcher(Matcher keyMatcher, + Matcher valueMatcher) { + this.keyMatcher = keyMatcher; + this.valueMatcher = valueMatcher; + } + + @Override + public boolean matchesSafely(KV kv) { + return keyMatcher.matches(kv.getKey()) + && valueMatcher.matches(kv.getValue()); + } + + @Override + public void describeTo(Description description) { + description + .appendText("a KV(").appendValue(keyMatcher) + .appendText(", ").appendValue(valueMatcher) + .appendText(")"); + } + } + + public static PCollection createStrings(Pipeline p, + Iterable values) { + return p.apply(Create.of(values)).setCoder(StringUtf8Coder.of()); + } + + public static PCollection createInts(Pipeline p, + Iterable values) { + return p.apply(Create.of(values)).setCoder(BigEndianIntegerCoder.of()); + } + + public static PCollectionView + createSingletonInt(Pipeline p, Integer value) { + PCollection collection = p.apply(Create.of(value)); + return collection.apply(View.asSingleton()); + } + + //////////////////////////////////////////////////////////////////////////// + // Utilities for testing CombineFns, ensuring they give correct results + // across various permutations and shardings of the input. 
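+  // A minimal usage sketch. The Sum.SumIntegerFn named below is an assumption
+  // made purely for illustration; any CombineFn with matching input/output
+  // types would be exercised the same way:
+  //
+  //   checkCombineFn(new Sum.SumIntegerFn(), Arrays.asList(1, 2, 3, 4), 10);
+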
+ + public static void checkCombineFn( + CombineFn fn, List input, final VO expected) { + checkCombineFn(fn, input, CoreMatchers.is(expected)); + } + + public static void checkCombineFn( + CombineFn fn, List input, Matcher matcher) { + checkCombineFnInternal(fn, input, matcher); + Collections.shuffle(input); + checkCombineFnInternal(fn, input, matcher); + } + + private static void checkCombineFnInternal( + CombineFn fn, List input, Matcher matcher) { + int size = input.size(); + checkCombineFnShards(fn, Collections.singletonList(input), matcher); + checkCombineFnShards(fn, shardEvenly(input, 2), matcher); + if (size > 4) { + checkCombineFnShards(fn, shardEvenly(input, size / 2), matcher); + checkCombineFnShards( + fn, shardEvenly(input, (int) (size / Math.sqrt(size))), matcher); + } + checkCombineFnShards(fn, shardExponentially(input, 1.4), matcher); + checkCombineFnShards(fn, shardExponentially(input, 2), matcher); + checkCombineFnShards(fn, shardExponentially(input, Math.E), matcher); + } + + public static void checkCombineFnShards( + CombineFn fn, + List> shards, + Matcher matcher) { + checkCombineFnShardsInternal(fn, shards, matcher); + Collections.shuffle(shards); + checkCombineFnShardsInternal(fn, shards, matcher); + } + + private static void checkCombineFnShardsInternal( + CombineFn fn, + Iterable> shards, + Matcher matcher) { + List accumulators = new ArrayList<>(); + for (Iterable shard : shards) { + VA accumulator = fn.createAccumulator(); + for (VI elem : shard) { + fn.addInput(accumulator, elem); + } + accumulators.add(accumulator); + } + VA merged = fn.mergeAccumulators(accumulators); + assertThat(fn.extractOutput(merged), matcher); + } + + private static List> shardEvenly(List input, int numShards) { + List> shards = new ArrayList<>(numShards); + for (int i = 0; i < numShards; i++) { + shards.add(input.subList(i * input.size() / numShards, + (i + 1) * input.size() / numShards)); + } + return shards; + } + + private static List> shardExponentially( + List input, double base) { + assert base > 1.0; + List> shards = new ArrayList<>(); + int end = input.size(); + while (end > 0) { + int start = (int) (end / base); + shards.add(input.subList(start, end)); + end = start; + } + return shards; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java new file mode 100644 index 0000000000000..725c0e852022d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +/** + * Tests for AvroCoder. + */ +@RunWith(JUnit4.class) +public class AvroCoderTest { + + @DefaultCoder(AvroCoder.class) + private static class Pojo { + public String text; + public int count; + + // Empty constructor required for Avro decoding. + public Pojo() { + } + + public Pojo(String text, int count) { + this.text = text; + this.count = count; + } + + // auto-generated + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Pojo pojo = (Pojo) o; + + if (count != pojo.count) { + return false; + } + if (text != null + ? !text.equals(pojo.text) + : pojo.text != null) { + return false; + } + + return true; + } + + @Override + public String toString() { + return "Pojo{" + + "text='" + text + '\'' + + ", count=" + count + + '}'; + } + } + + static class GetTextFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().text); + } + } + + @Test + public void testAvroCoderEncoding() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + CloudObject encoding = coder.asCloudObject(); + + Assert.assertThat(encoding.keySet(), + Matchers.containsInAnyOrder("@type", "type", "schema")); + } + + @Test + public void testPojoEncoding() throws Exception { + Pojo before = new Pojo("Hello", 42); + + AvroCoder coder = AvroCoder.of(Pojo.class); + byte[] bytes = CoderUtils.encodeToByteArray(coder, before); + Pojo after = CoderUtils.decodeFromByteArray(coder, bytes); + + Assert.assertEquals(before, after); + } + + @Test + public void testGenericRecordEncoding() throws Exception { + String schemaString = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"User\",\n" + + " \"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + + " ]\n" + + "}"; + Schema schema = (new Schema.Parser()).parse(schemaString); + + GenericRecord before = new GenericData.Record(schema); + before.put("name", "Bob"); + before.put("favorite_number", 256); + // Leave favorite_color null + + AvroCoder coder = AvroCoder.of(GenericRecord.class, schema); + byte[] bytes = CoderUtils.encodeToByteArray(coder, before); + GenericRecord after = CoderUtils.decodeFromByteArray(coder, bytes); + + Assert.assertEquals(before, after); + + Assert.assertEquals(schema, coder.getSchema()); + } + + @Test + public void testEncodingNotBuffered() 
throws Exception { + // This test ensures that the coder doesn't read ahead and buffer data. + // Reading ahead causes a problem if the stream consists of records of different + // types. + Pojo before = new Pojo("Hello", 42); + + AvroCoder coder = AvroCoder.of(Pojo.class); + SerializableCoder intCoder = SerializableCoder.of(Integer.class); + + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + + Context context = Context.NESTED; + coder.encode(before, outStream, context); + intCoder.encode(10, outStream, context); + + ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray()); + + Pojo after = coder.decode(inStream, context); + Assert.assertEquals(before, after); + + Integer intAfter = intCoder.decode(inStream, context); + Assert.assertEquals(new Integer(10), intAfter); + } + + @Test + public void testDefaultCoder() throws Exception { + Pipeline p = TestPipeline.create(); + + // Use MyRecord as input and output types without explicitly specifying + // a coder (this uses the default coders, which may not be AvroCoder). + PCollection output = + p.apply(Create.of(new Pojo("hello", 1), new Pojo("world", 2))) + .apply(ParDo.of(new GetTextFn())); + + DataflowAssert.that(output) + .containsInAnyOrder("hello", "world"); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java new file mode 100644 index 0000000000000..b6d2b3c657d04 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** Unit tests for {@link ByteArrayCoder}. 
*/ +@RunWith(JUnit4.class) +public class ByteArrayCoderTest { + @Test public void testOuterContext() throws CoderException, IOException { + byte[] buffer = {0xa, 0xb, 0xc}; + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + ByteArrayCoder.of().encode(buffer, os, Coder.Context.OUTER); + byte[] encoded = os.toByteArray(); + + ByteArrayInputStream is = new ByteArrayInputStream(encoded); + byte[] decoded = ByteArrayCoder.of().decode(is, Coder.Context.OUTER); + assertThat(decoded, equalTo(buffer)); + } + + @Test public void testNestedContext() throws CoderException, IOException { + byte[][] buffers = {{0xa, 0xb, 0xc}, {}, {}, {0xd, 0xe}, {}}; + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + for (byte[] buffer : buffers) { + ByteArrayCoder.of().encode(buffer, os, Coder.Context.NESTED); + } + byte[] encoded = os.toByteArray(); + + ByteArrayInputStream is = new ByteArrayInputStream(encoded); + for (byte[] buffer : buffers) { + byte[] decoded = ByteArrayCoder.of().decode(is, Coder.Context.NESTED); + assertThat(decoded, equalTo(buffer)); + } + } + + @Test public void testRegisterByteSizeObserver() throws Exception { + CounterTestUtils.testByteCount(ByteArrayCoder.of(), Coder.Context.OUTER, + new byte[][]{{ 0xa, 0xb, 0xc }}); + + CounterTestUtils.testByteCount(ByteArrayCoder.of(), Coder.Context.NESTED, + new byte[][]{{ 0xa, 0xb, 0xc }, {}, {}, { 0xd, 0xe }, {}}); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java new file mode 100644 index 0000000000000..ef096eb01c999 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assume.assumeThat; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * Properties for use in {@link Coder} tests. These are implemented with junit assertions + * rather than as predicates for the sake of error messages. + */ +public class CoderProperties { + + /** + * Verifies that for the given {@link Coder}, {@link Coder.Context}, and values of + * type {@code T}, if the values are equal then the encoded bytes are equal. + */ + public static void coderDeterministic( + Coder coder, Coder.Context context, T value1, T value2) + throws Exception { + assumeThat(value1, equalTo(value2)); + assertArrayEquals(encode(coder, context, value1), encode(coder, context, value2)); + } + + /** + * Verifies that for the given {@link Coder}, {@link Coder.Context}, + * and value of type {@code T}, encoding followed by decoding yields an + * equal of type {@code T}. 
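+   *
+   * <p> A typical call, as a sketch ({@code StringUtf8Coder} is one of the
+   * SDK's coders and is assumed here only for illustration):
+   *
+   * <pre> {@code
+   * CoderProperties.coderDecodeEncodeEqual(
+   *     StringUtf8Coder.of(), Coder.Context.NESTED, "hello");
+   * } </pre>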
+ */ + public static void coderDecodeEncodeEqual( + Coder coder, Coder.Context context, T value) + throws Exception { + assertEquals( + decode(coder, context, encode(coder, context, value)), + value); + } + + ////////////////////////////////////////////////////////////////////////// + + private static byte[] encode( + Coder coder, Coder.Context context, T value) throws CoderException, IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + coder.encode(value, os, context); + return os.toByteArray(); + } + + private static T decode( + Coder coder, Coder.Context context, byte[] bytes) throws CoderException, IOException { + ByteArrayInputStream is = new ByteArrayInputStream(bytes); + return coder.decode(is, context); + } + +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java new file mode 100644 index 0000000000000..ace3094827335 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.reflect.TypeToken; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * Tests for CoderRegistry. 
+ */ +@RunWith(JUnit4.class) +public class CoderRegistryTest { + + public static CoderRegistry getStandardRegistry() { + CoderRegistry registry = new CoderRegistry(); + registry.registerStandardCoders(); + return registry; + } + + @Test + public void testRegisterInstantiatedGenericCoder() { + class MyValueList extends ArrayList { } + + CoderRegistry registry = new CoderRegistry(); + registry.registerCoder(MyValueList.class, ListCoder.of(MyValueCoder.of())); + assertEquals(registry.getDefaultCoder(MyValueList.class), ListCoder.of(MyValueCoder.of())); + } + + @Test + public void testSimpleDefaultCoder() { + CoderRegistry registry = getStandardRegistry(); + assertEquals(StringUtf8Coder.of(), registry.getDefaultCoder(String.class)); + assertEquals(null, registry.getDefaultCoder(UnknownType.class)); + } + + @Test + public void testTemplateDefaultCoder() { + CoderRegistry registry = getStandardRegistry(); + TypeToken> listToken = new TypeToken>() {}; + assertEquals(ListCoder.of(VarIntCoder.of()), + registry.getDefaultCoder(listToken)); + + registry.registerCoder(MyValue.class, MyValueCoder.class); + TypeToken>> kvToken = + new TypeToken>>() {}; + assertEquals(KvCoder.of(StringUtf8Coder.of(), + ListCoder.of(MyValueCoder.of())), + registry.getDefaultCoder(kvToken)); + + TypeToken> listUnknownToken = + new TypeToken>() {}; + assertEquals(null, registry.getDefaultCoder(listUnknownToken)); + } + + @Test + public void testTemplateInference() { + CoderRegistry registry = getStandardRegistry(); + MyTemplateClass> instance = + new MyTemplateClass>() {}; + Coder> expected = ListCoder.of(MyValueCoder.of()); + + // The map method operates on parameter names. + Map> coderMap = registry.getDefaultCoders( + instance.getClass(), + MyTemplateClass.class, + Collections.singletonMap("A", MyValueCoder.of())); + assertEquals(expected, coderMap.get("B")); + + // The array interface operates on position. + Coder[] coders = registry.getDefaultCoders( + instance.getClass(), + MyTemplateClass.class, + new Coder[] { MyValueCoder.of(), null }); + assertEquals(expected, coders[1]); + + // The "last argument" coder handles a common case. 
+ Coder> actual = registry.getDefaultCoder( + instance.getClass(), + MyTemplateClass.class, + MyValueCoder.of()); + assertEquals(expected, actual); + + try { + registry.getDefaultCoder( + instance.getClass(), + MyTemplateClass.class, + BigEndianIntegerCoder.of()); + fail("should have failed"); + } catch (IllegalArgumentException exn) { + assertEquals("Cannot encode elements of type class " + + "com.google.cloud.dataflow.sdk.coders.CoderRegistryTest$MyValue " + + "with BigEndianIntegerCoder", exn.getMessage()); + } + } + + @Test + public void testGetDefaultCoderFromIntegerValue() { + CoderRegistry registry = getStandardRegistry(); + Integer i = 13; + Coder coder = registry.getDefaultCoder(i); + assertEquals(VarIntCoder.of(), coder); + } + + @Test + public void testGetDefaultCoderFromKvValue() { + CoderRegistry registry = getStandardRegistry(); + KV kv = KV.of(13, "hello"); + Coder> coder = registry.getDefaultCoder(kv); + assertEquals(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()), + coder); + } + + @Test + public void testGetDefaultCoderFromNestedKvValue() { + CoderRegistry registry = getStandardRegistry(); + KV>> kv = KV.of(13, KV.of(17L, KV.of("hello", "goodbye"))); + Coder>>> coder = registry.getDefaultCoder(kv); + assertEquals( + KvCoder.of(VarIntCoder.of(), + KvCoder.of(VarLongCoder.of(), + KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))), + coder); + } + + @Test + public void testTypeCompatibility() { + assertTrue(CoderRegistry.isCompatible( + BigEndianIntegerCoder.of(), Integer.class)); + assertFalse(CoderRegistry.isCompatible( + BigEndianIntegerCoder.of(), String.class)); + + assertFalse(CoderRegistry.isCompatible( + ListCoder.of(BigEndianIntegerCoder.of()), Integer.class)); + assertTrue(CoderRegistry.isCompatible( + ListCoder.of(BigEndianIntegerCoder.of()), + new TypeToken>() {}.getType())); + assertFalse(CoderRegistry.isCompatible( + ListCoder.of(BigEndianIntegerCoder.of()), + new TypeToken>() {}.getType())); + } + + static class MyTemplateClass { } + + static class MyValue { } + + static class MyValueCoder implements Coder { + + private static final MyValueCoder INSTANCE = new MyValueCoder(); + + public static MyValueCoder of() { + return INSTANCE; + } + + public static List getInstanceComponents(MyValue exampleValue) { + return Arrays.asList(); + } + + @Override + public void encode(MyValue value, OutputStream outStream, Context context) + throws CoderException, IOException { + } + + @Override + public MyValue decode(InputStream inStream, Context context) + throws CoderException, IOException { + return new MyValue(); + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public CloudObject asCloudObject() { + return null; + } + + @Override + public boolean isDeterministic() { return true; } + + @Override + public boolean isRegisterByteSizeObserverCheap(MyValue value, Context context) { + return true; + } + + @Override + public void registerByteSizeObserver( + MyValue value, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(0L); + } + } + + static class UnknownType { } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java new file mode 100644 index 0000000000000..e532d44dc66b5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** Unit tests for {@link CustomCoder}. */ +@RunWith(JUnit4.class) +public class CustomCoderTest { + + private static class MyCustomCoder extends CustomCoder> { + private final String key; + + public MyCustomCoder(String key) { + this.key = key; + } + + @Override + public void encode(KV kv, OutputStream out, Context context) + throws IOException { + new DataOutputStream(out).writeLong(kv.getValue()); + } + + @Override + public KV decode(InputStream inStream, Context context) + throws IOException { + return KV.of(key, new DataInputStream(inStream).readLong()); + } + + @Override + public boolean equals(Object other) { + return other instanceof MyCustomCoder + && key.equals(((MyCustomCoder) other).key); + } + + @Override + public int hashCode() { + return key.hashCode(); + } + } + + @Test public void testEncodeDecode() throws Exception { + MyCustomCoder coder = new MyCustomCoder("key"); + byte[] encoded = CoderUtils.encodeToByteArray(coder, KV.of("key", 3L)); + Assert.assertEquals( + KV.of("key", 3L), CoderUtils.decodeFromByteArray(coder, encoded)); + + byte[] encoded2 = CoderUtils.encodeToByteArray(coder, KV.of("ignored", 3L)); + Assert.assertEquals( + KV.of("key", 3L), CoderUtils.decodeFromByteArray(coder, encoded2)); + } + + @Test public void testEncodable() throws Exception { + SerializableUtils.ensureSerializable(new MyCustomCoder("key")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java new file mode 100644 index 0000000000000..769d1e6fb144f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.client.util.Preconditions; +import com.google.common.reflect.TypeToken; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; + +/** + * Tests of Coder defaults. + */ +@RunWith(JUnit4.class) +public class DefaultCoderTest { + + @DefaultCoder(AvroCoder.class) + private static class AvroRecord { + } + + private static class SerializableBase implements Serializable { + } + + @DefaultCoder(SerializableCoder.class) + private static class SerializableRecord extends SerializableBase { + } + + @DefaultCoder(CustomCoder.class) + private static class CustomRecord extends SerializableBase { + } + + private static class Unknown { + } + + private static class CustomCoder extends SerializableCoder { + // Extending SerializableCoder isn't trivial, but it can be done. + @SuppressWarnings("unchecked") + public static SerializableCoder of(Class recordType) { + Preconditions.checkArgument( + CustomRecord.class.isAssignableFrom(recordType)); + return (SerializableCoder) new CustomCoder(); + } + + protected CustomCoder() { + super(CustomRecord.class); + } + } + + @Test + public void testDefaultCoders() throws Exception { + checkDefault(AvroRecord.class, AvroCoder.class); + checkDefault(SerializableBase.class, SerializableCoder.class); + checkDefault(SerializableRecord.class, SerializableCoder.class); + checkDefault(CustomRecord.class, CustomCoder.class); + } + + @Test + public void testUnknown() throws Exception { + CoderRegistry registery = new CoderRegistry(); + Coder coderType = registery.getDefaultCoder(Unknown.class); + Assert.assertNull(coderType); + } + + /** + * Checks that the default Coder for {@code valueType} is an instance of + * {@code expectedCoder}. + */ + private void checkDefault(Class valueType, + Class expectedCoder) { + CoderRegistry registry = new CoderRegistry(); + Coder coder = registry.getDefaultCoder(TypeToken.of(valueType)); + Assert.assertThat(coder, Matchers.instanceOf(expectedCoder)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java new file mode 100644 index 0000000000000..dd719004eab1c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.common.primitives.UnsignedBytes; + +import org.joda.time.Instant; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** Unit tests for {@link InstantCoder}. */ +@RunWith(JUnit4.class) +public class InstantCoderTest { + private final InstantCoder coder = InstantCoder.of(); + private final List timestamps = + Arrays.asList(0L, 1L, -1L, -255L, 256L, Long.MIN_VALUE, Long.MAX_VALUE); + + @Test + public void testBasicEncoding() throws Exception { + for (long timestamp : timestamps) { + Assert.assertEquals(new Instant(timestamp), + CoderUtils.decodeFromByteArray(coder, + CoderUtils.encodeToByteArray(coder, new Instant(timestamp)))); + } + } + + @Test + public void testOrderedEncoding() throws Exception { + List sortedTimestamps = new ArrayList<>(timestamps); + Collections.sort(sortedTimestamps); + + List encodings = new ArrayList<>(sortedTimestamps.size()); + for (long timestamp : sortedTimestamps) { + encodings.add(CoderUtils.encodeToByteArray(coder, new Instant(timestamp))); + } + + // Verify that the encodings were already sorted, since they were generated + // in the correct order. + List sortedEncodings = new ArrayList<>(encodings); + Collections.sort(sortedEncodings, UnsignedBytes.lexicographicalComparator()); + + Assert.assertEquals(encodings, sortedEncodings); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java new file mode 100644 index 0000000000000..993c5d0a5e917 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link IterableCoder}. 
*/ +@RunWith(JUnit4.class) +public class IterableCoderTest { + @Test + public void testGetInstanceComponentsNonempty() { + Iterable iterable = Arrays.asList(2, 58, 99, 5); + List components = IterableCoder.getInstanceComponents(iterable); + assertEquals(1, components.size()); + assertEquals(2, components.get(0)); + } + + @Test + public void testGetInstanceComponentsEmpty() { + Iterable iterable = Arrays.asList(); + List components = IterableCoder.getInstanceComponents(iterable); + assertNull(components); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java new file mode 100644 index 0000000000000..c04d3e16745b8 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link ListCoder}. */ +@RunWith(JUnit4.class) +public class ListCoderTest { + @Test + public void testGetInstanceComponentsNonempty() { + List list = Arrays.asList(21, 5, 3, 5); + List components = ListCoder.getInstanceComponents(list); + assertEquals(1, components.size()); + assertEquals(21, components.get(0)); + } + + @Test + public void testGetInstanceComponentsEmpty() { + List list = Arrays.asList(); + List components = ListCoder.getInstanceComponents(list); + assertNull(components); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java new file mode 100644 index 0000000000000..30cd0d8e8100b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Unit tests for {@link MapCoder}. 
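+ *
+ * As asserted below, getInstanceComponents is expected to return a
+ * two-element [key, value] sample for a nonempty map and null for an
+ * empty one.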
*/ +@RunWith(JUnit4.class) +public class MapCoderTest { + @Test + public void testGetInstanceComponentsNonempty() { + Map map = new HashMap<>(); + map.put(17, "foozle"); + List components = MapCoder.getInstanceComponents(map); + assertEquals(2, components.size()); + assertEquals(17, components.get(0)); + assertEquals("foozle", components.get(1)); + } + + @Test + public void testGetInstanceComponentsEmpty() { + Map map = new HashMap<>(); + List components = MapCoder.getInstanceComponents(map); + assertNull(components); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java new file mode 100644 index 0000000000000..3e56832a3faad --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.Serializable; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Tests SerializableCoder. 
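+ *
+ * Covers round-trip encoding of Serializable records, cloud-object
+ * serialization of the coder itself, default-coder inference, and long
+ * string encoding via StringUtf8Coder in both OUTER and NESTED contexts.
+ * A minimal usage sketch (the record type is this test's own MyRecord):
+ * <pre>{@code
+ * SerializableCoder<MyRecord> coder = SerializableCoder.of(MyRecord.class);
+ * byte[] bytes = CoderUtils.encodeToByteArray(coder, new MyRecord("to be"));
+ * MyRecord copy = CoderUtils.decodeFromByteArray(coder, bytes);
+ * }</pre>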
+ */ +@RunWith(JUnit4.class) +public class SerializableCoderTest implements Serializable { + + @DefaultCoder(SerializableCoder.class) + static class MyRecord implements Serializable { + public String value; + + public MyRecord(String value) { + this.value = value; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + MyRecord myRecord = (MyRecord) o; + return value.equals(myRecord.value); + } + + @Override + public int hashCode() { + return value.hashCode(); + } + } + + static class StringToRecord extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(new MyRecord(c.element())); + } + } + + static class RecordToString extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().value); + } + } + + static final List LINES = Arrays.asList( + "To be,", + "or not to be"); + + @Test + public void testSerializableCoder() throws Exception { + IterableCoder coder = IterableCoder + .of(SerializableCoder.of(MyRecord.class)); + + List records = new LinkedList<>(); + for (String l : LINES) { + records.add(new MyRecord(l)); + } + + byte[] encoded = CoderUtils.encodeToByteArray(coder, records); + Iterable decoded = CoderUtils.decodeFromByteArray(coder, encoded); + + assertEquals(records, decoded); + } + + @Test + public void testSerializableCoderConstruction() throws Exception { + SerializableCoder coder = SerializableCoder.of(MyRecord.class); + assertEquals(coder.getRecordType(), MyRecord.class); + + CloudObject encoding = coder.asCloudObject(); + Assert.assertThat(encoding.getClassName(), + Matchers.containsString(SerializableCoder.class.getSimpleName())); + + Coder decoded = Serializer.deserialize(encoding, Coder.class); + Assert.assertThat(decoded, Matchers.instanceOf(SerializableCoder.class)); + } + + @Test + public void testDefaultCoder() throws Exception { + Pipeline p = TestPipeline.create(); + + // Use MyRecord as input and output types without explicitly specifying + // a coder (this uses the default coders, which may not be + // SerializableCoder). + PCollection output = + p.apply(Create.of("Hello", "World")) + .apply(ParDo.of(new StringToRecord())) + .apply(ParDo.of(new RecordToString())); + + DataflowAssert.that(output) + .containsInAnyOrder("Hello", "World"); + } + + @Test + public void testLongStringEncoding() throws Exception { + StringUtf8Coder coder = StringUtf8Coder.of(); + + // Java's DataOutputStream.writeUTF fails at 64k, so test well beyond that. + char[] chars = new char[100 * 1024]; + Arrays.fill(chars, 'o'); + String source = new String(chars); + + // Verify OUTER encoding. + assertEquals(source, CoderUtils.decodeFromByteArray(coder, + CoderUtils.encodeToByteArray(coder, source))); + + // Second string uses a UTF8 character. Each codepoint is translated into + // 4 characters in UTF8. + int[] codePoints = new int[20 * 1024]; + Arrays.fill(codePoints, 0x1D50A); // "MATHEMATICAL_FRAKTUR_CAPITAL_G" + String source2 = new String(codePoints, 0, codePoints.length); + + // Verify OUTER encoding. + assertEquals(source2, CoderUtils.decodeFromByteArray(coder, + CoderUtils.encodeToByteArray(coder, source2))); + + + // Encode both strings into NESTED form. 
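+ // Unlike the OUTER encodings above, the NESTED form must be
+ // self-delimiting, since two values are written to (and then read back
+ // from) a single stream.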
+ byte[] nestedEncoding; + try (ByteArrayOutputStream os = new ByteArrayOutputStream()) { + coder.encode(source, os, Coder.Context.NESTED); + coder.encode(source2, os, Coder.Context.NESTED); + nestedEncoding = os.toByteArray(); + } + + // Decode from NESTED form. + try (ByteArrayInputStream is = new ByteArrayInputStream(nestedEncoding)) { + String result = coder.decode(is, Coder.Context.NESTED); + String result2 = coder.decode(is, Coder.Context.NESTED); + assertEquals(0, is.available()); + assertEquals(source, result); + assertEquals(source2, result2); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java new file mode 100644 index 0000000000000..f464e813bf5d4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link URICoder}. */ +@RunWith(JUnit4.class) +public class URICoderTest { + + // Test data + + private static final List TEST_URI_STRINGS = Arrays.asList( + "http://www.example.com", + "gs://myproject/mybucket/a/gcs/path", + "/just/a/path", + "file:/path/with/no/authority", + "file:///path/with/empty/authority"); + + private static final List TEST_CONTEXTS = Arrays.asList( + Coder.Context.OUTER, + Coder.Context.NESTED); + + // Tests + + @Test + public void testDeterministic() throws Exception { + Coder coder = URICoder.of(); + + for (String uriString : TEST_URI_STRINGS) { + for (Coder.Context context : TEST_CONTEXTS) { + // Obviously equal, but distinct as objects + CoderProperties.coderDeterministic(coder, context, new URI(uriString), new URI(uriString)); + } + } + } + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = URICoder.of(); + + for (String uriString : TEST_URI_STRINGS) { + for (Coder.Context context : TEST_CONTEXTS) { + CoderProperties.coderDecodeEncodeEqual(coder, context, new URI(uriString)); + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java new file mode 100644 index 0000000000000..ad6f16567e92c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.avro.specific.SpecificDatumWriter; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for AvroIO Read and Write transforms. 
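+ *
+ * A typical read, as exercised below (the path and the generated User
+ * class are illustrative):
+ * <pre>{@code
+ * PCollection<User> users =
+ *     p.apply(AvroIO.Read.named("ReadUsers")
+ *                        .from("/path/to/file.avro")
+ *                        .withSchema(User.class));
+ * }</pre>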
+ */ +@RunWith(JUnit4.class) +public class AvroIOTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + private File avroFile; + + @Before + public void prepareAvroFileBeforeAnyTest() throws IOException { + avroFile = tmpFolder.newFile("file.avro"); + } + + private final String schemaString = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"User\",\n" + + " \"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + + " ]\n" + + "}"; + private final Schema.Parser parser = new Schema.Parser(); + private final Schema schema = parser.parse(schemaString); + + private User[] generateAvroObjects() { + User user1 = new User(); + user1.setName("Bob"); + user1.setFavoriteNumber(256); + + User user2 = new User(); + user2.setName("Alice"); + user2.setFavoriteNumber(128); + + User user3 = new User(); + user3.setName("Ted"); + user3.setFavoriteColor("white"); + + return new User[] { user1, user2, user3 }; + } + + private GenericRecord[] generateAvroGenericRecords() { + GenericRecord user1 = new GenericData.Record(schema); + user1.put("name", "Bob"); + user1.put("favorite_number", 256); + + GenericRecord user2 = new GenericData.Record(schema); + user2.put("name", "Alice"); + user2.put("favorite_number", 128); + + GenericRecord user3 = new GenericData.Record(schema); + user3.put("name", "Ted"); + user3.put("favorite_color", "white"); + + return new GenericRecord[] { user1, user2, user3 }; + } + + private void generateAvroFile(User[] elements) throws IOException { + DatumWriter userDatumWriter = new SpecificDatumWriter<>(User.class); + DataFileWriter dataFileWriter = new DataFileWriter<>(userDatumWriter); + dataFileWriter.create(elements[0].getSchema(), avroFile); + for (User user : elements) { + dataFileWriter.append(user); + } + dataFileWriter.close(); + } + + private List readAvroFile() throws IOException { + DatumReader userDatumReader = new SpecificDatumReader<>(User.class); + DataFileReader dataFileReader = new DataFileReader<>(avroFile, userDatumReader); + List users = new ArrayList<>(); + while (dataFileReader.hasNext()) { + users.add(dataFileReader.next()); + } + return users; + } + + void runTestRead(AvroIO.Read.Bound read, String expectedName, T[] expectedOutput) + throws Exception { + generateAvroFile(generateAvroObjects()); + + DirectPipeline p = DirectPipeline.createForTest(); + PCollection output = p.apply(read); + EvaluationResults results = p.run(); + assertEquals(expectedName, output.getName()); + assertThat(results.getPCollection(output), + containsInAnyOrder(expectedOutput)); + } + + @Test + public void testReadFromGeneratedClass() throws Exception { + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(User.class), + "AvroIO.Read.out", generateAvroObjects()); + runTestRead(AvroIO.Read.withSchema(User.class) + .from(avroFile.getPath()), + "AvroIO.Read.out", generateAvroObjects()); + runTestRead(AvroIO.Read.named("MyRead") + .from(avroFile.getPath()) + .withSchema(User.class), + "MyRead.out", generateAvroObjects()); + runTestRead(AvroIO.Read.named("MyRead") + .withSchema(User.class) + .from(avroFile.getPath()), + "MyRead.out", generateAvroObjects()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(User.class) + .named("HerRead"), + "HerRead.out", generateAvroObjects()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .named("HerRead") + 
.withSchema(User.class), + "HerRead.out", generateAvroObjects()); + runTestRead(AvroIO.Read.withSchema(User.class) + .named("HerRead") + .from(avroFile.getPath()), + "HerRead.out", generateAvroObjects()); + runTestRead(AvroIO.Read.withSchema(User.class) + .from(avroFile.getPath()) + .named("HerRead"), + "HerRead.out", generateAvroObjects()); + } + + @Test + public void testReadFromSchema() throws Exception { + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(schema), + "AvroIO.Read.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schema) + .from(avroFile.getPath()), + "AvroIO.Read.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.named("MyRead") + .from(avroFile.getPath()) + .withSchema(schema), + "MyRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.named("MyRead") + .withSchema(schema) + .from(avroFile.getPath()), + "MyRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(schema) + .named("HerRead"), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .named("HerRead") + .withSchema(schema), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schema) + .named("HerRead") + .from(avroFile.getPath()), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schema) + .from(avroFile.getPath()) + .named("HerRead"), + "HerRead.out", generateAvroGenericRecords()); + } + + @Test + public void testReadFromSchemaString() throws Exception { + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(schemaString), + "AvroIO.Read.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schemaString) + .from(avroFile.getPath()), + "AvroIO.Read.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.named("MyRead") + .from(avroFile.getPath()) + .withSchema(schemaString), + "MyRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.named("MyRead") + .withSchema(schemaString) + .from(avroFile.getPath()), + "MyRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .withSchema(schemaString) + .named("HerRead"), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.from(avroFile.getPath()) + .named("HerRead") + .withSchema(schemaString), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schemaString) + .named("HerRead") + .from(avroFile.getPath()), + "HerRead.out", generateAvroGenericRecords()); + runTestRead(AvroIO.Read.withSchema(schemaString) + .from(avroFile.getPath()) + .named("HerRead"), + "HerRead.out", generateAvroGenericRecords()); + } + + void runTestWrite(AvroIO.Write.Bound write, String expectedName) + throws Exception { + User[] users = generateAvroObjects(); + + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = p.apply(Create.of(Arrays.asList((T[]) users))) + .setCoder((Coder) AvroCoder.of(User.class)); + PDone output = input.apply(write.withoutSharding()); + EvaluationResults results = p.run(); + assertEquals(expectedName, write.getName()); + + assertThat(readAvroFile(), containsInAnyOrder(users)); + } + + @Test + public void testWriteFromGeneratedClass() throws Exception { + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(User.class), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.withSchema(User.class) + .to(avroFile.getPath()), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.named("MyWrite") 
+ .to(avroFile.getPath()) + .withSchema(User.class), + "MyWrite"); + runTestWrite(AvroIO.Write.named("MyWrite") + .withSchema(User.class) + .to(avroFile.getPath()), + "MyWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(User.class) + .named("HerWrite"), + "HerWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .named("HerWrite") + .withSchema(User.class), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(User.class) + .named("HerWrite") + .to(avroFile.getPath()), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(User.class) + .to(avroFile.getPath()) + .named("HerWrite"), + "HerWrite"); + } + + @Test + public void testWriteFromSchema() throws Exception { + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(schema), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.withSchema(schema) + .to(avroFile.getPath()), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.named("MyWrite") + .to(avroFile.getPath()) + .withSchema(schema), + "MyWrite"); + runTestWrite(AvroIO.Write.named("MyWrite") + .withSchema(schema) + .to(avroFile.getPath()), + "MyWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(schema) + .named("HerWrite"), + "HerWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .named("HerWrite") + .withSchema(schema), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(schema) + .named("HerWrite") + .to(avroFile.getPath()), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(schema) + .to(avroFile.getPath()) + .named("HerWrite"), + "HerWrite"); + } + + @Test + public void testWriteFromSchemaString() throws Exception { + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(schemaString), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.withSchema(schemaString) + .to(avroFile.getPath()), + "AvroIO.Write"); + runTestWrite(AvroIO.Write.named("MyWrite") + .to(avroFile.getPath()) + .withSchema(schemaString), + "MyWrite"); + runTestWrite(AvroIO.Write.named("MyWrite") + .withSchema(schemaString) + .to(avroFile.getPath()), + "MyWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .withSchema(schemaString) + .named("HerWrite"), + "HerWrite"); + runTestWrite(AvroIO.Write.to(avroFile.getPath()) + .named("HerWrite") + .withSchema(schemaString), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(schemaString) + .named("HerWrite") + .to(avroFile.getPath()), + "HerWrite"); + runTestWrite(AvroIO.Write.withSchema(schemaString) + .to(avroFile.getPath()) + .named("HerWrite"), + "HerWrite"); + } + + // TODO: for Write only, test withSuffix, withNumShards, + // withShardNameTemplate and withoutSharding. +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/BigQueryIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/BigQueryIOTest.java new file mode 100644 index 0000000000000..863e260282a34 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/BigQueryIOTest.java @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.junit.Assert.assertEquals; + +import com.google.api.client.util.Data; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition; +import com.google.cloud.dataflow.sdk.options.BigQueryOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.util.CoderUtils; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; + +/** + * Tests for BigQueryIO. + */ +@RunWith(JUnit4.class) +public class BigQueryIOTest { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private void checkReadObject( + BigQueryIO.Read.Bound bound, String project, String dataset, String table) { + checkReadObjectWithValidate(bound, project, dataset, table, true); + } + + private void checkReadObjectWithValidate( + BigQueryIO.Read.Bound bound, String project, String dataset, String table, boolean validate) { + assertEquals(project, bound.table.getProjectId()); + assertEquals(dataset, bound.table.getDatasetId()); + assertEquals(table, bound.table.getTableId()); + assertEquals(validate, bound.validate); + } + + private void checkWriteObject( + BigQueryIO.Write.Bound bound, String project, String dataset, String table, + TableSchema schema, CreateDisposition createDisposition, + WriteDisposition writeDisposition) { + checkWriteObjectWithValidate( + bound, project, dataset, table, schema, createDisposition, writeDisposition, true); + } + + private void checkWriteObjectWithValidate( + BigQueryIO.Write.Bound bound, String project, String dataset, String table, + TableSchema schema, CreateDisposition createDisposition, + WriteDisposition writeDisposition, boolean validate) { + assertEquals(project, bound.table.getProjectId()); + assertEquals(dataset, bound.table.getDatasetId()); + assertEquals(table, bound.table.getTableId()); + assertEquals(schema, bound.schema); + assertEquals(createDisposition, bound.createDisposition); + assertEquals(writeDisposition, bound.writeDisposition); + assertEquals(validate, bound.validate); + } + + @Before + public void setUp() { + BigQueryOptions options = PipelineOptionsFactory.as(BigQueryOptions.class); + options.setProject("defaultProject"); + } + + @Test + public void testBuildSource() throws IOException { + BigQueryIO.Read.Bound bound = BigQueryIO.Read.named("ReadMyTable") + .from("foo.com:project:somedataset.sometable"); + checkReadObject(bound, "foo.com:project", "somedataset", "sometable"); + } + + @Test + public void testBuildSourcewithoutValidation() throws IOException { + // This test just checks that using withoutValidation will not trigger object + // construction errors. 
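+ // The builder should simply record validate == false, which the
+ // checkReadObjectWithValidate helper above asserts.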
+ BigQueryIO.Read.Bound bound = BigQueryIO.Read.named("ReadMyTable") + .from("foo.com:project:somedataset.sometable").withoutValidation(); + checkReadObjectWithValidate(bound, "foo.com:project", "somedataset", "sometable", false); + } + + @Test + public void testBuildSourceWithDefaultProject() throws IOException { + BigQueryIO.Read.Bound bound = BigQueryIO.Read.named("ReadMyTable") + .from("somedataset.sometable"); + checkReadObject(bound, null, "somedataset", "sometable"); + } + + @Test + public void testBuildSourceWithTableReference() throws IOException { + TableReference table = new TableReference() + .setProjectId("foo.com:project") + .setDatasetId("somedataset") + .setTableId("sometable"); + BigQueryIO.Read.Bound bound = BigQueryIO.Read.named("ReadMyTable") + .from(table); + checkReadObject(bound, "foo.com:project", "somedataset", "sometable"); + } + + @Test(expected = IllegalStateException.class) + public void testBuildSourceWithoutTable() throws IOException { + Pipeline p = TestPipeline.create(); + p.apply(BigQueryIO.Read.named("ReadMyTable")); + } + + @Test + public void testBuildSink() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable"); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testBuildSinkwithoutValidation() throws IOException { + // This test just checks that using withoutValidation will not trigger object + // construction errors. + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable").withoutValidation(); + checkWriteObjectWithValidate( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, false); + } + + @Test + public void testBuildSinkDefaultProject() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("somedataset.sometable"); + checkWriteObject( + bound, null, "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testBuildSinkWithTableReference() throws IOException { + TableReference table = new TableReference() + .setProjectId("foo.com:project") + .setDatasetId("somedataset") + .setTableId("sometable"); + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to(table); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test(expected = IllegalStateException.class) + public void testBuildSinkWithoutTable() throws IOException { + Pipeline p = TestPipeline.create(); + p.apply(Create.of()).setCoder(TableRowJsonCoder.of()) + .apply(BigQueryIO.Write.named("WriteMyTable")); + } + + @Test + public void testBuildSinkWithSchema() throws IOException { + TableSchema schema = new TableSchema(); + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable").withSchema(schema); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + schema, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testBuildSinkWithCreateDispositionNever() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + 
.to("foo.com:project:somedataset.sometable") + .withCreateDisposition(CreateDisposition.CREATE_NEVER); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_NEVER, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testBuildSinkWithCreateDispositionIfNeeded() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable") + .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testBuildSinkWithWriteDispositionTruncate() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable") + .withWriteDisposition(WriteDisposition.WRITE_TRUNCATE); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_TRUNCATE); + } + + @Test + public void testBuildSinkWithWriteDispositionAppend() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable") + .withWriteDisposition(WriteDisposition.WRITE_APPEND); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_APPEND); + } + + @Test + public void testBuildSinkWithWriteDispositionEmpty() throws IOException { + BigQueryIO.Write.Bound bound = BigQueryIO.Write.named("WriteMyTable") + .to("foo.com:project:somedataset.sometable") + .withWriteDisposition(WriteDisposition.WRITE_EMPTY); + checkWriteObject( + bound, "foo.com:project", "somedataset", "sometable", + null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY); + } + + @Test + public void testTableParsing() { + TableReference ref = BigQueryIO + .parseTableSpec("my-project:data_set.table_name"); + Assert.assertEquals("my-project", ref.getProjectId()); + Assert.assertEquals("data_set", ref.getDatasetId()); + Assert.assertEquals("table_name", ref.getTableId()); + } + + @Test + public void testTableParsing_validPatterns() { + BigQueryIO.parseTableSpec("a123-456:foo_bar.d"); + BigQueryIO.parseTableSpec("a12345:b.c"); + BigQueryIO.parseTableSpec("b12345.c"); + } + + @Test + public void testTableParsing_noProjectId() { + TableReference ref = BigQueryIO + .parseTableSpec("data_set.table_name"); + Assert.assertEquals(null, ref.getProjectId()); + Assert.assertEquals("data_set", ref.getDatasetId()); + Assert.assertEquals("table_name", ref.getTableId()); + } + + @Test + public void testTableParsingError() { + thrown.expect(IllegalArgumentException.class); + BigQueryIO.parseTableSpec("0123456:foo.bar"); + } + + @Test + public void testTableParsingError_2() { + thrown.expect(IllegalArgumentException.class); + BigQueryIO.parseTableSpec("myproject:.bar"); + } + + @Test + public void testTableParsingError_3() { + thrown.expect(IllegalArgumentException.class); + BigQueryIO.parseTableSpec(":a.b"); + } + + @Test + public void testTableParsingError_slash() { + thrown.expect(IllegalArgumentException.class); + BigQueryIO.parseTableSpec("a\\b12345:c.d"); + } + + // Test that BigQuery's special null placeholder objects can be encoded. 
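+ // (Data.nullOf(Object.class) is the null-value placeholder used by the
+ // Google API client library for JSON; TableRowJsonCoder must round-trip
+ // it without throwing.)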
+ @Test + public void testCoder_nullCell() throws CoderException { + TableRow row = new TableRow(); + row.set("temperature", Data.nullOf(Object.class)); + row.set("max_temperature", Data.nullOf(Object.class)); + + byte[] bytes = CoderUtils.encodeToByteArray(TableRowJsonCoder.of(), row); + + TableRow newRow = CoderUtils.decodeFromByteArray(TableRowJsonCoder.of(), bytes); + byte[] newBytes = CoderUtils.encodeToByteArray(TableRowJsonCoder.of(), newRow); + + Assert.assertArrayEquals(bytes, newBytes); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java new file mode 100644 index 0000000000000..e026c58102dae --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.junit.Assert.assertEquals; + +import com.google.api.services.datastore.DatastoreV1.Entity; +import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.EntityCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; + +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for DatastoreIO Read and Write transforms. + */ +@RunWith(JUnit4.class) +public class DatastoreIOTest { + + private String host; + private String datasetId; + private Query query; + + /** + * Sets the default dataset ID as "shakespearedataset", + * which contains two kinds of records: "food" and "shakespeare". + * The "food" table contains 10 manually constructed entities, + * The "shakespeare" table contains 172948 entities, + * where each entity represents one line in one play in + * Shakespeare collections (e.g. there are 172948 lines in + * all Shakespeare files). + * + *

+ * The function also sets up the datastore agent by creating
+ * a Datastore object to access the dataset "shakespearedataset".
+ *
+ *

Note that the local server must be started to let the agent + * be created normally. + */ + @Before + public void setUp() { + this.host = "http://localhost:1234"; + this.datasetId = "shakespearedataset"; + + Query.Builder q = Query.newBuilder(); + q.addKindBuilder().setName("shakespeare"); + this.query = q.build(); + } + + /** + * Test for reading one entity from kind "food" + */ + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testBuildRead() throws Exception { + DatastoreIO.Read.Bound readQuery = DatastoreIO.Read + .withHost(this.host) + .from(this.datasetId, this.query); + assertEquals(this.query, readQuery.query); + assertEquals(this.datasetId, readQuery.datasetId); + assertEquals(this.host, readQuery.host); + } + + @Test + public void testBuildReadAlt() throws Exception { + DatastoreIO.Read.Bound readQuery = DatastoreIO.Read + .from(this.datasetId, this.query) + .withHost(this.host); + assertEquals(this.query, readQuery.query); + assertEquals(this.datasetId, readQuery.datasetId); + assertEquals(this.host, readQuery.host); + } + + @Test(expected = IllegalStateException.class) + public void testBuildReadWithoutDatastoreSettingToCatchException() + throws Exception { + // create pipeline and run the pipeline to get result + Pipeline p = DirectPipeline.createForTest(); + p.apply(DatastoreIO.Read.named("ReadDatastore")); + } + + @Test + public void testBuildWrite() throws Exception { + DatastoreIO.Write.Bound write = DatastoreIO.Write + .to(this.datasetId) + .withHost(this.host); + assertEquals(this.host, write.host); + assertEquals(this.datasetId, write.datasetId); + } + + @Test + public void testBuildWriteAlt() throws Exception { + DatastoreIO.Write.Bound write = DatastoreIO.Write + .withHost(this.host) + .to(this.datasetId); + assertEquals(this.host, write.host); + assertEquals(this.datasetId, write.datasetId); + } + + @Test(expected = IllegalStateException.class) + public void testBuildWriteWithoutDatastoreToCatchException() throws Exception { + // create pipeline and run the pipeline to get result + Pipeline p = DirectPipeline.createForTest(); + p.apply(Create.of()).setCoder(EntityCoder.of()) + .apply(DatastoreIO.Write.named("WriteDatastore")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java new file mode 100644 index 0000000000000..b6aaf59b51ad8 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import static com.google.cloud.dataflow.sdk.TestUtils.INTS_ARRAY; +import static com.google.cloud.dataflow.sdk.TestUtils.LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.LINES_ARRAY; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_INTS_ARRAY; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES_ARRAY; +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.testing.TestDataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mockito; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for TextIO Read and Write transforms. 
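+ *
+ * A typical round trip, as exercised below (paths are illustrative):
+ * <pre>{@code
+ * PCollection<String> lines =
+ *     p.apply(TextIO.Read.named("ReadLines").from("/tmp/file.txt"));
+ * lines.apply(TextIO.Write.named("WriteLines").to("/tmp/output")
+ *                         .withoutSharding());
+ * }</pre>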
+ */ +@RunWith(JUnit4.class) +public class TextIOTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + private static class EmptySeekableByteChannel implements SeekableByteChannel { + public long position() { + return 0L; + } + + public SeekableByteChannel position(long newPosition) { + return this; + } + + public long size() { + return 0L; + } + + public SeekableByteChannel truncate(long size) { + return this; + } + + public int write(ByteBuffer src) { + return 0; + } + + public int read(ByteBuffer dst) { + return 0; + } + + public boolean isOpen() { + return true; + } + + public void close() { } + } + + private GcsUtil buildMockGcsUtil() throws IOException { + GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class); + + // Any request to open gets a new bogus channel + Mockito + .when(mockGcsUtil.open(Mockito.any(GcsPath.class))) + .thenReturn(new EmptySeekableByteChannel()); + + // Any request for expansion gets a single bogus URL + // after we first run the expansion code (which will generally + // return no results, which causes a crash we aren't testing) + Mockito + .when(mockGcsUtil.expand(Mockito.any(GcsPath.class))) + .thenReturn(Arrays.asList(GcsPath.fromUri("gs://bucket/foo"))); + + return mockGcsUtil; + } + + private TestDataflowPipelineOptions buildTestPipelineOptions() { + TestDataflowPipelineOptions options = + PipelineOptionsFactory.as(TestDataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + return options; + } + + void runTestRead(T[] expected, Coder coder) throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + String filename = tmpFile.getPath(); + + try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) { + for (T elem : expected) { + byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem); + String line = new String(encodedElem); + writer.println(line); + } + } + + DirectPipeline p = DirectPipeline.createForTest(); + + TextIO.Read.Bound read; + if (coder.equals(StringUtf8Coder.of())) { + TextIO.Read.Bound readStrings = TextIO.Read.from(filename); + // T==String + read = (TextIO.Read.Bound) readStrings; + } else { + read = TextIO.Read.from(filename).withCoder(coder); + } + + PCollection output = p.apply(read); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(output), + containsInAnyOrder(expected)); + } + + @Test + public void testReadStrings() throws Exception { + runTestRead(LINES_ARRAY, StringUtf8Coder.of()); + } + + @Test + public void testReadEmptyStrings() throws Exception { + runTestRead(NO_LINES_ARRAY, StringUtf8Coder.of()); + } + + @Test + public void testReadInts() throws Exception { + runTestRead(INTS_ARRAY, TextualIntegerCoder.of()); + } + + @Test + public void testReadEmptyInts() throws Exception { + runTestRead(NO_INTS_ARRAY, TextualIntegerCoder.of()); + } + + @Test + public void testReadNamed() { + Pipeline p = DirectPipeline.createForTest(); + + { + PCollection output1 = + p.apply(TextIO.Read.from("/tmp/file.txt")); + assertEquals("TextIO.Read.out", output1.getName()); + } + + { + PCollection output2 = + p.apply(TextIO.Read.named("MyRead").from("/tmp/file.txt")); + assertEquals("MyRead.out", output2.getName()); + } + + { + PCollection output3 = + p.apply(TextIO.Read.from("/tmp/file.txt").named("HerRead")); + assertEquals("HerRead.out", output3.getName()); + } + } + + void runTestWrite(T[] elems, Coder coder) throws Exception { + File tmpFile = 
tmpFolder.newFile("file.txt"); + String filename = tmpFile.getPath(); + + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection input = + p.apply(Create.of(Arrays.asList(elems))).setCoder(coder); + + TextIO.Write.Bound write; + if (coder.equals(StringUtf8Coder.of())) { + TextIO.Write.Bound writeStrings = + TextIO.Write.to(filename).withoutSharding(); + // T==String + write = (TextIO.Write.Bound) writeStrings; + } else { + write = TextIO.Write.to(filename).withCoder(coder).withoutSharding(); + } + + PDone output = input.apply(write); + + EvaluationResults results = p.run(); + + BufferedReader reader = new BufferedReader(new FileReader(tmpFile)); + List actual = new ArrayList<>(); + for (;;) { + String line = reader.readLine(); + if (line == null) { + break; + } + actual.add(line); + } + + String[] expected = new String[elems.length]; + for (int i = 0; i < elems.length; i++) { + T elem = elems[i]; + byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem); + String line = new String(encodedElem); + expected[i] = line; + } + + assertThat(actual, + containsInAnyOrder(expected)); + } + + @Test + public void testWriteStrings() throws Exception { + runTestWrite(LINES_ARRAY, StringUtf8Coder.of()); + } + + @Test + public void testWriteEmptyStrings() throws Exception { + runTestWrite(NO_LINES_ARRAY, StringUtf8Coder.of()); + } + + @Test + public void testWriteInts() throws Exception { + runTestWrite(INTS_ARRAY, TextualIntegerCoder.of()); + } + + @Test + public void testWriteEmptyInts() throws Exception { + runTestWrite(NO_INTS_ARRAY, TextualIntegerCoder.of()); + } + + @Test + public void testWriteSharded() throws IOException { + File outFolder = tmpFolder.newFolder(); + String filename = outFolder.toPath().resolve("output").toString(); + + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection input = + p.apply(Create.of(Arrays.asList(LINES_ARRAY))) + .setCoder(StringUtf8Coder.of()); + + PDone done = input.apply( + TextIO.Write.to(filename).withNumShards(2).withSuffix(".txt")); + + EvaluationResults results = p.run(); + + String[] files = outFolder.list(); + + assertThat(Arrays.asList(files), + containsInAnyOrder("output-00000-of-00002.txt", + "output-00001-of-00002.txt")); + } + + @Test + public void testWriteNamed() { + Pipeline p = DirectPipeline.createForTest(); + + PCollection input = + p.apply(Create.of(LINES)).setCoder(StringUtf8Coder.of()); + + { + PTransform, PDone> transform1 = + TextIO.Write.to("/tmp/file.txt"); + assertEquals("TextIO.Write", transform1.getName()); + } + + { + PTransform, PDone> transform2 = + TextIO.Write.named("MyWrite").to("/tmp/file.txt"); + assertEquals("MyWrite", transform2.getName()); + } + + { + PTransform, PDone> transform3 = + TextIO.Write.to("/tmp/file.txt").named("HerWrite"); + assertEquals("HerWrite", transform3.getName()); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testUnsupportedFilePattern() throws IOException { + File outFolder = tmpFolder.newFolder(); + String filename = outFolder.toPath().resolve("output@*").toString(); + + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection input = + p.apply(Create.of(Arrays.asList(LINES_ARRAY))) + .setCoder(StringUtf8Coder.of()); + + PDone done = input.apply(TextIO.Write.to(filename)); + + EvaluationResults results = p.run(); + Assert.fail("Expected failure due to unsupported output pattern"); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests a few corner cases that should not crash. 
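+ * For example, "gs://bucket/foo/*" and "gs://bucket/foo/[0-9]baz?" are
+ * accepted below, while "gs://bucket/foo*/baz" (tested further down) is
+ * rejected because its wildcard precedes a directory delimiter.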
+ */ + @Test + public void testGoodWildcards() throws Exception { + TestDataflowPipelineOptions options = buildTestPipelineOptions(); + options.setGcsUtil(buildMockGcsUtil()); + + Pipeline pipeline = Pipeline.create(options); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*baz*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*baz?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]baz?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*wonka*")); + + // Check that running doesn't fail. + pipeline.run(); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "*". + */ + @Test + public void testBadWildcardStar() throws Exception { + Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo*/baz")); + + // Check that running does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("wildcard"); + pipeline.run(); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "?". + */ + @Test + public void testBadWildcardOptional() throws Exception { + Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo?/baz")); + + // Check that running does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("wildcard"); + pipeline.run(); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "[]" based character classes. + */ + @Test + public void testBadWildcardBrackets() throws Exception { + Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); + + // Check that translation does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("wildcard"); + pipeline.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/user.avsc b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/user.avsc new file mode 100644 index 0000000000000..451a19fa12c32 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/user.avsc @@ -0,0 +1,10 @@ +{ + "namespace": "com.google.cloud.dataflow.sdk.io", + "type": "record", + "name": "User", + "fields": [ + { "name": "name", "type": "string"}, + { "name": "favorite_number", "type": ["int", "null"]}, + { "name": "favorite_color", "type": ["string", "null"]} + ] +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java new file mode 100644 index 0000000000000..f4d6f0499d44f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.testing.ResetDateTimeProvider; +import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link DataflowPipelineOptions}. */ +@RunWith(JUnit4.class) +public class DataflowPipelineOptionsTest { + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + @Rule public ResetDateTimeProvider resetDateTimeProviderRule = new ResetDateTimeProvider(); + + @Test + public void testJobNameIsSet() { + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setJobName("TestJobName"); + assertEquals("TestJobName", options.getJobName()); + } + + @Test + public void testUserNameIsNotSet() { + resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); + System.getProperties().remove("user.name"); + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("TestApplication"); + assertEquals("testapplication--1208190706", options.getJobName()); + assertTrue(options.getJobName().length() <= 40); + } + + @Test + public void testAppNameAndUserNameIsTooLong() { + resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); + System.getProperties().put("user.name", "abcdeabcdeabcdeabcdeabcdeabcde"); + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("1234567890123456789012345678901234567890"); + assertEquals("a234567890123456789-abcdeabcd-1208190706", options.getJobName()); + assertTrue(options.getJobName().length() <= 40); + } + + @Test + public void testAppNameIsTooLong() { + resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); + System.getProperties().put("user.name", "abcde"); + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("1234567890123456789012345678901234567890"); + assertEquals("a2345678901234567890123-abcde-1208190706", options.getJobName()); + assertTrue(options.getJobName().length() <= 40); + } + + @Test + public void testUserNameIsTooLong() { + resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); + System.getProperties().put("user.name", "abcdeabcdeabcdeabcdeabcdeabcde"); + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("1234567890"); + assertEquals("a234567890-abcdeabcdeabcdeabc-1208190706", options.getJobName()); + assertTrue(options.getJobName().length() <= 40); + } + + + @Test + public void testUtf8UserNameAndApplicationNameIsNormalized() { + resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); + System.getProperties().put("user.name", "ði ıntəˈnæʃənəl "); + DataflowPipelineOptions options = 
PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("fəˈnɛtık əsoʊsiˈeıʃn"); + assertEquals("f00n0t0k00so0si0e00-0i00nt00n-1208190706", options.getJobName()); + assertTrue(options.getJobName().length() <= 40); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java new file mode 100644 index 0000000000000..ca1e9502bf97d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -0,0 +1,502 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** Tests for {@link PipelineOptionsFactory}. 
*/ +@RunWith(JUnit4.class) +public class PipelineOptionsFactoryTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + + @Test + public void testCreationFromSystemProperties() { + System.getProperties().putAll(ImmutableMap + .builder() + .put("root_url", "test_root_url") + .put("service_path", "test_service_path") + .put("temp_gcs_directory", + "gs://tap-testing-30lsaafg6g3zudmjbnsdz6wj/unittesting/staging") + .put("service_account_name", "test_service_account_name") + .put("service_account_keyfile", "test_service_account_keyfile") + .put("worker_id", "test_worker_id") + .put("project_id", "test_project_id") + .put("job_id", "test_job_id") + .build()); + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + assertEquals("test_root_url", options.getApiRootUrl()); + assertEquals("test_service_path", options.getDataflowEndpoint()); + assertEquals("gs://tap-testing-30lsaafg6g3zudmjbnsdz6wj/unittesting/staging", + options.getTempLocation()); + assertEquals("test_service_account_name", options.getServiceAccountName()); + assertEquals("test_service_account_keyfile", options.getServiceAccountKeyfile()); + assertEquals("test_worker_id", options.getWorkerId()); + assertEquals("test_project_id", options.getProject()); + assertEquals("test_job_id", options.getJobId()); + } + + @Test + public void testAppNameIsSet() { + ApplicationNameOptions options = PipelineOptionsFactory.as(ApplicationNameOptions.class); + assertEquals(PipelineOptionsFactoryTest.class.getSimpleName(), options.getAppName()); + } + + /** A simple test interface. */ + public static interface TestPipelineOptions extends PipelineOptions { + String getTestPipelineOption(); + void setTestPipelineOption(String value); + } + + @Test + public void testAppNameIsSetWhenUsingAs() { + TestPipelineOptions options = PipelineOptionsFactory.as(TestPipelineOptions.class); + assertEquals(PipelineOptionsFactoryTest.class.getSimpleName(), + options.as(ApplicationNameOptions.class).getAppName()); + } + + @Test + public void testManualRegistration() { + assertFalse(PipelineOptionsFactory.getRegisteredOptions().contains(TestPipelineOptions.class)); + PipelineOptionsFactory.register(TestPipelineOptions.class); + assertTrue(PipelineOptionsFactory.getRegisteredOptions().contains(TestPipelineOptions.class)); + } + + @Test + public void testDefaultRegistration() { + assertTrue(PipelineOptionsFactory.getRegisteredOptions().contains(PipelineOptions.class)); + } + + /** A test interface missing a getter. */ + public static interface MissingGetter extends PipelineOptions { + void setObject(Object value); + } + + @Test + public void testMissingGetterThrows() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Expected getter for property [object] of type [java.lang.Object] on " + + "[com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MissingGetter]."); + + PipelineOptionsFactory.as(MissingGetter.class); + } + + /** A test interface missing a setter. 
*/ + public static interface MissingSetter extends PipelineOptions { + Object getObject(); + } + + @Test + public void testMissingSetterThrows() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Expected setter for property [object] of type [java.lang.Object] on " + + "[com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MissingSetter]."); + + PipelineOptionsFactory.as(MissingSetter.class); + } + + /** A test interface representing a composite interface. */ + public static interface CombinedObject extends MissingGetter, MissingSetter { + } + + @Test + public void testHavingSettersGettersFromSeparateInterfacesIsValid() { + PipelineOptionsFactory.as(CombinedObject.class); + } + + /** A test interface which contains a non-bean style method. */ + public static interface ExtraneousMethod extends PipelineOptions { + public String extraneousMethod(int value, String otherValue); + } + + @Test + public void testHavingExtraneousMethodThrows() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Methods [java.lang.String extraneousMethod(int, java.lang.String)] on " + + "[com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$ExtraneousMethod] " + + "do not conform to being bean properties."); + + PipelineOptionsFactory.as(ExtraneousMethod.class); + } + + /** A test interface which has a conflicting return type with its parent. */ + public static interface ReturnTypeConflict extends CombinedObject { + @Override + String getObject(); + void setObject(String value); + } + + @Test + public void testReturnTypeConflictThrows() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Method [getObject] has multiple definitions [public abstract java.lang.Object " + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MissingSetter" + + ".getObject(), public abstract java.lang.String " + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$ReturnTypeConflict" + + ".getObject()] with different return types for [" + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$ReturnTypeConflict]."); + PipelineOptionsFactory.as(ReturnTypeConflict.class); + } + + /** Test interface that has {@link JsonIgnore @JsonIgnore} on a setter for a property. */ + public static interface SetterWithJsonIgnore extends PipelineOptions { + String getValue(); + @JsonIgnore + void setValue(String value); + } + + @Test + public void testSetterAnnotatedWithJsonIgnore() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Expected setter for property [value] to not be marked with @JsonIgnore on [com." + + "google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$SetterWithJsonIgnore]"); + PipelineOptionsFactory.as(SetterWithJsonIgnore.class); + } + + /** + * This class is has a conflicting field with {@link CombinedObject} that doesn't have + * {@link JsonIgnore @JsonIgnore}. + */ + public static interface GetterWithJsonIgnore extends PipelineOptions { + @JsonIgnore + Object getObject(); + void setObject(Object value); + } + + @Test + public void testNotAllGettersAnnotatedWithJsonIgnore() throws Exception { + // Initial construction is valid. 
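+    // The @JsonIgnore mismatch with MissingSetter is only detected when the
+    // options are converted to the combined interface below.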
+ GetterWithJsonIgnore options = PipelineOptionsFactory.as(GetterWithJsonIgnore.class); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Expected getter for property [object] to be marked with @JsonIgnore on all [com." + + "google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MissingSetter, " + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$GetterWithJsonIgnore], " + + "found only on [com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$GetterWithJsonIgnore]"); + + // When we attempt to convert, we should error at this moment. + options.as(CombinedObject.class); + } + + @Test + public void testAppNameIsNotOverriddenWhenPassedInViaCommandLine() { + ApplicationNameOptions options = PipelineOptionsFactory + .fromArgs(new String[]{ "--appName=testAppName" }) + .as(ApplicationNameOptions.class); + assertEquals("testAppName", options.getAppName()); + } + + @Test + public void testPropertyIsSetOnRegisteredPipelineOptionNotPartOfOriginalInterface() { + PipelineOptions options = PipelineOptionsFactory + .fromArgs(new String[]{ "--project=testProject" }) + .create(); + assertEquals("testProject", options.as(GcpOptions.class).getProject()); + } + + /** A test interface containing all the primitives */ + public static interface Primitives extends PipelineOptions { + boolean getBoolean(); + void setBoolean(boolean value); + char getChar(); + void setChar(char value); + byte getByte(); + void setByte(byte value); + short getShort(); + void setShort(short value); + int getInt(); + void setInt(int value); + long getLong(); + void setLong(long value); + float getFloat(); + void setFloat(float value); + double getDouble(); + void setDouble(double value); + } + + @Test + public void testPrimitives() { + String[] args = new String[] { + "--boolean=true", + "--char=d", + "--byte=12", + "--short=300", + "--int=100000", + "--long=123890123890", + "--float=55.5", + "--double=12.3"}; + + Primitives options = PipelineOptionsFactory.fromArgs(args).as(Primitives.class); + assertTrue(options.getBoolean()); + assertEquals('d', options.getChar()); + assertEquals((byte) 12, options.getByte()); + assertEquals((short) 300, options.getShort()); + assertEquals(100000, options.getInt()); + assertEquals(123890123890L, options.getLong()); + assertEquals(55.5f, options.getFloat(), 0.0f); + assertEquals(12.3, options.getDouble(), 0.0); + } + + @Test + public void testBooleanShorthandArgument() { + String[] args = new String[] {"--boolean"}; + + Primitives options = PipelineOptionsFactory.fromArgs(args).as(Primitives.class); + assertTrue(options.getBoolean()); + } + + /** A test interface containing all supported objects */ + public static interface Objects extends PipelineOptions { + Boolean getBoolean(); + void setBoolean(Boolean value); + Character getChar(); + void setChar(Character value); + Byte getByte(); + void setByte(Byte value); + Short getShort(); + void setShort(Short value); + Integer getInt(); + void setInt(Integer value); + Long getLong(); + void setLong(Long value); + Float getFloat(); + void setFloat(Float value); + Double getDouble(); + void setDouble(Double value); + String getString(); + void setString(String value); + Class getClassValue(); + void setClassValue(Class value); + } + + @Test + public void testObjects() { + String[] args = new String[] { + "--boolean=true", + "--char=d", + "--byte=12", + "--short=300", + "--int=100000", + "--long=123890123890", + "--float=55.5", + "--double=12.3", + 
"--string=stringValue", + "--classValue=" + PipelineOptionsFactoryTest.class.getName()}; + + Objects options = PipelineOptionsFactory.fromArgs(args).as(Objects.class); + assertTrue(options.getBoolean()); + assertEquals(Character.valueOf('d'), options.getChar()); + assertEquals(Byte.valueOf((byte) 12), options.getByte()); + assertEquals(Short.valueOf((short) 300), options.getShort()); + assertEquals(Integer.valueOf(100000), options.getInt()); + assertEquals(Long.valueOf(123890123890L), options.getLong()); + assertEquals(Float.valueOf(55.5f), options.getFloat(), 0.0f); + assertEquals(Double.valueOf(12.3), options.getDouble(), 0.0); + assertEquals("stringValue", options.getString()); + assertEquals(PipelineOptionsFactoryTest.class, options.getClassValue()); + } + + @Test + public void testMissingArgument() { + String[] args = new String[] {}; + + Objects options = PipelineOptionsFactory.fromArgs(args).as(Objects.class); + assertNull(options.getString()); + } + + /** A test interface containing all supported array return types */ + public static interface Arrays extends PipelineOptions { + boolean[] getBoolean(); + void setBoolean(boolean[] value); + char[] getChar(); + void setChar(char[] value); + short[] getShort(); + void setShort(short[] value); + int[] getInt(); + void setInt(int[] value); + long[] getLong(); + void setLong(long[] value); + float[] getFloat(); + void setFloat(float[] value); + double[] getDouble(); + void setDouble(double[] value); + String[] getString(); + void setString(String[] value); + Class[] getClassValue(); + void setClassValue(Class[] value); + } + + @Test + public void testArrays() { + String[] args = new String[] { + "--boolean=true", + "--boolean=true", + "--boolean=false", + "--char=d", + "--char=e", + "--char=f", + "--short=300", + "--short=301", + "--short=302", + "--int=100000", + "--int=100001", + "--int=100002", + "--long=123890123890", + "--long=123890123891", + "--long=123890123892", + "--float=55.5", + "--float=55.6", + "--float=55.7", + "--double=12.3", + "--double=12.4", + "--double=12.5", + "--string=stringValue1", + "--string=stringValue2", + "--string=stringValue3", + "--classValue=" + PipelineOptionsFactory.class.getName(), + "--classValue=" + PipelineOptionsFactoryTest.class.getName()}; + + Arrays options = PipelineOptionsFactory.fromArgs(args).as(Arrays.class); + boolean[] bools = options.getBoolean(); + assertTrue(bools[0] && bools[1] && !bools[2]); + assertArrayEquals(new char[] {'d', 'e', 'f'}, options.getChar()); + assertArrayEquals(new short[] {300, 301, 302}, options.getShort()); + assertArrayEquals(new int[] {100000, 100001, 100002}, options.getInt()); + assertArrayEquals(new long[] {123890123890L, 123890123891L, 123890123892L}, options.getLong()); + assertArrayEquals(new float[] {55.5f, 55.6f, 55.7f}, options.getFloat(), 0.0f); + assertArrayEquals(new double[] {12.3, 12.4, 12.5}, options.getDouble(), 0.0); + assertArrayEquals(new String[] {"stringValue1", "stringValue2", "stringValue3"}, + options.getString()); + assertArrayEquals(new Class[] {PipelineOptionsFactory.class, + PipelineOptionsFactoryTest.class}, + options.getClassValue()); + } + + @Test + public void testOutOfOrderArrays() { + String[] args = new String[] { + "--char=d", + "--boolean=true", + "--boolean=true", + "--char=e", + "--char=f", + "--boolean=false"}; + + Arrays options = PipelineOptionsFactory.fromArgs(args).as(Arrays.class); + boolean[] bools = options.getBoolean(); + assertTrue(bools[0] && bools[1] && !bools[2]); + assertArrayEquals(new char[] {'d', 'e', 'f'}, 
options.getChar()); + } + + /** A test interface containing all supported List return types */ + public static interface Lists extends PipelineOptions { + List getString(); + void setString(List value); + } + + @Test + public void testList() { + String[] args = + new String[] {"--string=stringValue1", "--string=stringValue2", "--string=stringValue3"}; + + Lists options = PipelineOptionsFactory.fromArgs(args).as(Lists.class); + assertEquals(ImmutableList.of("stringValue1", "stringValue2", "stringValue3"), + options.getString()); + } + + @Test + public void testListShorthand() { + String[] args = new String[] {"--string=stringValue1,stringValue2,stringValue3"}; + + Lists options = PipelineOptionsFactory.fromArgs(args).as(Lists.class); + assertEquals(ImmutableList.of("stringValue1", "stringValue2", "stringValue3"), + options.getString()); + } + + @Test + public void testMixedShorthandAndLongStyleList() { + String[] args = new String[] { + "--char=d", + "--char=e", + "--char=f", + "--char=g,h,i", + "--char=j", + "--char=k", + "--char=l", + "--char=m,n,o"}; + + Arrays options = PipelineOptionsFactory.fromArgs(args).as(Arrays.class); + assertArrayEquals(new char[] {'d', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o'}, + options.getChar()); + } + + @Test + public void testSetASingularAttributeUsingAListThrowsAnError() { + String[] args = new String[] { + "--diskSizeGb=100", + "--diskSizeGb=200"}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("expected one element but was"); + PipelineOptionsFactory.fromArgs(args).create(); + } + + @Test + public void testSettingRunner() { + String[] args = new String[] {"--runner=BlockingDataflowPipelineRunner"}; + + PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); + assertEquals(BlockingDataflowPipelineRunner.class, options.getRunner()); + } + + @Test + public void testSettingUnknownRunner() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Unknown 'runner' specified UnknownRunner, supported pipeline " + + "runners [DirectPipelineRunner, DataflowPipelineRunner, BlockingDataflowPipelineRunner]"); + String[] args = new String[] {"--runner=UnknownRunner"}; + + PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); + options.getRunner(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java new file mode 100644 index 0000000000000..9db6a6b754221 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.options; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link PipelineOptions}. */ +@RunWith(JUnit4.class) +public class PipelineOptionsTest { + /** Interface used for testing that {@link PipelineOptions#as(Class)} functions */ + public static interface TestOptions extends PipelineOptions { + } + + @Test + public void testDynamicAs() { + TestOptions options = PipelineOptionsFactory.create().as(TestOptions.class); + assertNotNull(options); + } + + @Test + public void testDefaultRunnerIsSet() { + assertEquals(DirectPipelineRunner.class, PipelineOptionsFactory.create().getRunner()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java new file mode 100644 index 0000000000000..e0decb9f92255 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link PipelineOptionsValidator}. */ +@RunWith(JUnit4.class) +public class PipelineOptionsValidatorTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + + /** A test interface with an {@link Validation.Required} annotation. */ + public static interface Required extends PipelineOptions { + @Validation.Required + public String getObject(); + public void setObject(String value); + } + + @Test + public void testWhenRequiredOptionIsSet() { + Required required = PipelineOptionsFactory.as(Required.class); + required.setObject("blah"); + PipelineOptionsValidator.validate(Required.class, required); + } + + @Test + public void testWhenRequiredOptionIsSetAndCleared() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Expected non-null property to be set for " + + "[public abstract java.lang.String com.google.cloud.dataflow." + + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + Required required = PipelineOptionsFactory.as(Required.class); + required.setObject("blah"); + required.setObject(null); + PipelineOptionsValidator.validate(Required.class, required); + } + + @Test + public void testWhenRequiredOptionIsNeverSet() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Expected non-null property to be set for " + + "[public abstract java.lang.String com.google.cloud.dataflow." 
+ + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + Required required = PipelineOptionsFactory.as(Required.class); + PipelineOptionsValidator.validate(Required.class, required); + } + + /** A test interface which overrides the parents method. */ + public static interface SubClassValidation extends Required { + @Override + public String getObject(); + @Override + public void setObject(String value); + } + + @Test + public void testValidationOnOverriddenMethods() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Expected non-null property to be set for " + + "[public abstract java.lang.String com.google.cloud.dataflow." + + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + SubClassValidation required = PipelineOptionsFactory.as(SubClassValidation.class); + PipelineOptionsValidator.validate(Required.class, required); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java new file mode 100644 index 0000000000000..b9b07e8626b1d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -0,0 +1,625 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** Tests for {@link ProxyInvocationHandler}. */ +@RunWith(JUnit4.class) +public class ProxyInvocationHandlerTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + + /** A test interface with some primitives and objects. 
*/ + public static interface Simple extends PipelineOptions { + boolean isOptionEnabled(); + void setOptionEnabled(boolean value); + int getPrimitive(); + void setPrimitive(int value); + String getString(); + void setString(String value); + } + + @Test + public void testPropertySettingAndGetting() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + proxy.setString("OBJECT"); + proxy.setOptionEnabled(true); + proxy.setPrimitive(4); + assertEquals("OBJECT", proxy.getString()); + assertTrue(proxy.isOptionEnabled()); + assertEquals(4, proxy.getPrimitive()); + } + + /** A test interface containing all the JLS default values. */ + public static interface JLSDefaults extends PipelineOptions { + boolean getBoolean(); + void setBoolean(boolean value); + char getChar(); + void setChar(char value); + byte getByte(); + void setByte(byte value); + short getShort(); + void setShort(short value); + int getInt(); + void setInt(int value); + long getLong(); + void setLong(long value); + float getFloat(); + void setFloat(float value); + double getDouble(); + void setDouble(double value); + Object getObject(); + void setObject(Object value); + } + + @Test + public void testGettingJLSDefaults() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + JLSDefaults proxy = handler.as(JLSDefaults.class); + assertFalse(proxy.getBoolean()); + assertEquals('\0', proxy.getChar()); + assertEquals((byte) 0, proxy.getByte()); + assertEquals((short) 0, proxy.getShort()); + assertEquals(0, proxy.getInt()); + assertEquals(0L, proxy.getLong()); + assertEquals(0f, proxy.getFloat(), 0f); + assertEquals(0d, proxy.getDouble(), 0d); + assertNull(proxy.getObject()); + } + + /** A {@link DefaultValueFactory} which is used for testing. */ + public static class TestOptionFactory implements DefaultValueFactory { + @Override + public String create(PipelineOptions options) { + return "testOptionFactory"; + } + } + + /** A test interface containing all the {@link Default} annotations. 
*/ + public static interface DefaultAnnotations extends PipelineOptions { + @Default.Boolean(true) + boolean getBoolean(); + void setBoolean(boolean value); + @Default.Character('a') + char getChar(); + void setChar(char value); + @Default.Byte((byte) 4) + byte getByte(); + void setByte(byte value); + @Default.Short((short) 5) + short getShort(); + void setShort(short value); + @Default.Integer(6) + int getInt(); + void setInt(int value); + @Default.Long(7L) + long getLong(); + void setLong(long value); + @Default.Float(8f) + float getFloat(); + void setFloat(float value); + @Default.Double(9d) + double getDouble(); + void setDouble(double value); + @Default.String("testString") + String getString(); + void setString(String value); + @Default.Class(DefaultAnnotations.class) + Class getClassOption(); + void setClassOption(Class value); + @Default.InstanceFactory(TestOptionFactory.class) + String getComplex(); + void setComplex(String value); + } + + @Test + public void testAnnotationDefaults() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + DefaultAnnotations proxy = handler.as(DefaultAnnotations.class); + assertTrue(proxy.getBoolean()); + assertEquals('a', proxy.getChar()); + assertEquals((byte) 4, proxy.getByte()); + assertEquals((short) 5, proxy.getShort()); + assertEquals(6, proxy.getInt()); + assertEquals(7, proxy.getLong()); + assertEquals(8f, proxy.getFloat(), 0f); + assertEquals(9d, proxy.getDouble(), 0d); + assertEquals("testString", proxy.getString()); + assertEquals(DefaultAnnotations.class, proxy.getClassOption()); + assertEquals("testOptionFactory", proxy.getComplex()); + } + + @Test + public void testEquals() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + JLSDefaults sameAsProxy = proxy.as(JLSDefaults.class); + ProxyInvocationHandler handler2 = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy2 = handler2.as(Simple.class); + JLSDefaults sameAsProxy2 = proxy2.as(JLSDefaults.class); + assertTrue(handler.equals(proxy)); + assertTrue(proxy.equals(proxy)); + assertTrue(proxy.equals(sameAsProxy)); + assertFalse(handler.equals(handler2)); + assertFalse(proxy.equals(proxy2)); + assertFalse(proxy.equals(sameAsProxy2)); + } + + @Test + public void testHashCode() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + JLSDefaults sameAsProxy = proxy.as(JLSDefaults.class); + ProxyInvocationHandler handler2 = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy2 = handler.as(Simple.class); + JLSDefaults sameAsProxy2 = proxy.as(JLSDefaults.class); + assertTrue(handler.hashCode() == proxy.hashCode()); + assertTrue(proxy.hashCode() == sameAsProxy.hashCode()); + assertFalse(handler.hashCode() != handler2.hashCode()); + assertFalse(proxy.hashCode() != proxy2.hashCode()); + assertFalse(proxy.hashCode() != sameAsProxy2.hashCode()); + } + + @Test + public void testToString() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + proxy.setString("stringValue"); + DefaultAnnotations proxy2 = proxy.as(DefaultAnnotations.class); + proxy2.setLong(57L); + assertEquals("Current Settings:\n" + + " long: 57\n" + + " string: stringValue\n", + proxy.toString()); + } + + /** A test interface containing an unknown method. 
*/ + public static interface UnknownMethod { + void unknownMethod(); + } + + @Test + public void testInvokeWithUnknownMethod() throws Exception { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Unknown method [public abstract void com.google.cloud." + + "dataflow.sdk.options.ProxyInvocationHandlerTest$UnknownMethod.unknownMethod()] " + + "invoked with args [null]."); + + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + handler.invoke(handler, UnknownMethod.class.getMethod("unknownMethod"), null); + } + + /** A test interface which extends another interface. */ + public static interface SubClass extends Simple { + String getExtended(); + void setExtended(String value); + } + + @Test + public void testSubClassStoresSuperInterfaceValues() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SubClass extended = handler.as(SubClass.class); + + extended.setString("parentValue"); + assertEquals("parentValue", extended.getString()); + } + + @Test + public void testUpCastRetainsSuperInterfaceValues() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SubClass extended = handler.as(SubClass.class); + + extended.setString("parentValue"); + Simple simple = extended.as(Simple.class); + assertEquals("parentValue", simple.getString()); + } + + @Test + public void testUpCastRetainsSubClassValues() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SubClass extended = handler.as(SubClass.class); + + extended.setExtended("subClassValue"); + SubClass extended2 = extended.as(Simple.class).as(SubClass.class); + assertEquals("subClassValue", extended2.getExtended()); + } + + /** A test interface which is a sibling to {@link SubClass}. */ + public static interface Sibling extends Simple { + String getSibling(); + void setSibling(String value); + } + + @Test + public void testAsSiblingRetainsSuperInterfaceValues() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SubClass extended = handler.as(SubClass.class); + + extended.setString("parentValue"); + Sibling sibling = extended.as(Sibling.class); + assertEquals("parentValue", sibling.getString()); + } + + /** A test interface which has the same methods as the parent. */ + public static interface MethodConflict extends Simple { + @Override + String getString(); + @Override + void setString(String value); + } + + @Test + public void testMethodConflictProvidesSameValue() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + MethodConflict methodConflict = handler.as(MethodConflict.class); + + methodConflict.setString("conflictValue"); + assertEquals("conflictValue", methodConflict.getString()); + assertEquals("conflictValue", methodConflict.as(Simple.class).getString()); + } + + /** A test interface which has the same methods as its parent and grandparent. 
*/ + public static interface DeepMethodConflict extends MethodConflict { + @Override + String getString(); + @Override + void setString(String value); + @Override + int getPrimitive(); + @Override + void setPrimitive(int value); + } + + @Test + public void testDeepMethodConflictProvidesSameValue() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + DeepMethodConflict deepMethodConflict = handler.as(DeepMethodConflict.class); + + // Tests overriding an already overridden method + deepMethodConflict.setString("conflictValue"); + assertEquals("conflictValue", deepMethodConflict.getString()); + assertEquals("conflictValue", deepMethodConflict.as(MethodConflict.class).getString()); + assertEquals("conflictValue", deepMethodConflict.as(Simple.class).getString()); + + // Tests overriding a method from an ancestor class + deepMethodConflict.setPrimitive(5); + assertEquals(5, deepMethodConflict.getPrimitive()); + assertEquals(5, deepMethodConflict.as(MethodConflict.class).getPrimitive()); + assertEquals(5, deepMethodConflict.as(Simple.class).getPrimitive()); + } + + /** A test interface which shares the same methods as {@link Sibling}. */ + public static interface SimpleSibling extends PipelineOptions { + String getString(); + void setString(String value); + } + + @Test + public void testDisjointSiblingsShareValues() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SimpleSibling proxy = handler.as(SimpleSibling.class); + proxy.setString("siblingValue"); + assertEquals("siblingValue", proxy.getString()); + assertEquals("siblingValue", proxy.as(Simple.class).getString()); + } + + /** A test interface which joins two sibling interfaces which have conflicting methods. */ + public static interface SiblingMethodConflict extends Simple, SimpleSibling { + } + + @Test + public void testSiblingMethodConflict() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + SiblingMethodConflict siblingMethodConflict = handler.as(SiblingMethodConflict.class); + siblingMethodConflict.setString("siblingValue"); + assertEquals("siblingValue", siblingMethodConflict.getString()); + assertEquals("siblingValue", siblingMethodConflict.as(Simple.class).getString()); + assertEquals("siblingValue", siblingMethodConflict.as(SimpleSibling.class).getString()); + } + + /** A test interface which has only the getter and only a setter overriden. 
*/ + public static interface PartialMethodConflict extends Simple { + @Override + String getString(); + @Override + void setPrimitive(int value); + } + + @Test + public void testPartialMethodConflictProvidesSameValue() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + PartialMethodConflict partialMethodConflict = handler.as(PartialMethodConflict.class); + + // Tests overriding a getter property which is only partially bound + partialMethodConflict.setString("conflictValue"); + assertEquals("conflictValue", partialMethodConflict.getString()); + assertEquals("conflictValue", partialMethodConflict.as(Simple.class).getString()); + + // Tests overriding a setter property which is only partially bound + partialMethodConflict.setPrimitive(5); + assertEquals(5, partialMethodConflict.getPrimitive()); + assertEquals(5, partialMethodConflict.as(Simple.class).getPrimitive()); + } + + @Test + public void testJsonConversionForDefault() throws Exception { + PipelineOptions options = PipelineOptionsFactory.create(); + assertNotNull(serializeDeserialize(PipelineOptions.class, options)); + } + + /** Test interface for JSON conversion of simple types */ + private static interface SimpleTypes extends PipelineOptions { + int getInteger(); + void setInteger(int value); + String getString(); + void setString(String value); + } + + @Test + public void testJsonConversionForSimpleTypes() throws Exception { + SimpleTypes options = PipelineOptionsFactory.as(SimpleTypes.class); + options.setString("TestValue"); + options.setInteger(5); + SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, options); + assertEquals(5, options2.getInteger()); + assertEquals("TestValue", options2.getString()); + } + + @Test + public void testJsonConversionOfAJsonConvertedType() throws Exception { + SimpleTypes options = PipelineOptionsFactory.as(SimpleTypes.class); + options.setString("TestValue"); + options.setInteger(5); + SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, + serializeDeserialize(SimpleTypes.class, options)); + assertEquals(5, options2.getInteger()); + assertEquals("TestValue", options2.getString()); + } + + @Test + public void testJsonConversionForPartiallySerializedValues() throws Exception { + SimpleTypes options = PipelineOptionsFactory.as(SimpleTypes.class); + options.setInteger(5); + SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, options); + options2.setString("TestValue"); + SimpleTypes options3 = serializeDeserialize(SimpleTypes.class, options2); + assertEquals(5, options3.getInteger()); + assertEquals("TestValue", options3.getString()); + } + + @Test + public void testJsonConversionForOverriddenSerializedValues() throws Exception { + SimpleTypes options = PipelineOptionsFactory.as(SimpleTypes.class); + options.setInteger(-5); + options.setString("NeedsToBeOverridden"); + SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, options); + options2.setInteger(5); + options2.setString("TestValue"); + SimpleTypes options3 = serializeDeserialize(SimpleTypes.class, options2); + assertEquals(5, options3.getInteger()); + assertEquals("TestValue", options3.getString()); + } + + /** Test interface for JSON conversion of container types */ + private static interface ContainerTypes extends PipelineOptions { + List getList(); + void setList(List values); + Map getMap(); + void setMap(Map values); + Set getSet(); + void setSet(Set values); + } + + @Test + public void testJsonConversionForContainerTypes() throws Exception { + 
List list = ImmutableList.of("a", "b", "c"); + Map map = ImmutableMap.of("d", "x", "e", "y", "f", "z"); + Set set = ImmutableSet.of("g", "h", "i"); + ContainerTypes options = PipelineOptionsFactory.as(ContainerTypes.class); + options.setList(list); + options.setMap(map); + options.setSet(set); + ContainerTypes options2 = serializeDeserialize(ContainerTypes.class, options); + assertEquals(list, options2.getList()); + assertEquals(map, options2.getMap()); + assertEquals(set, options2.getSet()); + } + + /** Test interface for conversion of inner types */ + private static class InnerType { + public double doubleField; + + static InnerType of(double value) { + InnerType rval = new InnerType(); + rval.doubleField = value; + return rval; + } + + @Override + public boolean equals(Object obj) { + return obj != null + && getClass().equals(obj.getClass()) + && Objects.equals(doubleField, ((InnerType) obj).doubleField); + } + } + + /** Test interface for conversion of generics and inner types */ + private static class ComplexType { + public String stringField; + public Integer intField; + public List genericType; + public InnerType innerType; + + @Override + public boolean equals(Object obj) { + return obj != null + && getClass().equals(obj.getClass()) + && Objects.equals(stringField, ((ComplexType) obj).stringField) + && Objects.equals(intField, ((ComplexType) obj).intField) + && Objects.equals(genericType, ((ComplexType) obj).genericType) + && Objects.equals(innerType, ((ComplexType) obj).innerType); + } + } + + private static interface ComplexTypes extends PipelineOptions { + ComplexType getComplexType(); + void setComplexType(ComplexType value); + } + + @Test + public void testJsonConversionForComplexType() throws Exception { + ComplexType complexType = new ComplexType(); + complexType.stringField = "stringField"; + complexType.intField = 12; + complexType.innerType = InnerType.of(12); + complexType.genericType = ImmutableList.of(InnerType.of(16234), InnerType.of(24)); + + ComplexTypes options = PipelineOptionsFactory.as(ComplexTypes.class); + options.setComplexType(complexType); + ComplexTypes options2 = serializeDeserialize(ComplexTypes.class, options); + assertEquals(complexType, options2.getComplexType()); + } + + /** Test interface for testing ignored properties during serialization. */ + private static interface IgnoredProperty extends PipelineOptions { + @JsonIgnore + String getValue(); + void setValue(String value); + } + + @Test + public void testJsonConversionOfIgnoredProperty() throws Exception { + IgnoredProperty options = PipelineOptionsFactory.as(IgnoredProperty.class); + options.setValue("TestValue"); + + IgnoredProperty options2 = serializeDeserialize(IgnoredProperty.class, options); + assertNull(options2.getValue()); + } + + /** Test class which is not serializable by Jackson. */ + public static class NotSerializable { + private String value; + public NotSerializable(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + } + + /** Test interface containing a class which is not serializable by Jackson. 
*/ + private static interface NotSerializableProperty extends PipelineOptions { + NotSerializable getValue(); + void setValue(NotSerializable value); + } + + @Test(expected = JsonMappingException.class) + public void testJsonConversionOfNotSerializableProperty() throws Exception { + NotSerializableProperty options = PipelineOptionsFactory.as(NotSerializableProperty.class); + options.setValue(new NotSerializable("TestString")); + + serializeDeserialize(NotSerializableProperty.class, options); + } + + /** + * Test interface which has {@link JsonIgnore @JsonIgnore} on a property that Jackson + * can't serialize. + */ + private static interface IgnoredNotSerializableProperty extends PipelineOptions { + @JsonIgnore + NotSerializable getValue(); + void setValue(NotSerializable value); + } + + @Test + public void testJsonConversionOfIgnoredNotSerializableProperty() throws Exception { + IgnoredNotSerializableProperty options = + PipelineOptionsFactory.as(IgnoredNotSerializableProperty.class); + options.setValue(new NotSerializable("TestString")); + + IgnoredNotSerializableProperty options2 = + serializeDeserialize(IgnoredNotSerializableProperty.class, options); + assertNull(options2.getValue()); + } + + /** Test class which is only serializable by Jackson with the added metadata. */ + public static class SerializableWithMetadata { + private String value; + public SerializableWithMetadata(@JsonProperty("value") String value) { + this.value = value; + } + + @JsonProperty("value") + public String getValue() { + return value; + } + } + + /** + * Test interface containing a property which is only serializable by Jackson with + * the additional metadata. + */ + private static interface SerializableWithMetadataProperty extends PipelineOptions { + SerializableWithMetadata getValue(); + void setValue(SerializableWithMetadata value); + } + + @Test + public void testJsonConversionOfSerializableWithMetadataProperty() throws Exception { + SerializableWithMetadataProperty options = + PipelineOptionsFactory.as(SerializableWithMetadataProperty.class); + options.setValue(new SerializableWithMetadata("TestString")); + + SerializableWithMetadataProperty options2 = + serializeDeserialize(SerializableWithMetadataProperty.class, options); + assertEquals("TestString", options2.getValue().getValue()); + } + + private T serializeDeserialize(Class kls, PipelineOptions options) + throws Exception { + ObjectMapper mapper = new ObjectMapper(); + String value = mapper.writeValueAsString(options); + return mapper.readValue(value, PipelineOptions.class).as(kls); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java new file mode 100644 index 0000000000000..398326e8a385e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.isA; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.ExpectedLogs; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobState; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.Date; +import java.util.concurrent.TimeUnit; + +/** + * Tests for BlockingDataflowPipelineRunner. + */ +@RunWith(JUnit4.class) +public class BlockingDataflowPipelineRunnerTest { + @Rule public ExpectedLogs expectedLogs = ExpectedLogs.none(BlockingDataflowPipelineRunner.class); + + // This class mocks a call to DataflowPipelineJob.waitToFinish(): + // it blocks the thread to simulate waiting, + // and releases the blocking once signaled + static class MockWaitToFinish implements Answer { + NotificationHelper jobCompleted = new NotificationHelper(); + + public Object answer(InvocationOnMock invocation) throws InterruptedException { + System.out.println("MockWaitToFinish.answer(): Wait for signaling job completion."); + assertTrue("Test did not receive mock job completion signal", + jobCompleted.waitTillSet(10000)); + + System.out.println("MockWaitToFinish.answer(): job completed."); + return JobState.DONE; + } + + public void signalJobComplete() { + jobCompleted.set(); + } + } + + // Mini helper class for wait-notify + static class NotificationHelper { + private boolean isSet = false; + + public synchronized void set() { + isSet = true; + notifyAll(); + } + + public synchronized boolean check() { + return isSet; + } + + public synchronized boolean waitTillSet(long timeout) throws InterruptedException { + long remainingTimeout = timeout; + long startTime = new Date().getTime(); + while (!isSet && remainingTimeout > 0) { + wait(remainingTimeout); + remainingTimeout = timeout - (new Date().getTime() - startTime); + } + + return isSet; + } + } + + @Test + public void testJobWaitComplete() throws IOException, InterruptedException { + expectedLogs.expectInfo("Job finished with status DONE"); + + DataflowPipelineRunner mockDataflowPipelineRunner = mock(DataflowPipelineRunner.class); + DataflowPipelineJob mockJob = mock(DataflowPipelineJob.class); + MockWaitToFinish mockWait = new MockWaitToFinish(); + + when(mockJob.waitToFinish( + anyLong(), isA(TimeUnit.class), isA(MonitoringUtil.JobMessagesHandler.class))) + .thenAnswer(mockWait); + when(mockDataflowPipelineRunner.run(isA(Pipeline.class))).thenReturn(mockJob); + + // Construct a BlockingDataflowPipelineRunner with mockDataflowPipelineRunner inside + final BlockingDataflowPipelineRunner blockingRunner = + new BlockingDataflowPipelineRunner( + mockDataflowPipelineRunner, + new MonitoringUtil.PrintHandler(System.out)); + + final NotificationHelper executionStarted = new NotificationHelper(); + final NotificationHelper jobCompleted = new NotificationHelper(); + + new Thread() { + public void run() { + executionStarted.set(); + + // Run on an empty test 
pipeline. + blockingRunner.run(DirectPipeline.createForTest()); + + // Test following code is not reached till mock job completion signal. + jobCompleted.set(); + } + }.start(); + + assertTrue("'executionStarted' event not set till timeout.", + executionStarted.waitTillSet(2000)); + assertFalse("Code after job completion should not be reached before mock signal.", + jobCompleted.check()); + + mockWait.signalJobComplete(); + assertTrue("run() should return after job completion is mocked.", + jobCompleted.waitTillSet(2000)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJobTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJobTest.java new file mode 100644 index 0000000000000..30697deecc0fe --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJobTest.java @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.runners; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.Job; +import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobState; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +/** + * Tests for DataflowPipelineJob. 
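+ * Exercises waitToFinish against a fully mocked Dataflow client.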
+ */ +@RunWith(JUnit4.class) +public class DataflowPipelineJobTest { + private static final String PROJECT_ID = "someProject"; + private static final String JOB_ID = "1234"; + + @Test + public void testWaitToFinish() throws IOException, InterruptedException { + Dataflow mockWorkflowClient = mock(Dataflow.class); + Dataflow.V1b3 mockV1b3 = mock(Dataflow.V1b3.class); + Dataflow.V1b3.Projects mockProjects = mock(Dataflow.V1b3.Projects.class); + Dataflow.V1b3.Projects.Jobs mockJobs = mock(Dataflow.V1b3.Projects.Jobs.class); + Dataflow.V1b3.Projects.Jobs.Get statusRequest = mock(Dataflow.V1b3.Projects.Jobs.Get.class); + + Job statusResponse = new Job(); + statusResponse.setCurrentState(JobState.DONE.getStateName()); + + when(mockWorkflowClient.v1b3()).thenReturn(mockV1b3); + when(mockV1b3.projects()).thenReturn(mockProjects); + when(mockProjects.jobs()).thenReturn(mockJobs); + when(mockJobs.get(eq(PROJECT_ID), eq(JOB_ID))) + .thenReturn(statusRequest); + when(statusRequest.execute()).thenReturn(statusResponse); + + DataflowPipelineJob job = new DataflowPipelineJob( + PROJECT_ID, JOB_ID, mockWorkflowClient); + + JobState state = job.waitToFinish(1, TimeUnit.MINUTES, null); + assertEquals(JobState.DONE, state); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java new file mode 100644 index 0000000000000..7995445c9869b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.api.services.dataflow.model.Job; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.PackageUtil; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.collect.ImmutableList; + +import org.hamcrest.Matchers; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +/** + * Tests for DataflowPipelineRunner. + */ +@RunWith(JUnit4.class) +public class DataflowPipelineRunnerTest { + + @Rule public ExpectedException thrown = ExpectedException.none(); + + // Asserts that the given Job has all expected fields set. 
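+  // i.e. the id and current state should still be unset on the Job being submitted.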
+ private static void assertValidJob(Job job) { + assertNull(job.getId()); + assertNull(job.getCurrentState()); + } + + private DataflowPipeline buildDataflowPipeline(DataflowPipelineOptions options) { + DataflowPipeline p = DataflowPipeline.create(options); + + p.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/object")) + .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/object")); + + return p; + } + + private static Dataflow buildMockDataflow( + final ArgumentCaptor jobCaptor) throws IOException { + Dataflow mockDataflowClient = mock(Dataflow.class); + Dataflow.V1b3 mockV1b3 = mock(Dataflow.V1b3.class); + Dataflow.V1b3.Projects mockProjects = mock(Dataflow.V1b3.Projects.class); + Dataflow.V1b3.Projects.Jobs mockJobs = mock(Dataflow.V1b3.Projects.Jobs.class); + Dataflow.V1b3.Projects.Jobs.Create mockRequest = + mock(Dataflow.V1b3.Projects.Jobs.Create.class); + + when(mockDataflowClient.v1b3()).thenReturn(mockV1b3); + when(mockV1b3.projects()).thenReturn(mockProjects); + when(mockProjects.jobs()).thenReturn(mockJobs); + when(mockJobs.create(eq("someProject"), jobCaptor.capture())) + .thenReturn(mockRequest); + + Job resultJob = new Job(); + resultJob.setId("newid"); + when(mockRequest.execute()).thenReturn(resultJob); + return mockDataflowClient; + } + + private GcsUtil buildMockGcsUtil() throws IOException { + GcsUtil mockGcsUtil = mock(GcsUtil.class); + when(mockGcsUtil.create( + any(GcsPath.class), anyString())) + .thenReturn(FileChannel.open( + Files.createTempFile("channel-", ".tmp"), + StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE)); + return mockGcsUtil; + } + + private DataflowPipelineOptions buildPipelineOptions( + ArgumentCaptor jobCaptor) throws IOException { + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setProject("someProject"); + options.setTempLocation(DataflowPipelineRunner.verifyGcsPath( + GcsPath.fromComponents("somebucket", "some/path")).toString()); + // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath. + options.setFilesToStage(new LinkedList()); + options.setDataflowClient(buildMockDataflow(jobCaptor)); + options.setGcsUtil(buildMockGcsUtil()); + options.setGcpCredential(new TestCredential()); + return options; + } + + @Test + public void testRun() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + DataflowPipeline p = buildDataflowPipeline(options); + DataflowPipelineJob job = p.run(); + assertEquals("newid", job.getJobId()); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testRunWithFiles() throws IOException { + // Test that the function DataflowPipelineRunner.stageFiles works as + // expected. + GcsUtil mockGcsUtil = buildMockGcsUtil(); + final GcsPath gcsStaging = + GcsPath.fromComponents("somebucket", "some/path"); + final GcsPath gcsTemp = + GcsPath.fromComponents("somebucket", "some/temp/path"); + final String cloudDataflowDataset = "somedataset"; + + // Create some temporary files. 
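+ // These stand in for the files that would otherwise be detected on the classpath; the second file is staged under the explicit alias configured below.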
+ File temp1 = File.createTempFile("DataflowPipelineRunnerTest", "txt"); + temp1.deleteOnExit(); + File temp2 = File.createTempFile("DataflowPipelineRunnerTest2", "txt"); + temp2.deleteOnExit(); + + DataflowPackage expectedPackage1 = PackageUtil.createPackage( + temp1.getAbsolutePath(), gcsStaging, null); + + String overridePackageName = "alias.txt"; + DataflowPackage expectedPackage2 = PackageUtil.createPackage( + temp2.getAbsolutePath(), gcsStaging, overridePackageName); + + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setFilesToStage(ImmutableList.of( + temp1.getAbsolutePath(), + overridePackageName + "=" + temp2.getAbsolutePath())); + options.setStagingLocation(gcsStaging.toString()); + options.setTempLocation(gcsTemp.toString()); + options.setTempDatasetId(cloudDataflowDataset); + options.setProject("someProject"); + options.setJobName("job"); + options.setDataflowClient(buildMockDataflow(jobCaptor)); + options.setGcsUtil(mockGcsUtil); + options.setGcpCredential(new TestCredential()); + + DataflowPipeline p = buildDataflowPipeline(options); + + DataflowPipelineJob job = p.run(); + assertEquals("newid", job.getJobId()); + + Job workflowJob = jobCaptor.getValue(); + assertValidJob(workflowJob); + + assertEquals( + 2, + workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size()); + DataflowPackage workflowPackage1 = + workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0); + assertEquals(expectedPackage1.getName(), workflowPackage1.getName()); + assertEquals(expectedPackage1.getLocation(), workflowPackage1.getLocation()); + DataflowPackage workflowPackage2 = + workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1); + assertEquals(expectedPackage2.getName(), workflowPackage2.getName()); + assertEquals(expectedPackage2.getLocation(), workflowPackage2.getLocation()); + + assertEquals( + gcsTemp.toResourceName(), + workflowJob.getEnvironment().getTempStoragePrefix()); + assertEquals( + cloudDataflowDataset, + workflowJob.getEnvironment().getDataset()); + assertEquals( + DataflowReleaseInfo.getReleaseInfo().getName(), + workflowJob.getEnvironment().getUserAgent().get("name")); + assertEquals( + DataflowReleaseInfo.getReleaseInfo().getVersion(), + workflowJob.getEnvironment().getUserAgent().get("version")); + } + + @Test + public void runWithDefaultFilesToStage() throws Exception { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + options.setFilesToStage(null); + DataflowPipelineRunner.fromOptions(options); + assertTrue(!options.getFilesToStage().isEmpty()); + } + + @Test + public void detectClassPathResourceWithFileResources() throws Exception { + String path = "/tmp/file"; + String path2 = "/tmp/file2"; + URLClassLoader classLoader = new URLClassLoader(new URL[]{ + new URL("file://" + path), + new URL("file://" + path2) + }); + + assertEquals(ImmutableList.of(path, path2), + DataflowPipelineRunner.detectClassPathResourcesToStage(classLoader)); + } + + @Test + public void detectClassPathResourcesWithUnsupportedClassLoader() { + ClassLoader mockClassLoader = Mockito.mock(ClassLoader.class); + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Unable to use ClassLoader to detect classpath elements."); + + DataflowPipelineRunner.detectClassPathResourcesToStage(mockClassLoader); + } + + @Test + public void 
detectClassPathResourceWithNonFileResources() throws Exception { + String url = "http://www.google.com/all-the-secrets.jar"; + URLClassLoader classLoader = new URLClassLoader(new URL[]{ + new URL(url) + }); + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Unable to convert url (" + url + ") to file."); + + DataflowPipelineRunner.detectClassPathResourcesToStage(classLoader); + } + + @Test + public void testGcsStagingLocationInitialization() { + // Test that the staging location is initialized correctly. + GcsPath gcsTemp = GcsPath.fromComponents("somebucket", + "some/temp/path"); + + // Set temp location (required), and check that staging location is set. + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setTempLocation(gcsTemp.toString()); + options.setProject("testProject"); + options.setGcpCredential(new TestCredential()); + DataflowPipelineRunner.fromOptions(options); + + assertNotNull(options.getStagingLocation()); + } + + @Test + public void testGcsRequiredTempLocation() { + // Error raised if temp location not set. + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setProject("someProject"); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage(containsString("tempLocation")); + DataflowPipelineRunner.fromOptions(options); + } + + @Test + public void testNonGcsFilePathInReadFailure() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor)); + p.apply(TextIO.Read.named("ReadMyNonGcsFile").from("/tmp/file")); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage(containsString("GCS URI")); + p.run(); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testNonGcsFilePathInWriteFailure() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor)); + p.apply(TextIO.Read.named("ReadMyGcsFile").from("gs://bucket/object")) + .apply(TextIO.Write.named("WriteMyNonGcsFile").to("/tmp/file")); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage(containsString("GCS URI")); + p.run(); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testMultiSlashGcsFileReadPath() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor)); + p.apply(TextIO.Read.named("ReadInvalidGcsFile") + .from("gs://bucket/tmp//file")); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("consecutive slashes"); + p.run(); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testMultiSlashGcsFileWritePath() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor)); + p.apply(TextIO.Read.named("ReadMyGcsFile").from("gs://bucket/object")) + .apply(TextIO.Write.named("WriteInvalidGcsFile") + .to("gs://bucket/tmp//file")); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("consecutive slashes"); + p.run(); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testInvalidTempLocation() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + 
options.setTempLocation("file://temp/location"); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage(containsString("GCS URI")); + DataflowPipelineRunner.fromOptions(options); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testInvalidStagingLocation() throws IOException { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + options.setStagingLocation("file://my/staging/location"); + try { + DataflowPipelineRunner.fromOptions(options); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), containsString("GCS URI")); + } + options.setStagingLocation("my/staging/location"); + try { + DataflowPipelineRunner.fromOptions(options); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), containsString("GCS URI")); + } + } + + @Test + public void testInvalidJobName() throws IOException { + List invalidNames = Arrays.asList( + "invalid_name", + "0invalid", + "invalid-", + "this-one-is-too-long-01234567890123456789"); + List expectedReason = Arrays.asList( + "JobName invalid", + "JobName invalid", + "JobName invalid", + "JobName too long"); + + for (int i = 0; i < invalidNames.size(); ++i) { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + options.setJobName(invalidNames.get(i)); + + try { + DataflowPipelineRunner.fromOptions(options); + fail("Expected IllegalArgumentException for jobName " + + options.getJobName()); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), + containsString(expectedReason.get(i))); + } + } + } + + @Test + public void testValidJobName() throws IOException { + List names = Arrays.asList("ok", "Ok", "A-Ok", "ok-123"); + + for (String name : names) { + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + options.setJobName(name); + + DataflowPipelineRunner runner = DataflowPipelineRunner + .fromOptions(options); + assertNotNull(runner); + } + } + + /** + * A fake PTransform for testing. + */ + public static class TestTransform + extends PTransform, PCollection> { + public boolean translated = false; + + @Override + public PCollection apply(PCollection input) { + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return getInput().getCoder(); + } + } + + @Test + public void testTransformTranslatorMissing() throws IOException { + // Test that we throw if we don't provide a translation. 
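+ // TestTransform has no registered TransformTranslator, so translate() should fail with an IllegalStateException mentioning "no translator registered".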
+ ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + Pipeline p = DataflowPipeline.create(options); + + p.apply(Create.of(Arrays.asList(1, 2, 3))) + .apply(new TestTransform()); + + thrown.expect(IllegalStateException.class); + thrown.expectMessage(Matchers.containsString("no translator registered")); + DataflowPipelineTranslator.fromOptions(options) + .translate(p, Collections.emptyList()); + assertValidJob(jobCaptor.getValue()); + } + + @Test + public void testTransformTranslator() throws IOException { + // Test that we can provide a custom translation + ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); + + DataflowPipelineOptions options = buildPipelineOptions(jobCaptor); + DataflowPipeline p = DataflowPipeline.create(options); + TestTransform transform = new TestTransform(); + + p.apply(Create.of(Arrays.asList(1, 2, 3))) + .apply(transform) + .setCoder(BigEndianIntegerCoder.of()); + + DataflowPipelineTranslator translator = DataflowPipelineRunner + .fromOptions(options).getTranslator(); + + translator.registerTransformTranslator( + TestTransform.class, + new DataflowPipelineTranslator.TransformTranslator() { + @SuppressWarnings("unchecked") + @Override + public void translate( + TestTransform transform, + DataflowPipelineTranslator.TranslationContext context) { + transform.translated = true; + + // Note: This is about the minimum needed to fake out a + // translation. This obviously isn't a real translation. + context.addStep(transform, "TestTranslate"); + context.addOutput("output", transform.getOutput()); + } + }); + + translator.translate(p, Collections.emptyList()); + assertTrue(transform.translated); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java new file mode 100644 index 0000000000000..e2edb9fdc2232 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -0,0 +1,582 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners; + +import static com.google.cloud.dataflow.sdk.util.Structs.addObject; +import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.argThat; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.api.services.dataflow.model.Job; +import com.google.api.services.dataflow.model.Step; +import com.google.api.services.dataflow.model.WorkerPool; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VarIntCoder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.OutputReference; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.collect.Iterables; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.ArgumentMatcher; + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * Tests for DataflowPipelineTranslator. + */ +@RunWith(JUnit4.class) +public class DataflowPipelineTranslatorTest { + + @Rule public ExpectedException thrown = ExpectedException.none(); + + // A Custom Mockito matcher for an initial Job which checks that all + // expected fields are set. 
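+ // Specifically: server-populated fields (id, project id, current state and time, create time, execution info) must be unset, while the client-supplied name, type, environment, and steps must be present.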
+ private static class IsValidCreateRequest extends ArgumentMatcher { + public boolean matches(Object o) { + Job job = (Job) o; + return job.getId() == null + && job.getProjectId() == null + && job.getName() != null + && job.getType() != null + && job.getEnvironment() != null + && job.getSteps() != null + && job.getCurrentState() == null + && job.getCurrentStateTime() == null + && job.getExecutionInfo() == null + && job.getCreateTime() == null; + } + } + + private DataflowPipeline buildPipeline(DataflowPipelineOptions options) + throws IOException { + DataflowPipeline p = DataflowPipeline.create(options); + + p.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/object")) + .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/object")); + + return p; + } + + private static Dataflow buildMockDataflow( + ArgumentMatcher jobMatcher) throws IOException { + Dataflow mockDataflowClient = mock(Dataflow.class); + Dataflow.V1b3 mockV1b3 = mock(Dataflow.V1b3.class); + Dataflow.V1b3.Projects mockProjects = mock(Dataflow.V1b3.Projects.class); + Dataflow.V1b3.Projects.Jobs mockJobs = mock(Dataflow.V1b3.Projects.Jobs.class); + Dataflow.V1b3.Projects.Jobs.Create mockRequest = mock( + Dataflow.V1b3.Projects.Jobs.Create.class); + + when(mockDataflowClient.v1b3()).thenReturn(mockV1b3); + when(mockV1b3.projects()).thenReturn(mockProjects); + when(mockProjects.jobs()).thenReturn(mockJobs); + when(mockJobs.create(eq("someProject"), argThat(jobMatcher))) + .thenReturn(mockRequest); + + Job resultJob = new Job(); + resultJob.setId("newid"); + when(mockRequest.execute()).thenReturn(resultJob); + return mockDataflowClient; + } + + private static DataflowPipelineOptions buildPipelineOptions() throws IOException { + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + options.setProject("some-project"); + options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString()); + options.setFilesToStage(new LinkedList()); + options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest())); + return options; + } + + @Test + public void testZoneConfig() throws IOException { + final String testZone = "test-zone-1"; + + DataflowPipelineOptions options = buildPipelineOptions(); + options.setZone(testZone); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + assertEquals(2, job.getEnvironment().getWorkerPools().size()); + assertEquals(testZone, + job.getEnvironment().getWorkerPools().get(0).getZone()); + assertEquals(testZone, + job.getEnvironment().getWorkerPools().get(1).getZone()); + } + + @Test + public void testWorkerMachineTypeConfig() throws IOException { + final String testMachineType = "test-machine-type"; + + DataflowPipelineOptions options = buildPipelineOptions(); + options.setWorkerMachineType(testMachineType); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + assertEquals(2, job.getEnvironment().getWorkerPools().size()); + + WorkerPool workerPool = null; + + if (job + .getEnvironment() + .getWorkerPools() + .get(0) + .getKind() + .equals(DataflowPipelineTranslator.HARNESS_WORKER_POOL)) { + workerPool = job.getEnvironment().getWorkerPools().get(0); + } else if (job + .getEnvironment() + 
.getWorkerPools() + .get(1) + .getKind() + .equals(DataflowPipelineTranslator.HARNESS_WORKER_POOL)) { + workerPool = job.getEnvironment().getWorkerPools().get(1); + } else { + fail("Missing worker pool."); + } + assertEquals(testMachineType, workerPool.getMachineType()); + } + + @Test + public void testDiskSizeGbConfig() throws IOException { + final Integer diskSizeGb = 1234; + + DataflowPipelineOptions options = buildPipelineOptions(); + options.setDiskSizeGb(diskSizeGb); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + assertEquals(2, job.getEnvironment().getWorkerPools().size()); + assertEquals(diskSizeGb, + job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb()); + assertEquals(diskSizeGb, + job.getEnvironment().getWorkerPools().get(1).getDiskSizeGb()); + } + + @Test + public void testShufflePoolConfig() throws IOException { + final Integer numWorkers = 10; + final String diskSource = "test-disk-source"; + final Integer diskSizeGb = 12345; + final String zone = "test-zone-1"; + + DataflowPipelineOptions options = buildPipelineOptions(); + options.setShuffleNumWorkers(numWorkers); + options.setShuffleDiskSourceImage(diskSource); + options.setShuffleDiskSizeGb(diskSizeGb); + options.setShuffleZone(zone); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + assertEquals(2, job.getEnvironment().getWorkerPools().size()); + WorkerPool shufflePool = + job.getEnvironment().getWorkerPools().get(1); + assertEquals(shufflePool.getKind(), + DataflowPipelineTranslator.SHUFFLE_WORKER_POOL); + assertEquals(numWorkers, shufflePool.getNumWorkers()); + assertEquals(diskSource, shufflePool.getDiskSourceImage()); + assertEquals(diskSizeGb, shufflePool.getDiskSizeGb()); + assertEquals(zone, shufflePool.getZone()); + } + + @Test + public void testPredefinedAddStep() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + + DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); + DataflowPipelineTranslator.registerTransformTranslator( + EmbeddedTransform.class, new EmbeddedTranslator()); + + // Create a predefined step using another pipeline + Step predefinedStep = createPredefinedStep(); + + // Create a pipeline that the predefined step will be embedded into + DataflowPipeline pipeline = DataflowPipeline.create(options); + pipeline.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/in")) + .apply(ParDo.of(new NoOpFn())) + .apply(new EmbeddedTransform(predefinedStep.clone())) + .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/out")); + Job job = translator.translate(pipeline, Collections.emptyList()); + + List steps = job.getSteps(); + assertEquals(4, steps.size()); + + // The input to the embedded step should match the output of the step before + Map step1Out = getOutputPortReference(steps.get(1)); + Map step2In = getDictionary( + steps.get(2).getProperties(), PropertyNames.PARALLEL_INPUT); + assertEquals(step1Out, step2In); + + // The output from the embedded step should match the input of the step after + Map step2Out = getOutputPortReference(steps.get(2)); + Map step3In = getDictionary( + steps.get(3).getProperties(), PropertyNames.PARALLEL_INPUT); + assertEquals(step2Out, step3In); + + // The step should not have been 
modified other than remapping the input + Step predefinedStepClone = predefinedStep.clone(); + Step embeddedStepClone = steps.get(2).clone(); + predefinedStepClone.getProperties().remove(PropertyNames.PARALLEL_INPUT); + embeddedStepClone.getProperties().remove(PropertyNames.PARALLEL_INPUT); + assertEquals(predefinedStepClone, embeddedStepClone); + } + + /** + * Construct a OutputReference for the output of the step. + */ + private static OutputReference getOutputPortReference(Step step) throws Exception { + // TODO: This should be done via a Structs accessor. + List> output = + (List>) step.getProperties().get(PropertyNames.OUTPUT_INFO); + String outputTagId = getString(Iterables.getOnlyElement(output), PropertyNames.OUTPUT_NAME); + return new OutputReference(step.getName(), outputTagId); + } + + /** + * Returns a Step for a DoFn by creating and translating a pipeline. + */ + private static Step createPredefinedStep() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); + DataflowPipeline pipeline = DataflowPipeline.create(options); + String stepName = "DoFn1"; + pipeline.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/in")) + .apply(ParDo.of(new NoOpFn()).named(stepName)) + .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/out")); + Job job = translator.translate(pipeline, Collections.emptyList()); + + assertEquals(3, job.getSteps().size()); + Step step = job.getSteps().get(1); + assertEquals(stepName, getString(step.getProperties(), PropertyNames.USER_NAME)); + return step; + } + + private static class NoOpFn extends DoFn{ + @Override public void processElement(ProcessContext c) throws Exception { + c.output(c.element()); + } + } + + /** + * A placeholder transform that will be used to substitute a predefined Step. + */ + private static class EmbeddedTransform + extends PTransform, PCollection> { + private final Step step; + + public EmbeddedTransform(Step step) { + this.step = step; + } + + @Override + public PCollection apply(PCollection input) { + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + } + + @Override + protected Coder getDefaultOutputCoder() { + return StringUtf8Coder.of(); + } + } + + /** + * A TransformTranslator that adds the predefined Step using + * {@link TranslationContext#addStep} and remaps the input port reference. + */ + private static class EmbeddedTranslator + implements DataflowPipelineTranslator.TransformTranslator { + @Override public void translate(EmbeddedTransform transform, TranslationContext context) { + addObject(transform.step.getProperties(), PropertyNames.PARALLEL_INPUT, + context.asOutputReference(transform.getInput())); + context.addStep(transform, transform.step); + } + } + + /** + * A composite transform which returns an output that is unrelated to + * the input. + */ + private static class UnrelatedOutputCreator + extends PTransform, PCollection> { + + @Override + public PCollection apply(PCollection input) { + // Apply an operation so that this is a composite transform. + input.apply(Count.perElement()); + + // Return a value unrelated to the input. + return input.getPipeline().apply(Create.of(1, 2, 3, 4)); + } + + @Override + protected Coder getDefaultOutputCoder() { + return VarIntCoder.of(); + } + } + + /** + * A composite transform which returns an output which is unbound. 
+ */ + private static class UnboundOutputCreator + extends PTransform, PDone> { + + @Override + public PDone apply(PCollection input) { + // Apply an operation so that this is a composite transform. + input.apply(Count.perElement()); + + return new PDone(); + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + } + + /** + * A composite transform which returns a partially bound output. + * + *
This is not allowed and will result in a failure. + */ + private static class PartiallyBoundOutputCreator + extends PTransform, PCollectionTuple> { + + public final TupleTag sumTag = new TupleTag<>("sum"); + public final TupleTag doneTag = new TupleTag<>("done"); + + @Override + public PCollectionTuple apply(PCollection input) { + PCollection sum = input.apply(Sum.integersGlobally()); + + // Fails here when attempting to construct a tuple with an unbound object. + return PCollectionTuple.of(sumTag, sum) + .and(doneTag, PCollection.createPrimitiveOutputInternal( + new GlobalWindow())); + } + } + + @Test + public void testMultiGraphPipelineSerialization() throws IOException { + Pipeline p = DataflowPipeline.create(buildPipelineOptions()); + + PCollection input = p.begin() + .apply(Create.of(1, 2, 3)); + + input.apply(new UnrelatedOutputCreator()); + input.apply(new UnboundOutputCreator()); + + DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions( + PipelineOptionsFactory.as(DataflowPipelineOptions.class)); + + // Check that translation doesn't fail. + t.translate(p, Collections.emptyList()); + } + + @Test + public void testPartiallyBoundFailure() throws IOException { + Pipeline p = DataflowPipeline.create(buildPipelineOptions()); + + PCollection input = p.begin() + .apply(Create.of(1, 2, 3)); + + thrown.expect(IllegalStateException.class); + input.apply(new PartiallyBoundOutputCreator()); + + Assert.fail("Failure expected from use of partially bound output"); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests a few corner cases that should not crash. + */ + @Test + public void testGoodWildcards() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + Pipeline pipeline = DataflowPipeline.create(options); + DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*baz*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*baz?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]baz?")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*wonka*")); + + // Check that translation doesn't fail. + t.translate(pipeline, Collections.emptyList()); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "*". + */ + @Test + public void testBadWildcardStar() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + Pipeline pipeline = DataflowPipeline.create(options); + DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo*/baz")); + + // Check that translation does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Unsupported wildcard usage"); + t.translate(pipeline, Collections.emptyList()); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "?". 
+ */ + @Test + public void testBadWildcardOptional() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + Pipeline pipeline = DataflowPipeline.create(options); + DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo?/baz")); + + // Check that translation does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Unsupported wildcard usage"); + t.translate(pipeline, Collections.emptyList()); + } + + /** + * The first wildcard must occur after the last directory delimiter. + * This tests "[]" based character classes. + */ + @Test + public void testBadWildcardBrackets() throws Exception { + DataflowPipelineOptions options = buildPipelineOptions(); + Pipeline pipeline = DataflowPipeline.create(options); + DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); + + pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); + + // Check that translation does fail. + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Unsupported wildcard usage"); + t.translate(pipeline, Collections.emptyList()); + } + + @Test + public void testToSingletonTranslation() throws Exception { + // A "change detector" test that makes sure the translation + // of getting a PCollectionView does not change + // in bad ways during refactor + + DataflowPipelineOptions options = buildPipelineOptions(); + DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); + + DataflowPipeline pipeline = DataflowPipeline.create(options); + PCollectionView view = pipeline + .apply(Create.of(1)) + .apply(View.asSingleton()); + Job job = translator.translate(pipeline, Collections.emptyList()); + + List steps = job.getSteps(); + assertEquals(2, steps.size()); + + Step createStep = steps.get(0); + assertEquals("CreateCollection", createStep.getKind()); + + Step collectionToSingletonStep = steps.get(1); + assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind()); + + } + + @Test + public void testToIterableTranslation() throws Exception { + // A "change detector" test that makes sure the translation + // of getting a PCollectionView, ...> does not change + // in bad ways during refactor + + DataflowPipelineOptions options = buildPipelineOptions(); + DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); + + DataflowPipeline pipeline = DataflowPipeline.create(options); + PCollectionView, ?> view = pipeline + .apply(Create.of(1, 2, 3)) + .apply(View.asIterable()); + Job job = translator.translate(pipeline, Collections.emptyList()); + + List steps = job.getSteps(); + assertEquals(2, steps.size()); + + Step createStep = steps.get(0); + assertEquals("CreateCollection", createStep.getKind()); + + Step collectionToSingletonStep = steps.get(1); + assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java new file mode 100644 index 0000000000000..520e03e28b9dd --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.junit.Assert.assertTrue; + +import com.google.api.services.dataflow.Dataflow; +import com.google.cloud.dataflow.sdk.options.ApplicationNameOptions; +import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.GcsUtil; +import com.google.cloud.dataflow.sdk.util.TestCredential; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; + +/** + * Tests for DataflowPipelineRunner. + */ +@RunWith(JUnit4.class) +public class PipelineRunnerTest { + + @Mock private Dataflow mockDataflow; + @Mock private GcsUtil mockGcsUtil; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testLongName() throws IOException { + // Check we can create a pipeline runner using the full class name. + DirectPipelineOptions options = PipelineOptionsFactory.as(DirectPipelineOptions.class); + options.setAppName("test"); + options.setProject("test"); + options.setGcsUtil(mockGcsUtil); + options.setRunner(DirectPipelineRunner.class); + options.setGcpCredential(new TestCredential()); + PipelineRunner runner = PipelineRunner.fromOptions(options); + assertTrue(runner instanceof DirectPipelineRunner); + } + + @Test + public void testShortName() throws IOException { + // Check we can create a pipeline runner using the short class name. + DirectPipelineOptions options = PipelineOptionsFactory.as(DirectPipelineOptions.class); + options.setAppName("test"); + options.setProject("test"); + options.setGcsUtil(mockGcsUtil); + options.setRunner(DirectPipelineRunner.class); + options.setGcpCredential(new TestCredential()); + PipelineRunner runner = PipelineRunner.fromOptions(options); + assertTrue(runner instanceof DirectPipelineRunner); + } + + @Test + public void testAppNameDefault() throws IOException { + ApplicationNameOptions options = PipelineOptionsFactory.as(ApplicationNameOptions.class); + Assert.assertEquals(PipelineRunnerTest.class.getSimpleName(), + options.getAppName()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java new file mode 100644 index 0000000000000..d0308e87a33f5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.not; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.First; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.values.PBegin; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.PValue; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.Arrays; +import java.util.EnumSet; + +/** + * Tests for {@link TransformTreeNode} and {@link TransformHierarchy}. + */ +@RunWith(JUnit4.class) +public class TransformTreeTest { + + enum TransformsSeen { + READ, + WRITE, + FIRST + } + + /** + * INVALID TRANSFORM, DO NOT COPY. + * + *
This is an invalid composite transform, which returns unbound outputs. + * This should never happen, and is here to test that it is properly rejected. + */ + private static class InvalidCompositeTransform + extends PTransform> { + + @Override + public PCollectionList apply(PBegin b) { + // Composite transform: apply delegates to other transformations, + // here a Create transform. + PCollection result = b.apply(Create.of("hello", "world")); + + // Issue below: PCollection.createPrimitiveOutput should not be used + // from within a composite transform. + return PCollectionList.of( + Arrays.asList(result, PCollection.createPrimitiveOutputInternal( + new GlobalWindow()))); + } + } + + /** + * A composite transform which returns an output which is unbound. + */ + private static class UnboundOutputCreator + extends PTransform, PDone> { + + @Override + public PDone apply(PCollection input) { + // Apply an operation so that this is a composite transform. + input.apply(Count.perElement()); + + return new PDone(); + } + + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } + } + + // Builds a pipeline containing a composite operation (First), then + // visits the nodes and verifies that the hierarchy was captured. + @Test + public void testCompositeCapture() throws Exception { + Pipeline p = DirectPipeline.createForTest(); + + p.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/object")) + .apply(First.of(10)) + .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/object")); + + final EnumSet visited = + EnumSet.noneOf(TransformsSeen.class); + final EnumSet left = + EnumSet.noneOf(TransformsSeen.class); + + p.traverseTopologically(new Pipeline.PipelineVisitor() { + @Override + public void enterCompositeTransform(TransformTreeNode node) { + PTransform transform = node.getTransform(); + if (transform instanceof First) { + Assert.assertTrue(visited.add(TransformsSeen.FIRST)); + Assert.assertNotNull(node.getEnclosingNode()); + Assert.assertTrue(node.isCompositeNode()); + } + Assert.assertThat(transform, not(instanceOf(TextIO.Read.Bound.class))); + Assert.assertThat(transform, not(instanceOf(TextIO.Write.Bound.class))); + } + + @Override + public void leaveCompositeTransform(TransformTreeNode node) { + PTransform transform = node.getTransform(); + if (transform instanceof First) { + Assert.assertTrue(left.add(TransformsSeen.FIRST)); + } + } + + @Override + public void visitTransform(TransformTreeNode node) { + PTransform transform = node.getTransform(); + // First is a composite, should not be visited here. 
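+ // (It should instead be observed by enterCompositeTransform/leaveCompositeTransform above.)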
+ Assert.assertThat(transform, not(instanceOf(First.class))); + if (transform instanceof TextIO.Read.Bound) { + Assert.assertTrue(visited.add(TransformsSeen.READ)); + } else if (transform instanceof TextIO.Write.Bound) { + Assert.assertTrue(visited.add(TransformsSeen.WRITE)); + } + } + + @Override + public void visitValue(PValue value, TransformTreeNode producer) { + } + }); + + Assert.assertTrue(visited.equals(EnumSet.allOf(TransformsSeen.class))); + Assert.assertTrue(left.equals(EnumSet.of(TransformsSeen.FIRST))); + } + + @Test(expected = IllegalStateException.class) + public void testOutputChecking() throws Exception { + Pipeline p = DirectPipeline.createForTest(); + + p.apply(new InvalidCompositeTransform()); + + p.traverseTopologically(new RecordingPipelineVisitor()); + Assert.fail("traversal should have failed with an IllegalStateException"); + } + + @Test + public void testMultiGraphSetup() throws IOException { + Pipeline p = DirectPipeline.createForTest(); + + PCollection input = p.begin() + .apply(Create.of(1, 2, 3)); + + input.apply(new UnboundOutputCreator()); + + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java new file mode 100644 index 0000000000000..3c7f29b40a8c1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.SeekableInput; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.io.DatumReader; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; +import java.util.List; + +/** + * Tests for AvroByteSink. + */ +@RunWith(JUnit4.class) +public class AvroByteSinkTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + void runTestWriteFile(List elems, Coder coder) throws Exception { + File tmpFile = tmpFolder.newFile("file.avro"); + String filename = tmpFile.getPath(); + + // Write the file. 
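+ // Each element is encoded with the supplied Coder and written as an Avro "bytes" record; the sizes reported by the writer are compared against the encoded lengths when the file is read back.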
+ + AvroByteSink avroSink = new AvroByteSink<>(filename, coder); + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter writer = avroSink.writer()) { + for (T elem : elems) { + actualSizes.add(writer.add(elem)); + } + } + + // Read back the file. + + SeekableByteChannel inChannel = (SeekableByteChannel) + IOChannelUtils.getFactory(filename).open(filename); + + SeekableInput seekableInput = + new AvroSource.SeekableByteChannelInput(inChannel); + + Schema schema = Schema.create(Schema.Type.BYTES); + + DatumReader datumReader = new GenericDatumReader<>(schema); + + DataFileReader fileReader = new DataFileReader<>( + seekableInput, datumReader); + + List actual = new ArrayList<>(); + List expectedSizes = new ArrayList<>(); + ByteBuffer inBuffer = ByteBuffer.allocate(10 * 1024); + while (fileReader.hasNext()) { + inBuffer = fileReader.next(inBuffer); + byte[] encodedElem = new byte[inBuffer.remaining()]; + inBuffer.get(encodedElem); + assert inBuffer.remaining() == 0; + inBuffer.clear(); + T elem = CoderUtils.decodeFromByteArray(coder, encodedElem); + actual.add(elem); + expectedSizes.add((long) encodedElem.length); + } + + fileReader.close(); + + // Compare the expected and the actual elements. + Assert.assertEquals(elems, actual); + Assert.assertEquals(expectedSizes, actualSizes); + } + + @Test + public void testWriteFile() throws Exception { + runTestWriteFile(TestUtils.INTS, BigEndianIntegerCoder.of()); + } + + @Test + public void testWriteEmptyFile() throws Exception { + runTestWriteFile(TestUtils.NO_INTS, BigEndianIntegerCoder.of()); + } + + // TODO: sharded filenames + // TODO: writing to GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java new file mode 100644 index 0000000000000..e6bfffdcb68ab --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MimeTypes; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.DatumWriter; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +import javax.annotation.Nullable; + +/** + * Tests for AvroByteSource. + */ +@RunWith(JUnit4.class) +public class AvroByteSourceTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private void runTestRead(List> elemsList, + Coder coder, + boolean requireExactMatch) + throws Exception { + File tmpFile = tmpFolder.newFile("file.avro"); + String filename = tmpFile.getPath(); + + // Write the data. + OutputStream outStream = Channels.newOutputStream( + IOChannelUtils.create(filename, MimeTypes.BINARY)); + Schema schema = Schema.create(Schema.Type.BYTES); + DatumWriter datumWriter = new GenericDatumWriter<>(schema); + DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); + fileWriter.create(schema, outStream); + boolean first = true; + List syncPoints = new ArrayList<>(); + List expectedSizes = new ArrayList<>(); + for (List elems : elemsList) { + if (first) { + first = false; + } else { + // Ensure a block boundary here. + long syncPoint = fileWriter.sync(); + syncPoints.add(syncPoint); + } + for (T elem : elems) { + byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem); + fileWriter.append(ByteBuffer.wrap(encodedElem)); + expectedSizes.add(encodedElem.length); + } + } + fileWriter.close(); + + // Test reading the data back. + List> actualElemsList = new ArrayList<>(); + List actualSizes = new ArrayList<>(); + Long startOffset = null; + Long endOffset; + long prevSyncPoint = 0; + for (long syncPoint : syncPoints) { + endOffset = (prevSyncPoint + syncPoint) / 2; + actualElemsList.add(readElems(filename, startOffset, endOffset, coder, actualSizes)); + startOffset = endOffset; + prevSyncPoint = syncPoint; + } + actualElemsList.add(readElems(filename, startOffset, null, coder, actualSizes)); + + // Compare the expected and the actual elements. + if (requireExactMatch) { + // Require the blocks to match exactly. (This works only for + // small block sizes. Large block sizes, bigger than Avro's + // internal sizes, lead to different splits.) + Assert.assertEquals(elemsList, actualElemsList); + } else { + // Just require the overall elements to be the same. (This + // works for any block size.) 
+ List expected = new ArrayList<>(); + for (List elems : elemsList) { + expected.addAll(elems); + } + List actual = new ArrayList<>(); + for (List actualElems : actualElemsList) { + actual.addAll(actualElems); + } + Assert.assertEquals(expected, actual); + } + + Assert.assertEquals(expectedSizes, actualSizes); + } + + private List readElems(String filename, + @Nullable Long startOffset, + @Nullable Long endOffset, + Coder coder, + List actualSizes) + throws Exception { + AvroByteSource avroSource = + new AvroByteSource<>(filename, startOffset, endOffset, coder); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(avroSource, actualSizes); + + List actualElems = new ArrayList<>(); + try (Source.SourceIterator iterator = avroSource.iterator()) { + while (iterator.hasNext()) { + actualElems.add(iterator.next()); + } + } + return actualElems; + } + + @Test + public void testRead() throws Exception { + runTestRead(Collections.singletonList(TestUtils.INTS), + BigEndianIntegerCoder.of(), + true /* require exact match */); + } + + @Test + public void testReadEmpty() throws Exception { + runTestRead(Collections.singletonList(TestUtils.NO_INTS), + BigEndianIntegerCoder.of(), + true /* require exact match */); + } + + private List> generateInputBlocks(int numBlocks, + int blockSizeBytes, + int averageLineSizeBytes) { + Random random = new Random(0); + List> blocks = new ArrayList<>(numBlocks); + for (int blockNum = 0; blockNum < numBlocks; blockNum++) { + int numLines = blockSizeBytes / averageLineSizeBytes; + List lines = new ArrayList<>(numLines); + for (int lineNum = 0; lineNum < numLines; lineNum++) { + int numChars = random.nextInt(averageLineSizeBytes * 2); + StringBuilder sb = new StringBuilder(); + for (int charNum = 0; charNum < numChars; charNum++) { + sb.appendCodePoint(random.nextInt('z' - 'a' + 1) + 'a'); + } + lines.add(sb.toString()); + } + blocks.add(lines); + } + return blocks; + } + + @Test + public void testReadSmallRanges() throws Exception { + runTestRead(generateInputBlocks(3, 50, 5), + StringUtf8Coder.of(), + true /* require exact match */); + } + + @Test + public void testReadBigRanges() throws Exception { + runTestRead(generateInputBlocks(10, 128 * 1024, 100), + StringUtf8Coder.of(), + false /* don't require exact match */); + } + + // TODO: sharded filenames + // TODO: reading from GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java new file mode 100644 index 0000000000000..79653feabc4ce --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for AvroSinkFactory. + */ +@RunWith(JUnit4.class) +public class AvroSinkFactoryTest { + private final String pathToAvroFile = "/path/to/file.avro"; + + Sink runTestCreateAvroSink(String filename, + CloudObject encoding) + throws Exception { + CloudObject spec = CloudObject.forClassName("AvroSink"); + addString(spec, "filename", filename); + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(encoding); + + Sink sink = SinkFactory.create(PipelineOptionsFactory.create(), cloudSink, + new BatchModeExecutionContext()); + return sink; + } + + @Test + public void testCreateAvroByteSink() throws Exception { + Coder coder = + WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Sink sink = runTestCreateAvroSink( + pathToAvroFile, coder.asCloudObject()); + + Assert.assertThat(sink, new IsInstanceOf(AvroByteSink.class)); + AvroByteSink avroSink = (AvroByteSink) sink; + Assert.assertEquals(pathToAvroFile, avroSink.avroSink.filenamePrefix); + Assert.assertEquals(coder, avroSink.coder); + } + + @Test + public void testCreateAvroSink() throws Exception { + WindowedValue.WindowedValueCoder coder = + WindowedValue.getValueOnlyCoder(AvroCoder.of(Integer.class)); + Sink sink = runTestCreateAvroSink(pathToAvroFile, coder.asCloudObject()); + + Assert.assertThat(sink, new IsInstanceOf(AvroSink.class)); + AvroSink avroSink = (AvroSink) sink; + Assert.assertEquals(pathToAvroFile, avroSink.filenamePrefix); + Assert.assertEquals(coder.getValueCoder(), avroSink.avroCoder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java new file mode 100644 index 0000000000000..5f22d2774f4be --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.SeekableInput; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.io.DatumReader; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; +import java.util.List; + +/** + * Tests for AvroSink. + */ +@RunWith(JUnit4.class) +public class AvroSinkTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + void runTestWriteFile(List elems, AvroCoder coder) throws Exception { + File tmpFile = tmpFolder.newFile("file.avro"); + String filename = tmpFile.getPath(); + + // Write the file. + + AvroSink avroSink = new AvroSink<>(filename, WindowedValue.getValueOnlyCoder(coder)); + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter> writer = avroSink.writer()) { + for (T elem : elems) { + actualSizes.add(writer.add(WindowedValue.valueInGlobalWindow(elem))); + } + } + + // Read back the file. + + SeekableByteChannel inChannel = (SeekableByteChannel) + IOChannelUtils.getFactory(filename).open(filename); + + SeekableInput seekableInput = + new AvroSource.SeekableByteChannelInput(inChannel); + + DatumReader datumReader = new GenericDatumReader<>(coder.getSchema()); + + DataFileReader fileReader = new DataFileReader<>( + seekableInput, datumReader); + + List actual = new ArrayList<>(); + List expectedSizes = new ArrayList<>(); + while (fileReader.hasNext()) { + T next = fileReader.next(); + actual.add(next); + expectedSizes.add((long) CoderUtils.encodeToByteArray(coder, next).length); + } + + fileReader.close(); + + // Compare the expected and the actual elements. + Assert.assertEquals(elems, actual); + Assert.assertEquals(expectedSizes, actualSizes); + } + + @Test + public void testWriteFile() throws Exception { + runTestWriteFile(TestUtils.INTS, AvroCoder.of(Integer.class)); + } + + @Test + public void testWriteEmptyFile() throws Exception { + runTestWriteFile(TestUtils.NO_INTS, AvroCoder.of(Integer.class)); + } + + // TODO: sharded filenames + // TODO: writing to GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java new file mode 100644 index 0000000000000..3c81950fd29d6 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for AvroSourceFactory. + */ +@RunWith(JUnit4.class) +public class AvroSourceFactoryTest { + private final String pathToAvroFile = "/path/to/file.avro"; + + Source runTestCreateAvroSource(String filename, + @Nullable Long start, + @Nullable Long end, + CloudObject encoding) + throws Exception { + CloudObject spec = CloudObject.forClassName("AvroSource"); + addString(spec, "filename", filename); + if (start != null) { + addLong(spec, "start_offset", start); + } + if (end != null) { + addLong(spec, "end_offset", end); + } + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + return source; + } + + @Test + public void testCreatePlainAvroByteSource() throws Exception { + Coder coder = + WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Source source = runTestCreateAvroSource( + pathToAvroFile, null, null, coder.asCloudObject()); + + Assert.assertThat(source, new IsInstanceOf(AvroByteSource.class)); + AvroByteSource avroSource = (AvroByteSource) source; + Assert.assertEquals(pathToAvroFile, avroSource.avroSource.filename); + Assert.assertEquals(null, avroSource.avroSource.startPosition); + Assert.assertEquals(null, avroSource.avroSource.endPosition); + Assert.assertEquals(coder, avroSource.coder); + } + + @Test + public void testCreateRichAvroByteSource() throws Exception { + Coder coder = + WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Source source = runTestCreateAvroSource( + pathToAvroFile, 200L, 500L, coder.asCloudObject()); + + Assert.assertThat(source, new IsInstanceOf(AvroByteSource.class)); + AvroByteSource avroSource = (AvroByteSource) source; + Assert.assertEquals(pathToAvroFile, avroSource.avroSource.filename); + Assert.assertEquals(200L, (long) avroSource.avroSource.startPosition); + Assert.assertEquals(500L, (long) avroSource.avroSource.endPosition); + Assert.assertEquals(coder, avroSource.coder); + } + + @Test + public void testCreateRichAvroSource() throws Exception { + WindowedValue.WindowedValueCoder coder = + WindowedValue.getValueOnlyCoder(AvroCoder.of(Integer.class)); + Source source = runTestCreateAvroSource( + pathToAvroFile, 200L, 500L, coder.asCloudObject()); + + Assert.assertThat(source, new IsInstanceOf(AvroSource.class)); + AvroSource avroSource = (AvroSource) source; + 
Assert.assertEquals(pathToAvroFile, avroSource.filename); + Assert.assertEquals(200L, (long) avroSource.startPosition); + Assert.assertEquals(500L, (long) avroSource.endPosition); + Assert.assertEquals(coder.getValueCoder(), avroSource.avroCoder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java new file mode 100644 index 0000000000000..4855ef92e4d96 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MimeTypes; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.io.DatumWriter; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.OutputStream; +import java.nio.channels.Channels; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +import javax.annotation.Nullable; + +/** + * Tests for AvroSource. + */ +@RunWith(JUnit4.class) +public class AvroSourceTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private void runTestRead(List> elemsList, + AvroCoder coder, + boolean requireExactMatch) + throws Exception { + File tmpFile = tmpFolder.newFile("file.avro"); + String filename = tmpFile.getPath(); + + // Write the data. + OutputStream outStream = Channels.newOutputStream( + IOChannelUtils.create(filename, MimeTypes.BINARY)); + DatumWriter datumWriter = coder.createDatumWriter(); + DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); + fileWriter.create(coder.getSchema(), outStream); + boolean first = true; + List syncPoints = new ArrayList<>(); + List expectedSizes = new ArrayList<>(); + for (List elems : elemsList) { + if (first) { + first = false; + } else { + // Ensure a block boundary here. + long syncPoint = fileWriter.sync(); + syncPoints.add(syncPoint); + } + for (T elem : elems) { + fileWriter.append(elem); + expectedSizes.add(CoderUtils.encodeToByteArray(coder, elem).length); + } + } + fileWriter.close(); + + // Test reading the data back. 
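The splitting logic that follows picks byte offsets relative to the sync points recorded above, relying on the structure of Avro container files: records are grouped into blocks separated by 16-byte sync markers, DataFileWriter.sync() ends the current block and returns a position, and DataFileReader can jump back to such a position (or scan forward to the next marker from an arbitrary offset) and resume decoding there. A minimal, self-contained sketch of that mechanism using only the public Avro API; the class and file names here are illustrative:

    import org.apache.avro.Schema;
    import org.apache.avro.file.DataFileReader;
    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericDatumWriter;

    import java.io.File;

    public class AvroSyncMarkerSketch {
      public static void main(String[] args) throws Exception {
        Schema schema = Schema.create(Schema.Type.STRING);
        File file = File.createTempFile("sync-marker-sketch", ".avro");

        long blockTwoStart;
        try (DataFileWriter<CharSequence> writer =
            new DataFileWriter<>(new GenericDatumWriter<CharSequence>(schema))) {
          writer.create(schema, file);
          writer.append("first-block");
          // sync() ends the current block, emits a sync marker, and returns a
          // position that can later be passed to DataFileReader.seek().
          blockTwoStart = writer.sync();
          writer.append("second-block");
        }

        try (DataFileReader<CharSequence> reader =
            new DataFileReader<>(file, new GenericDatumReader<CharSequence>(schema))) {
          // Jump straight to the second block using the recorded sync position.
          reader.seek(blockTwoStart);
          while (reader.hasNext()) {
            System.out.println(reader.next());  // prints "second-block"
          }
        }
      }
    }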
+ List> actualElemsList = new ArrayList<>(); + List actualSizes = new ArrayList<>(); + Long startOffset = null; + Long endOffset; + long prevSyncPoint = 0; + for (long syncPoint : syncPoints) { + endOffset = (prevSyncPoint + syncPoint) / 2; + actualElemsList.add(readElems(filename, startOffset, endOffset, coder, actualSizes)); + startOffset = endOffset; + prevSyncPoint = syncPoint; + } + actualElemsList.add(readElems(filename, startOffset, null, coder, actualSizes)); + + // Compare the expected and the actual elements. + if (requireExactMatch) { + // Require the blocks to match exactly. (This works only for + // small block sizes. Large block sizes, bigger than Avro's + // internal sizes, lead to different splits.) + Assert.assertEquals(elemsList, actualElemsList); + } else { + // Just require the overall elements to be the same. (This + // works for any block size.) + List expected = new ArrayList<>(); + for (List elems : elemsList) { + expected.addAll(elems); + } + List actual = new ArrayList<>(); + for (List actualElems : actualElemsList) { + actual.addAll(actualElems); + } + Assert.assertEquals(expected, actual); + } + + Assert.assertEquals(expectedSizes, actualSizes); + } + + private List readElems(String filename, + @Nullable Long startOffset, + @Nullable Long endOffset, + Coder coder, + List actualSizes) + throws Exception { + AvroSource avroSource = + new AvroSource<>(filename, startOffset, endOffset, WindowedValue.getValueOnlyCoder(coder)); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(avroSource, actualSizes); + + List actualElems = new ArrayList<>(); + try (Source.SourceIterator> iterator = avroSource.iterator()) { + while (iterator.hasNext()) { + actualElems.add(iterator.next().getValue()); + } + } + return actualElems; + } + + @Test + public void testRead() throws Exception { + runTestRead(Collections.singletonList(TestUtils.INTS), + AvroCoder.of(Integer.class), + true /* require exact match */); + } + + @Test + public void testReadEmpty() throws Exception { + runTestRead(Collections.singletonList(TestUtils.NO_INTS), + AvroCoder.of(Integer.class), + true /* require exact match */); + } + + private List> generateInputBlocks(int numBlocks, + int blockSizeBytes, + int averageLineSizeBytes) { + Random random = new Random(0); + List> blocks = new ArrayList<>(numBlocks); + for (int blockNum = 0; blockNum < numBlocks; blockNum++) { + int numLines = blockSizeBytes / averageLineSizeBytes; + List lines = new ArrayList<>(numLines); + for (int lineNum = 0; lineNum < numLines; lineNum++) { + int numChars = random.nextInt(averageLineSizeBytes * 2); + StringBuilder sb = new StringBuilder(); + for (int charNum = 0; charNum < numChars; charNum++) { + sb.appendCodePoint(random.nextInt('z' - 'a' + 1) + 'a'); + } + lines.add(sb.toString()); + } + blocks.add(lines); + } + return blocks; + } + + @Test + public void testReadSmallRanges() throws Exception { + runTestRead(generateInputBlocks(3, 50, 5), + AvroCoder.of(String.class), + true /* require exact match */); + } + + @Test + public void testReadBigRanges() throws Exception { + runTestRead(generateInputBlocks(10, 128 * 1024, 100), + AvroCoder.of(String.class), + false /* don't require exact match */); + } + + // TODO: sharded filenames + // TODO: reading from GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java new file mode 100644 index 
0000000000000..0eb95c70205ca --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for BigQuerySourceFactory. + */ +@RunWith(JUnit4.class) +public class BigQuerySourceFactoryTest { + void runTestCreateBigQuerySource(String project, + String dataset, + String table, + CloudObject encoding) + throws Exception { + CloudObject spec = CloudObject.forClassName("BigQuerySource"); + addString(spec, "project", project); + addString(spec, "dataset", dataset); + addString(spec, "table", table); + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + Assert.assertThat(source, new IsInstanceOf(BigQuerySource.class)); + BigQuerySource bigQuerySource = (BigQuerySource) source; + Assert.assertEquals(project, bigQuerySource.tableRef.getProjectId()); + Assert.assertEquals(dataset, bigQuerySource.tableRef.getDatasetId()); + Assert.assertEquals(table, bigQuerySource.tableRef.getTableId()); + } + + @Test + public void testCreateBigQuerySource() throws Exception { + runTestCreateBigQuerySource( + "someproject", "somedataset", "sometable", + makeCloudEncoding("TableRowJsonCoder")); + } + + @Test + public void testCreateBigQuerySourceCoderIgnored() throws Exception { + // BigQuery sources do not need a coder because the TableRow objects are read directly from + // the table using the BigQuery API. + runTestCreateBigQuerySource( + "someproject", "somedataset", "sometable", + makeCloudEncoding("BigEndianIntegerCoder")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java new file mode 100644 index 0000000000000..2ed4635e8c10b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.Table; +import com.google.api.services.bigquery.model.TableCell; +import com.google.api.services.bigquery.model.TableDataList; +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Tests for BigQuerySource. + * + *
<p>
The tests just make sure a basic scenario of reading works because the class itself is a + * thin wrapper over {@code BigQueryTableRowIterator}. The tests for the wrapped class have + * comprehensive coverage. + */ +@RunWith(JUnit4.class) +public class BigQuerySourceTest { + + @Mock private Bigquery mockClient; + @Mock private Bigquery.Tables mockTables; + @Mock private Bigquery.Tables.Get mockTablesGet; + @Mock private Bigquery.Tabledata mockTabledata; + @Mock private Bigquery.Tabledata.List mockTabledataList; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + } + + @After + public void tearDown() { + verifyNoMoreInteractions(mockClient); + verifyNoMoreInteractions(mockTables); + verifyNoMoreInteractions(mockTablesGet); + verifyNoMoreInteractions(mockTabledata); + verifyNoMoreInteractions(mockTabledataList); + } + + private void onTableGet(Table table) throws IOException { + when(mockClient.tables()) + .thenReturn(mockTables); + when(mockTables.get(anyString(), anyString(), anyString())) + .thenReturn(mockTablesGet); + when(mockTablesGet.execute()) + .thenReturn(table); + } + + private void verifyTableGet() throws IOException { + verify(mockClient).tables(); + verify(mockTables).get("project", "dataset", "table"); + verify(mockTablesGet).execute(); + } + + private void onTableList(TableDataList result) throws IOException { + when(mockClient.tabledata()) + .thenReturn(mockTabledata); + when(mockTabledata.list(anyString(), anyString(), anyString())) + .thenReturn(mockTabledataList); + when(mockTabledataList.execute()) + .thenReturn(result); + } + + private void verifyTabledataList() throws IOException { + verify(mockClient, atLeastOnce()).tabledata(); + verify(mockTabledata, atLeastOnce()).list("project", "dataset", "table"); + verify(mockTabledataList, atLeastOnce()).execute(); + // Max results may be set when testing for an empty table. 
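The onTableGet/onTableList helpers above stub a fluent, generated client one hop at a time: the mock client returns a mock Tables collection, which returns a mock Get request, which finally returns the prepared Table, and the verify helpers then walk the same chain. A minimal sketch of that per-hop stubbing pattern with Mockito, using hypothetical Client/Tables/Call interfaces in place of the generated Bigquery classes:

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.verify;
    import static org.mockito.Mockito.when;

    public class ChainedStubSketch {
      // Hypothetical stand-ins for the generated client classes.
      interface Call { String execute(); }
      interface Tables { Call get(String project, String dataset, String table); }
      interface Client { Tables tables(); }

      public static void main(String[] args) {
        Client client = mock(Client.class);
        Tables tables = mock(Tables.class);
        Call call = mock(Call.class);

        // Each hop of client.tables().get(...).execute() is stubbed separately.
        when(client.tables()).thenReturn(tables);
        when(tables.get("project", "dataset", "table")).thenReturn(call);
        when(call.execute()).thenReturn("row-data");

        System.out.println(
            client.tables().get("project", "dataset", "table").execute());  // row-data

        // Verification mirrors the stubbing, one hop at a time.
        verify(client).tables();
        verify(tables).get("project", "dataset", "table");
        verify(call).execute();
      }
    }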
+ verify(mockTabledataList, atLeast(0)).setMaxResults(anyLong()); + } + + private Table basicTableSchema() { + return new Table() + .setSchema(new TableSchema() + .setFields(Arrays.asList( + new TableFieldSchema() + .setName("name") + .setType("STRING"), + new TableFieldSchema() + .setName("integer") + .setType("INTEGER"), + new TableFieldSchema() + .setName("float") + .setType("FLOAT"), + new TableFieldSchema() + .setName("bool") + .setType("BOOLEAN") + ))); + } + + private TableRow rawRow(Object...args) { + List cells = new LinkedList<>(); + for (Object a : args) { + cells.add(new TableCell().setV(a)); + } + return new TableRow().setF(cells); + } + + private TableDataList rawDataList(TableRow...rows) { + return new TableDataList() + .setRows(Arrays.asList(rows)); + } + + @Test + public void testRead() throws IOException { + onTableGet(basicTableSchema()); + + // BQ API data is always encoded as a string + TableDataList dataList = rawDataList( + rawRow("Arthur", "42", "3.14159", "false"), + rawRow("Allison", "79", "2.71828", "true") + ); + onTableList(dataList); + + BigQuerySource source = new BigQuerySource( + mockClient, + new TableReference() + .setProjectId("project") + .setDatasetId("dataset") + .setTableId("table")); + + BigQuerySource.SourceIterator iterator = source.iterator(); + Assert.assertTrue(iterator.hasNext()); + TableRow row = iterator.next(); + + Assert.assertEquals("Arthur", row.get("name")); + Assert.assertEquals("42", row.get("integer")); + Assert.assertEquals(3.14159, row.get("float")); + Assert.assertEquals(false, row.get("bool")); + + row = iterator.next(); + + Assert.assertEquals("Allison", row.get("name")); + Assert.assertEquals("79", row.get("integer")); + Assert.assertEquals(2.71828, row.get("float")); + Assert.assertEquals(true, row.get("bool")); + + Assert.assertFalse(iterator.hasNext()); + + verifyTableGet(); + verifyTabledataList(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java new file mode 100644 index 0000000000000..b616f6d75f372 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; +import com.google.cloud.dataflow.sdk.util.common.worker.Receiver; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.reflect.TypeToken; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for CombineValuesFn. + */ +@RunWith(JUnit4.class) +public class CombineValuesFnTest { + /** Example AccumulatingCombineFn. */ + public static class MeanInts extends + Combine.AccumulatingCombineFn { + + class CountSum extends + Combine.AccumulatingCombineFn.Accumulator { + + long count; + double sum; + + @Override + public void addInput(Integer element) { + count++; + sum += element.doubleValue(); + } + + @Override + public void mergeAccumulator(CountSum accumulator) { + count += accumulator.count; + sum += accumulator.sum; + } + + @Override + public String extractOutput() { + return String.format("%.1f", count == 0 ? 0.0 : sum / count); + } + + public CountSum(long count, double sum) { + this.count = count; + this.sum = sum; + } + + @Override + public int hashCode() { + return KV.of(count, sum).hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || !(obj instanceof CountSum)) { + return false; + } + if (obj == this) { + return true; + } + + CountSum other = (CountSum) obj; + return (this.count == other.count) + && (Math.abs(this.sum - other.sum) < 0.1); + } + } + + @Override + public CountSum createAccumulator() { + return new CountSum(0, 0.0); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return new CountSumCoder(); + } + } + + /** + * An example "cheap" accumulator coder. 
+ */ + public static class CountSumCoder implements Coder { + public CountSumCoder() { } + + @Override + public void encode( + MeanInts.CountSum value, OutputStream outStream, Context context) + throws CoderException, IOException { + DataOutputStream dataStream = new DataOutputStream(outStream); + dataStream.writeLong(value.count); + dataStream.writeDouble(value.sum); + } + + @Override + public MeanInts.CountSum decode(InputStream inStream, Context context) + throws CoderException, IOException { + DataInputStream dataStream = new DataInputStream(inStream); + long count = dataStream.readLong(); + double sum = dataStream.readDouble(); + return (new MeanInts ()).new CountSum(count, sum); + } + + @Override + public boolean isDeterministic() { return true; } + + public CloudObject asCloudObject() { + return makeCloudEncoding(this.getClass().getName()); + } + + @Override + public List> getCoderArguments() { return null; } + + public List getInstanceComponents(MeanInts.CountSum exampleValue) { + return null; + } + + @Override + public boolean isRegisterByteSizeObserverCheap( + MeanInts.CountSum value, Context context) { + return true; + } + + @Override + public void registerByteSizeObserver( + MeanInts.CountSum value, ElementByteSizeObserver observer, Context ctx) + throws Exception { + observer.update((long) 16); + } + } + + static class TestReceiver implements Receiver { + List receivedElems = new ArrayList<>(); + + @Override + public void process(Object outputElem) { + receivedElems.add(outputElem); + } + } + + private static ParDoFn createCombineValuesFn( + String phase, Combine.KeyedCombineFn combineFn) throws Exception { + // This partially mirrors the work that + // com.google.cloud.dataflow.sdk.transforms.Combine.translateHelper + // does, at least for the KeyedCombineFn. The phase is generated + // by the back-end. 
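CountSumCoder above writes the accumulator with a fixed-width layout, one long for the count followed by one double for the sum, which is why registerByteSizeObserver can report a constant 16 bytes without actually encoding the value. A small sketch of that round trip using plain java.io streams, with the Coder machinery stripped away; the class name is illustrative:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class CountSumEncodingSketch {
      public static void main(String[] args) throws IOException {
        long count = 3;
        double sum = 18.0;

        // Encode: one long (8 bytes) followed by one double (8 bytes).
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeLong(count);
        out.writeDouble(sum);
        out.flush();
        byte[] encoded = bytes.toByteArray();
        System.out.println(encoded.length);  // 16

        // Decode in the same order.
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(encoded));
        System.out.println(in.readLong());    // 3
        System.out.println(in.readDouble());  // 18.0
      }
    }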
+ CloudObject spec = CloudObject.forClassName("CombineValuesFn"); + addString(spec, PropertyNames.SERIALIZED_FN, + byteArrayToJsonString(serializeToByteArray(combineFn))); + addString(spec, PropertyNames.PHASE, phase); + + return CombineValuesFn.create( + PipelineOptionsFactory.create(), + spec, + "name", + null, // no side inputs + null, // no side outputs + 1, // single main output + new BatchModeExecutionContext(), + (new CounterSet()).getAddCounterMutator(), + null); + } + + @Test + public void testCombineValuesFnAll() throws Exception { + TestReceiver receiver = new TestReceiver(); + + Combine.KeyedCombineFn combiner = + (new MeanInts()).asKeyedFn(); + + ParDoFn combineParDoFn = createCombineValuesFn( + CombineValuesFn.CombinePhase.ALL, combiner); + + combineParDoFn.startBundle(receiver); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("a", Arrays.asList(5, 6, 7)))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("b", Arrays.asList(1, 3, 7)))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("c", Arrays.asList(3, 6, 8, 9)))); + combineParDoFn.finishBundle(); + + Object[] expectedReceivedElems = { + WindowedValue.valueInGlobalWindow(KV.of("a", "6.0")), + WindowedValue.valueInGlobalWindow(KV.of("b", "3.7")), + WindowedValue.valueInGlobalWindow(KV.of("c", "6.5")), + }; + assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray()); + } + + @Test + public void testCombineValuesFnAdd() throws Exception { + TestReceiver receiver = new TestReceiver(); + MeanInts mean = new MeanInts(); + + Combine.KeyedCombineFn combiner = mean.asKeyedFn(); + + ParDoFn combineParDoFn = createCombineValuesFn( + CombineValuesFn.CombinePhase.ADD, combiner); + + combineParDoFn.startBundle(receiver); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("a", Arrays.asList(5, 6, 7)))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("b", Arrays.asList(1, 3, 7)))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("c", Arrays.asList(3, 6, 8, 9)))); + combineParDoFn.finishBundle(); + + Object[] expectedReceivedElems = { + WindowedValue.valueInGlobalWindow(KV.of("a", mean.new CountSum(3, 18))), + WindowedValue.valueInGlobalWindow(KV.of("b", mean.new CountSum(3, 11))), + WindowedValue.valueInGlobalWindow(KV.of("c", mean.new CountSum(4, 26))) + }; + assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray()); + } + + @Test + public void testCombineValuesFnMerge() throws Exception { + TestReceiver receiver = new TestReceiver(); + MeanInts mean = new MeanInts(); + + Combine.KeyedCombineFn combiner = mean.asKeyedFn(); + + ParDoFn combineParDoFn = createCombineValuesFn( + CombineValuesFn.CombinePhase.MERGE, combiner); + + combineParDoFn.startBundle(receiver); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("a", + Arrays.asList( + mean.new CountSum(3, 6), + mean.new CountSum(2, 9), + mean.new CountSum(1, 12))))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("b", + Arrays.asList( + mean.new CountSum(2, 20), + mean.new CountSum(1, 1))))); + combineParDoFn.finishBundle(); + + Object[] expectedReceivedElems = { + WindowedValue.valueInGlobalWindow(KV.of("a", mean.new CountSum(6, 27))), + WindowedValue.valueInGlobalWindow(KV.of("b", mean.new CountSum(3, 21))), + }; + assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray()); + } + + @Test + public void 
testCombineValuesFnExtract() throws Exception { + TestReceiver receiver = new TestReceiver(); + MeanInts mean = new MeanInts(); + + Combine.KeyedCombineFn combiner = mean.asKeyedFn(); + + ParDoFn combineParDoFn = createCombineValuesFn( + CombineValuesFn.CombinePhase.EXTRACT, combiner); + + combineParDoFn.startBundle(receiver); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("a", mean.new CountSum(6, 27)))); + combineParDoFn.processElement(WindowedValue.valueInGlobalWindow( + KV.of("b", mean.new CountSum(3, 21)))); + combineParDoFn.finishBundle(); + + assertArrayEquals( + new Object[]{ WindowedValue.valueInGlobalWindow(KV.of("a", "4.5")), + WindowedValue.valueInGlobalWindow(KV.of("b", "7.0")) }, + receiver.receivedElems.toArray()); + } + + @Test + public void testCombineValuesFnCoders() throws Exception { + CoderRegistry registry = new CoderRegistry(); + registry.registerStandardCoders(); + + MeanInts meanInts = new MeanInts(); + MeanInts.CountSum countSum = meanInts.new CountSum(6, 27); + + Coder coder = meanInts.getAccumulatorCoder( + registry, registry.getDefaultCoder(TypeToken.of(Integer.class))); + + assertEquals( + countSum, + CoderUtils.decodeFromByteArray(coder, + CoderUtils.encodeToByteArray(coder, countSum))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannelTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannelTest.java new file mode 100644 index 0000000000000..e27fa1832870a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CopyableSeekableByteChannelTest.java @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Preconditions.checkArgument; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.SeekableByteChannel; + +/** Unit tests for {@link CopyableSeekableByteChannel}. */ +@RunWith(JUnit4.class) +public final class CopyableSeekableByteChannelTest { + @Test + public void copiedChannelShouldMaintainIndependentPosition() + throws IOException { + ByteBuffer dst = ByteBuffer.allocate(6); + SeekableByteChannel base = + new FakeSeekableByteChannel("Hello, world! 
:-)".getBytes()); + base.position(1); + + CopyableSeekableByteChannel chan = new CopyableSeekableByteChannel(base); + assertThat(chan.position(), equalTo((long) 1)); + + CopyableSeekableByteChannel copy = chan.copy(); + assertThat(copy.position(), equalTo((long) 1)); + + assertThat(chan.read(dst), equalTo(6)); + assertThat(chan.position(), equalTo((long) 7)); + assertThat(new String(dst.array()), equalTo("ello, ")); + dst.rewind(); + + assertThat(copy.position(), equalTo((long) 1)); + copy.position(3); + assertThat(copy.read(dst), equalTo(6)); + assertThat(copy.position(), equalTo((long) 9)); + assertThat(new String(dst.array()), equalTo("lo, wo")); + dst.rewind(); + + assertThat(chan.read(dst), equalTo(6)); + assertThat(chan.position(), equalTo((long) 13)); + assertThat(new String(dst.array()), equalTo("world!")); + dst.rewind(); + + assertThat(chan.read(dst), equalTo(4)); + assertThat(chan.position(), equalTo((long) 17)); + assertThat(new String(dst.array()), equalTo(" :-)d!")); + dst.rewind(); + + assertThat(copy.position(), equalTo((long) 9)); + assertThat(copy.read(dst), equalTo(6)); + assertThat(new String(dst.array()), equalTo("rld! :")); + } + + private static final class FakeSeekableByteChannel + implements SeekableByteChannel { + private boolean closed = false; + private ByteBuffer data; + + public FakeSeekableByteChannel(byte[] data) { + this.data = ByteBuffer.wrap(data); + } + + @Override + public long position() throws IOException { + checkClosed(); + return data.position(); + } + + @Override + public SeekableByteChannel position(long newPosition) throws IOException { + checkArgument(newPosition >= 0); + checkClosed(); + data.position((int) newPosition); + return this; + } + + @Override + public int read(ByteBuffer dst) throws IOException { + checkClosed(); + if (!data.hasRemaining()) { + return -1; + } + int count = Math.min(data.remaining(), dst.remaining()); + ByteBuffer src = data.slice(); + src.limit(count); + dst.put(src); + data.position(data.position() + count); + return count; + } + + @Override + public long size() throws IOException { + checkClosed(); + return data.limit(); + } + + @Override + public SeekableByteChannel truncate(long size) throws IOException { + checkClosed(); + data.limit((int) size); + return this; + } + + @Override + public int write(ByteBuffer src) throws IOException { + checkClosed(); + int count = Math.min(data.remaining(), src.remaining()); + ByteBuffer copySrc = src.slice(); + copySrc.limit(count); + data.put(copySrc); + return count; + } + + @Override + public boolean isOpen() { + return !closed; + } + + @Override + public void close() { + closed = true; + } + + private void checkClosed() throws ClosedChannelException { + if (closed) { + throw new ClosedChannelException(); + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java new file mode 100644 index 0000000000000..2167a504183fc --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.CloudCounterUtils.extractCounter; +import static com.google.cloud.dataflow.sdk.util.CloudMetricUtils.extractCloudMetric; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MAX; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SET; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.argThat; +import static org.mockito.Mockito.timeout; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.api.services.dataflow.model.Position; +import com.google.api.services.dataflow.model.WorkItem; +import com.google.api.services.dataflow.model.WorkItemServiceState; +import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils; +import com.google.cloud.dataflow.sdk.util.common.Metric; +import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; +import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.Operation; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import org.hamcrest.Description; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.ArgumentMatcher; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; 
+import java.util.List; + +import javax.annotation.Nullable; + +/** Unit tests for {@link DataflowWorkProgressUpdater}. */ +@RunWith(JUnit4.class) +public class DataflowWorkProgressUpdaterTest { + static class TestMapTaskExecutor extends MapTaskExecutor { + ApproximateProgress progress = null; + + public TestMapTaskExecutor(CounterSet counters) { + super(new ArrayList(), + counters, + new StateSampler("test", counters.getAddCounterMutator())); + } + + @Override + public Source.Progress getWorkerProgress() { + return cloudProgressToSourceProgress(progress); + } + + @Override + public Source.Position proposeStopPosition( + Source.Progress suggestedStopPoint) { + @Nullable ApproximateProgress progress = sourceProgressToCloudProgress(suggestedStopPoint); + if (progress == null) { + return null; + } + return cloudPositionToSourcePosition(progress.getPosition()); + } + + public void setWorkerProgress(ApproximateProgress progress) { + this.progress = progress; + } + } + + static { + // To shorten wait times during testing. + System.setProperty("minimum_worker_update_interval_millis", "100"); + System.setProperty("worker_lease_renewal_latency_margin", "100"); + } + + private static final String PROJECT_ID = "TEST_PROJECT_ID"; + private static final String JOB_ID = "TEST_JOB_ID"; + private static final String WORKER_ID = "TEST_WORKER_ID"; + private static final Long WORK_ID = 1234567890L; + private static final String COUNTER_NAME = "test-counter-"; + private static final AggregationKind[] COUNTER_KINDS = {SUM, MAX, SET}; + private static final Long COUNTER_VALUE1 = 12345L; + private static final Double COUNTER_VALUE2 = Math.PI; + private static final String COUNTER_VALUE3 = "value"; + + @Rule public final ExpectedException thrown = ExpectedException.none(); + @Mock private DataflowWorker.WorkUnitClient workUnitClient; + private CounterSet counters; + private List> metrics; + private TestMapTaskExecutor worker; + private WorkItem workItem; + private DataflowWorkerHarnessOptions options; + private DataflowWorkProgressUpdater progressUpdater; + private long nowMillis; + + @Before + public void initMocksAndWorkflowServiceAndWorkerAndWork() throws IOException { + MockitoAnnotations.initMocks(this); + + options = PipelineOptionsFactory.createFromSystemProperties(); + options.setProject(PROJECT_ID); + options.setJobId(JOB_ID); + options.setWorkerId(WORKER_ID); + + metrics = new ArrayList<>(); + counters = new CounterSet(); + worker = new TestMapTaskExecutor(counters) { + @Override + public Collection> getOutputMetrics() { + return metrics; + } + }; + nowMillis = System.currentTimeMillis(); + + workItem = new WorkItem(); + workItem.setProjectId(PROJECT_ID); + workItem.setJobId(JOB_ID); + workItem.setId(WORK_ID); + workItem.setLeaseExpireTime(toCloudTime(new Instant(nowMillis + 1000))); + workItem.setReportStatusInterval(toCloudDuration(Duration.millis(500))); + + progressUpdater = new DataflowWorkProgressUpdater( + workItem, worker, workUnitClient, options); + } + + // TODO: Remove sleeps from this test by using a mock sleeper. This + // requires a redesign of the WorkProgressUpdater to use a Sleeper and + // not use a ScheduledThreadExecutor which relies on real time passing. 
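The timeouts asserted in the tests below follow a simple rule: each service response renews the lease, and the next progress report is expected after roughly half of the remaining lease time, so a 1000 ms lease leads to a report after about 500 ms and a 2000 ms lease after about 1000 ms. A toy sketch of that arithmetic; the helper name is hypothetical, and the real scheduling (which also honors the reported status interval) lives inside the progress updater:

    public class ReportIntervalSketch {
      // Hypothetical helper: delay until the next progress report, given the
      // current time and the lease expiration returned by the service.
      static long nextReportDelayMillis(long nowMillis, long leaseExpireMillis) {
        long leaseRemainingMillis = Math.max(0, leaseExpireMillis - nowMillis);
        return leaseRemainingMillis / 2;
      }

      public static void main(String[] args) {
        long now = 0;
        System.out.println(nextReportDelayMillis(now, 1000));  // 500
        System.out.println(nextReportDelayMillis(now, 2000));  // 1000
        System.out.println(nextReportDelayMillis(now, 3000));  // 1500
      }
    }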
+ @Test(timeout = 2000) + public void workProgressUpdaterUpdates() throws Exception { + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))).thenReturn( + generateServiceState(nowMillis + 2000, 1000, null)); + setUpCounters(2); + setUpMetrics(3); + setUpProgress(makeRecordIndexProgress(1L)); + progressUpdater.startReportingProgress(); + // The initial update should be sent after leaseRemainingTime / 2. + verify(workUnitClient, timeout(600)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withCounters(2) + .withMetrics(3) + .withProgress(makeRecordIndexProgress(1L)))); + progressUpdater.stopReportingProgress(); + } + + // Verifies that ReportWorkItemStatusRequest contains correct progress report + // and actual stop position report. + @Test(timeout = 5000) + public void workProgressUpdaterAdaptsProgressInterval() throws Exception { + // Mock that the next reportProgress call will return a response that asks + // us to truncate the task at index 3, and the next two will not ask us to + // truncate at all. + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, + makeRecordIndexPosition(3L))) + .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)) + .thenReturn(generateServiceState(nowMillis + 4000, 3000, null)); + + setUpCounters(3); + setUpMetrics(2); + setUpProgress(makeRecordIndexProgress(1L)); + progressUpdater.startReportingProgress(); + // The initial update should be sent after + // leaseRemainingTime (1000) / 2 = 500. + verify(workUnitClient, timeout(600)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withCounters(3) + .withMetrics(2) + .withProgress(makeRecordIndexProgress(1L)))); + + setUpCounters(5); + setUpMetrics(6); + setUpProgress(makeRecordIndexProgress(2L)); + // The second update should be sent after one second (2000 / 2). + verify(workUnitClient, timeout(1100)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withCounters(5) + .withMetrics(6) + .withProgress(makeRecordIndexProgress(2L)) + .withStopPosition(makeRecordIndexPosition(3L)))); + + // After the request is sent, reset stop position cache to null. + assertNull(progressUpdater.getStopPosition()); + + setUpProgress(makeRecordIndexProgress(3L)); + + // The third update should be sent after one and half seconds (3000 / 2). + verify(workUnitClient, timeout(1600)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withProgress(makeRecordIndexProgress(3L)))); + + progressUpdater.stopReportingProgress(); + } + + // Verifies that a last update is sent when there is an unacknowledged split request. + @Test(timeout = 3000) + public void workProgressUpdaterLastUpdate() throws Exception { + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, + makeRecordIndexPosition(2L))) + .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)); + + setUpProgress(makeRecordIndexProgress(1L)); + progressUpdater.startReportingProgress(); + // The initial update should be sent after leaseRemainingTime / 2 = 500 msec. + Thread.sleep(600); + verify(workUnitClient, timeout(200)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withProgress(makeRecordIndexProgress(1L)))); + + // The first update should include the new actual stop position. + // Verify that the progressUpdater has recorded it. 
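The verify(..., timeout(...)) calls above, combined with argThat matchers, make the test wait for the background reporting thread to issue the call instead of sleeping for a fixed interval and hoping the report has already been sent. A minimal sketch of Mockito's timeout-based verification against work done on another thread; the Reporter interface is a hypothetical stand-in for the work unit client:

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.timeout;
    import static org.mockito.Mockito.verify;

    public class TimeoutVerifySketch {
      interface Reporter { void report(String status); }

      public static void main(String[] args) {
        final Reporter reporter = mock(Reporter.class);

        // The call under test happens on another thread, as it does in the
        // progress updater.
        new Thread(new Runnable() {
          @Override
          public void run() {
            try {
              Thread.sleep(100);
            } catch (InterruptedException e) {
              Thread.currentThread().interrupt();
            }
            reporter.report("progress");
          }
        }).start();

        // Passes as soon as the call is observed; fails only if 500 ms elapse
        // without it.
        verify(reporter, timeout(500)).report("progress");
        System.out.println("verified");
      }
    }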
+ assertEquals(makeRecordIndexPosition(2L), + sourcePositionToCloudPosition(progressUpdater.getStopPosition())); + + setUpProgress(makeRecordIndexProgress(2L)); + // The second update should be sent after one second (2000 / 2). + Thread.sleep(200); // not enough time for an update so the latest stop position is not + // acknowledged. + // Check that the progressUpdater still has a pending stop position to send + assertEquals(makeRecordIndexPosition(2L), + sourcePositionToCloudPosition(progressUpdater.getStopPosition())); + + progressUpdater.stopReportingProgress(); // should send the last update + // check that the progressUpdater is done with reporting its latest stop position + assertNull(progressUpdater.getStopPosition()); + + // Verify that the last update contained the latest stop position + verify(workUnitClient, timeout(1000)).reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withStopPosition(makeRecordIndexPosition(2L)))); + } + + private void setUpCounters(int n) { + counters.clear(); + for (int i = 0; i < n; i++) { + counters.add(makeCounter(i)); + } + } + + private static Counter makeCounter(int i) { + if (i % 3 == 0) { + return Counter.longs(COUNTER_NAME + i, COUNTER_KINDS[0]) + .addValue(COUNTER_VALUE1 + i).addValue(COUNTER_VALUE1 + i * 2); + } else if (i % 3 == 1) { + return Counter.doubles(COUNTER_NAME + i, COUNTER_KINDS[1]) + .addValue(COUNTER_VALUE2 + i).addValue(COUNTER_VALUE2 + i * 3); + } else { + return Counter.strings(COUNTER_NAME + i, COUNTER_KINDS[2]) + .addValue(COUNTER_VALUE3 + i).addValue(COUNTER_NAME + i * 5); + } + } + + private static Metric makeMetric(int i) { + return new DoubleMetric(String.valueOf(i), (double) i); + } + + private void setUpMetrics(int n) { + metrics = new ArrayList<>(); + for (int i = 0; i < n; i++) { + metrics.add(makeMetric(i)); + } + } + + private void setUpProgress(ApproximateProgress progress) { + worker.setWorkerProgress(progress); + } + + private com.google.api.services.dataflow.model.Position makeRecordIndexPosition(Long index) { + com.google.api.services.dataflow.model.Position position = + new com.google.api.services.dataflow.model.Position(); + position.setRecordIndex(index); + return position; + } + + private ApproximateProgress makeRecordIndexProgress(Long index) { + return new ApproximateProgress().setPosition(makeRecordIndexPosition(index)); + } + + private WorkItemServiceState generateServiceState( + long leaseExpirationTimestamp, int progressReportIntervalMs, + Position suggestedStopPosition) + throws IOException { + WorkItemServiceState responseState = new WorkItemServiceState(); + responseState.setFactory(Transport.getJsonFactory()); + responseState.setLeaseExpireTime(toCloudTime(new Instant(leaseExpirationTimestamp))); + responseState.setReportStatusInterval( + toCloudDuration(Duration.millis(progressReportIntervalMs))); + + if (suggestedStopPosition != null) { + responseState.setSuggestedStopPosition(suggestedStopPosition); + } + + return responseState; + } + + private static final class ExpectedDataflowProgress extends ArgumentMatcher { + @Nullable Integer counterCount; + @Nullable Integer metricCount; + @Nullable ApproximateProgress expectedProgress; + @Nullable Position expectedStopPosition; + + public ExpectedDataflowProgress withCounters(Integer counterCount) { + this.counterCount = counterCount; + return this; + } + + public ExpectedDataflowProgress withMetrics(Integer metricCount) { + this.metricCount = metricCount; + return this; + } + + public ExpectedDataflowProgress 
withProgress(ApproximateProgress expectedProgress) { + this.expectedProgress = expectedProgress; + return this; + } + + public ExpectedDataflowProgress withStopPosition(Position expectedStopPosition) { + this.expectedStopPosition = expectedStopPosition; + return this; + } + + @Override + public void describeTo(Description description) { + List values = new ArrayList<>(); + if (this.counterCount != null) { + for (int i = 0; i < counterCount; i++) { + values.add(extractCounter(makeCounter(i), false).toString()); + } + } + if (this.metricCount != null) { + for (int i = 0; i < metricCount; i++) { + values.add(extractCloudMetric(makeMetric(i), WORKER_ID).toString()); + } + } + if (this.expectedProgress != null) { + values.add("progress " + this.expectedProgress); + } + if (this.expectedStopPosition != null) { + values.add("stop position " + this.expectedStopPosition); + } else { + values.add("no stop position present"); + } + description.appendValueList("Dataflow progress with ", ", ", ".", values); + } + + @Override + public boolean matches(Object status) { + WorkItemStatus st = (WorkItemStatus) status; + return matchCountersAndMetrics(st) + && matchProgress(st) + && matchStopPosition(st); + } + + private boolean matchCountersAndMetrics(WorkItemStatus status) { + if (counterCount == null && metricCount == null) { + return true; + } + + List sentUpdates = status.getMetricUpdates(); + + if (counterCount + metricCount != sentUpdates.size()) { + return false; + } + + for (int i = 0; i < counterCount; i++) { + if (!sentUpdates.contains( + CounterTestUtils.extractCounterUpdate(makeCounter(i), false))) { + return false; + } + } + + for (int i = 0; i < metricCount; i++) { + if (!sentUpdates.contains(extractCloudMetric(makeMetric(i), WORKER_ID))) { + return false; + } + } + + return true; + } + + private boolean matchProgress(WorkItemStatus status) { + if (expectedProgress == null) { + return true; + } + ApproximateProgress progress = status.getProgress(); + return expectedProgress.equals(progress); + } + + private boolean matchStopPosition(WorkItemStatus status) { + Position actualStopPosition = status.getStopPosition(); + if (expectedStopPosition == null) { + return actualStopPosition == null; + } + return expectedStopPosition.equals(actualStopPosition); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java new file mode 100644 index 0000000000000..d1d369fe99acd --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.api.client.http.LowLevelHttpResponse; +import com.google.api.client.json.Json; +import com.google.api.client.testing.http.MockHttpTransport; +import com.google.api.client.testing.http.MockLowLevelHttpRequest; +import com.google.api.client.testing.http.MockLowLevelHttpResponse; +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.LeaseWorkItemRequest; +import com.google.api.services.dataflow.model.LeaseWorkItemResponse; +import com.google.api.services.dataflow.model.WorkItem; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.testing.RestoreMappedDiagnosticContext; +import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.slf4j.MDC; + +import java.io.IOException; + +/** Unit tests for {@link DataflowWorkerHarness}. 
*/ +@RunWith(JUnit4.class) +public class DataflowWorkerHarnessTest { + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + @Rule public TestRule restoreMDC = new RestoreMappedDiagnosticContext(); + @Rule public ExpectedException expectedException = ExpectedException.none(); + @Mock private MockHttpTransport transport; + @Mock private MockLowLevelHttpRequest request; + @Mock private DataflowWorker mockDataflowWorker; + + private Dataflow service; + + @Before + public void setUp() throws Exception { + MockitoAnnotations.initMocks(this); + when(transport.buildRequest(anyString(), anyString())).thenReturn(request); + doCallRealMethod().when(request).getContentAsString(); + + service = new Dataflow(transport, Transport.getJsonFactory(), null); + } + + @Test + public void testThatWeOnlyProcessWorkOnce() throws Exception { + when(mockDataflowWorker.getAndPerformWork()).thenReturn(true); + DataflowWorkerHarness.processWork(mockDataflowWorker); + verify(mockDataflowWorker).getAndPerformWork(); + verifyNoMoreInteractions(mockDataflowWorker); + } + + @Test + public void testThatWeOnlyProcessWorkOnceEvenWhenFailing() throws Exception { + when(mockDataflowWorker.getAndPerformWork()).thenReturn(false); + DataflowWorkerHarness.processWork(mockDataflowWorker); + verify(mockDataflowWorker).getAndPerformWork(); + verifyNoMoreInteractions(mockDataflowWorker); + } + + @Test + public void testCreationOfWorkerHarness() throws Exception { + System.getProperties().putAll(ImmutableMap + .builder() + .put("project_id", "projectId") + .put("job_id", "jobId") + .put("worker_id", "workerId") + .build()); + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + options.setGcpCredential(new TestCredential()); + assertNotNull(DataflowWorkerHarness.create(options)); + assertEquals("jobId", MDC.get("dataflow.jobId")); + assertEquals("workerId", MDC.get("dataflow.workerId")); + } + + @Test + public void testCloudServiceCall() throws Exception { + System.getProperties().putAll(ImmutableMap + .builder() + .put("project_id", "projectId") + .put("job_id", "jobId") + .put("worker_id", "workerId") + .build()); + WorkItem workItem = createWorkItem("projectId", "jobId"); + + when(request.execute()).thenReturn(generateMockResponse(workItem)); + + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + + DataflowWorker.WorkUnitClient client = + new DataflowWorkerHarness.DataflowWorkUnitClient(service, options); + + assertEquals(workItem, client.getWorkItem()); + + LeaseWorkItemRequest actualRequest = Transport.getJsonFactory().fromString( + request.getContentAsString(), LeaseWorkItemRequest.class); + assertEquals("workerId", actualRequest.getWorkerId()); + assertEquals(ImmutableList.of("workerId", "remote_source", "custom_source"), + actualRequest.getWorkerCapabilities()); + assertEquals(ImmutableList.of("map_task", "seq_map_task", "remote_source_task"), + actualRequest.getWorkItemTypes()); + assertEquals("1234", MDC.get("dataflow.workId")); + } + + @Test + public void testCloudServiceCallNoWorkId() throws Exception { + System.getProperties().putAll(ImmutableMap + .builder() + .put("project_id", "projectId") + .put("job_id", "jobId") + .put("worker_id", "workerId") + .build()); + + // If there's no work the service should return an empty work item. 
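+    // A WorkItem with no id set is how the service signals that nothing was
+    // leased; DataflowWorkUnitClient.getWorkItem() is expected to translate
+    // that into a null return, which the assertNull below verifies.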
+ WorkItem workItem = new WorkItem(); + + when(request.execute()).thenReturn(generateMockResponse(workItem)); + + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + + DataflowWorker.WorkUnitClient client = + new DataflowWorkerHarness.DataflowWorkUnitClient(service, options); + + assertNull(client.getWorkItem()); + + LeaseWorkItemRequest actualRequest = Transport.getJsonFactory().fromString( + request.getContentAsString(), LeaseWorkItemRequest.class); + assertEquals("workerId", actualRequest.getWorkerId()); + assertEquals(ImmutableList.of("workerId", "remote_source", "custom_source"), + actualRequest.getWorkerCapabilities()); + assertEquals(ImmutableList.of("map_task", "seq_map_task", "remote_source_task"), + actualRequest.getWorkItemTypes()); + } + + @Test + public void testCloudServiceCallNoWorkItem() throws Exception { + System.getProperties().putAll(ImmutableMap + .builder() + .put("project_id", "projectId") + .put("job_id", "jobId") + .put("worker_id", "workerId") + .build()); + + when(request.execute()).thenReturn(generateMockResponse()); + + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + + DataflowWorker.WorkUnitClient client = + new DataflowWorkerHarness.DataflowWorkUnitClient(service, options); + + assertNull(client.getWorkItem()); + + LeaseWorkItemRequest actualRequest = Transport.getJsonFactory().fromString( + request.getContentAsString(), LeaseWorkItemRequest.class); + assertEquals("workerId", actualRequest.getWorkerId()); + assertEquals(ImmutableList.of("workerId", "remote_source", "custom_source"), + actualRequest.getWorkerCapabilities()); + assertEquals(ImmutableList.of("map_task", "seq_map_task", "remote_source_task"), + actualRequest.getWorkItemTypes()); + } + + @Test + public void testCloudServiceCallMultipleWorkItems() throws Exception { + expectedException.expect(IOException.class); + expectedException.expectMessage( + "This version of the SDK expects no more than one work item from the service"); + System.getProperties().putAll(ImmutableMap + .builder() + .put("project_id", "projectId") + .put("job_id", "jobId") + .put("worker_id", "workerId") + .build()); + + WorkItem workItem1 = createWorkItem("projectId", "jobId"); + WorkItem workItem2 = createWorkItem("projectId", "jobId"); + + when(request.execute()).thenReturn(generateMockResponse(workItem1, workItem2)); + + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); + + DataflowWorker.WorkUnitClient client = + new DataflowWorkerHarness.DataflowWorkUnitClient(service, options); + + client.getWorkItem(); + } + + private LowLevelHttpResponse generateMockResponse(WorkItem ... workItems) throws Exception { + MockLowLevelHttpResponse response = new MockLowLevelHttpResponse(); + response.setContentType(Json.MEDIA_TYPE); + LeaseWorkItemResponse lease = new LeaseWorkItemResponse(); + lease.setWorkItems(Lists.newArrayList(workItems)); + // N.B. Setting the factory is necessary in order to get valid JSON. + lease.setFactory(Transport.getJsonFactory()); + response.setContent(lease.toPrettyString()); + return response; + } + + private WorkItem createWorkItem(String projectId, String jobId) { + WorkItem workItem = new WorkItem(); + workItem.setFactory(Transport.getJsonFactory()); + workItem.setProjectId(projectId); + workItem.setJobId(jobId); + + // We need to set a work id because otherwise the client will treat the response as + // indicating no work is available. 
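+    // The id chosen here is also what testCloudServiceCall expects to find in
+    // the MDC under "dataflow.workId" (as the string "1234").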
+ workItem.setId(1234L); + return workItem; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java new file mode 100644 index 0000000000000..2d51fb2838954 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.junit.Assert.assertFalse; +import static org.mockito.Matchers.argThat; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.model.WorkItem; +import com.google.api.services.dataflow.model.WorkItemStatus; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.testing.FastNanoClockAndSleeper; + +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +/** Unit tests for {@link DataflowWorker}. 
*/ +@RunWith(JUnit4.class) +public class DataflowWorkerTest { + @Rule + public FastNanoClockAndSleeper clockAndSleeper = new FastNanoClockAndSleeper(); + + @Mock + DataflowWorker.WorkUnitClient mockWorkUnitClient; + + @Mock + DataflowWorkerHarnessOptions options; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testWhenNoWorkThatWeReturnFalse() throws Exception { + DataflowWorker worker = new DataflowWorker(mockWorkUnitClient, options); + when(mockWorkUnitClient.getWorkItem()).thenReturn(null); + + assertFalse(worker.getAndPerformWork()); + } + + @Test + public void testWhenProcessingWorkUnitFailsWeReportStatus() throws Exception { + DataflowWorker worker = new DataflowWorker(mockWorkUnitClient, options); + when(mockWorkUnitClient.getWorkItem()).thenReturn(new WorkItem().setId(1L)).thenReturn(null); + + assertFalse(worker.getAndPerformWork()); + verify(mockWorkUnitClient).reportWorkItemStatus(argThat(cloudWorkHasErrors())); + } + + private Matcher cloudWorkHasErrors() { + return new TypeSafeMatcher() { + @Override + public void describeTo(Description description) { + description.appendText("WorkItemStatus expected to have errors"); + } + + @Override + protected boolean matchesSafely(WorkItemStatus status) { + return status.getCompleted() && !status.getErrors().isEmpty(); + } + }; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java new file mode 100644 index 0000000000000..b41bd1b2e2914 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.encodeBase64URLSafeString; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.Reiterable; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Source.SourceIterator; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.Lists; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Tests for GroupingShuffleSource. + */ +@RunWith(JUnit4.class) +public class GroupingShuffleSourceTest { + static final List>> NO_KVS = Collections.emptyList(); + + static final Instant timestamp = new Instant(123000); + static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + + static final List>> KVS = Arrays.asList( + KV.of(1, Arrays.asList("in 1a", "in 1b")), + KV.of(2, Arrays.asList("in 2a", "in 2b")), + KV.of(3, Arrays.asList("in 3")), + KV.of(4, Arrays.asList("in 4a", "in 4b", "in 4c", "in 4d")), + KV.of(5, Arrays.asList("in 5"))); + + /** How many of the values with each key are to be read. */ + enum ValuesToRead { + /** Don't even ask for the values iterator. */ + SKIP_VALUES, + /** Get the iterator, but don't read any values. */ + READ_NO_VALUES, + /** Read just the first value. */ + READ_ONE_VALUE, + /** Read all the values. 
*/ + READ_ALL_VALUES + } + + void runTestReadShuffleSource(List>> input, + ValuesToRead valuesToRead) + throws Exception { + Coder> elemCoder = + WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()); + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleSource> shuffleSource = + new GroupingShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of( + WindowedValue.getFullCoder(StringUtf8Coder.of(), + IntervalWindow.getCoder()))), + IntervalWindow.getCoder()), + context); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(shuffleSource); + + TestShuffleReader shuffleReader = new TestShuffleReader(); + List expectedSizes = new ArrayList<>(); + for (KV> kvs : input) { + Integer key = kvs.getKey(); + byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), key); + + for (String value : kvs.getValue()) { + byte[] valueByte = CoderUtils.encodeToByteArray( + elemCoder, WindowedValue.of(value, timestamp, Lists.newArrayList(window))); + byte[] skey = CoderUtils.encodeToByteArray(InstantCoder.of(), timestamp); + ShuffleEntry shuffleEntry = new ShuffleEntry(keyByte, skey, valueByte); + shuffleReader.addEntry(shuffleEntry); + expectedSizes.add(shuffleEntry.length()); + } + } + + List>>> actual = new ArrayList<>(); + try (SourceIterator>>>> iter = + shuffleSource.iterator(shuffleReader)) { + Iterable> prevValuesIterable = null; + Iterator> prevValuesIterator = null; + while (iter.hasNext()) { + Assert.assertTrue(iter.hasNext()); + Assert.assertTrue(iter.hasNext()); + + KV>> elem = iter.next().getValue(); + Integer key = elem.getKey(); + List> values = new ArrayList<>(); + if (valuesToRead.ordinal() > ValuesToRead.SKIP_VALUES.ordinal()) { + if (prevValuesIterable != null) { + prevValuesIterable.iterator(); // Verifies that this does not throw. + } + if (prevValuesIterator != null) { + prevValuesIterator.hasNext(); // Verifies that this does not throw. + } + + Iterable> valuesIterable = elem.getValue(); + Iterator> valuesIterator = valuesIterable.iterator(); + + if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) { + while (valuesIterator.hasNext()) { + Assert.assertTrue(valuesIterator.hasNext()); + Assert.assertTrue(valuesIterator.hasNext()); + Assert.assertEquals("BatchModeExecutionContext key", + key, context.getKey()); + values.add(valuesIterator.next()); + if (valuesToRead == ValuesToRead.READ_ONE_VALUE) { + break; + } + } + if (valuesToRead == ValuesToRead.READ_ALL_VALUES) { + Assert.assertFalse(valuesIterator.hasNext()); + Assert.assertFalse(valuesIterator.hasNext()); + + try { + valuesIterator.next(); + Assert.fail("Expected NoSuchElementException"); + } catch (NoSuchElementException exn) { + // As expected. + } + valuesIterable.iterator(); // Verifies that this does not throw. + } + } + + prevValuesIterable = valuesIterable; + prevValuesIterator = valuesIterator; + } + + actual.add(KV.of(key, values)); + } + Assert.assertFalse(iter.hasNext()); + Assert.assertFalse(iter.hasNext()); + try { + iter.next(); + Assert.fail("Expected NoSuchElementException"); + } catch (NoSuchElementException exn) { + // As expected. 
+ } + } + + List>>> expected = new ArrayList<>(); + for (KV> kvs : input) { + Integer key = kvs.getKey(); + List> values = new ArrayList<>(); + if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) { + for (String value : kvs.getValue()) { + values.add(WindowedValue.of(value, timestamp, Lists.newArrayList(window))); + if (valuesToRead == ValuesToRead.READ_ONE_VALUE) { + break; + } + } + } + expected.add(KV.of(key, values)); + } + Assert.assertEquals(expected, actual); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testReadEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(NO_KVS, ValuesToRead.READ_ALL_VALUES); + } + + @Test + public void testReadEmptyShuffleSourceSkippingValues() throws Exception { + runTestReadShuffleSource(NO_KVS, ValuesToRead.SKIP_VALUES); + } + + @Test + public void testReadNonEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(KVS, ValuesToRead.READ_ALL_VALUES); + } + + @Test + public void testReadNonEmptyShuffleSourceReadingOneValue() throws Exception { + runTestReadShuffleSource(KVS, ValuesToRead.READ_ONE_VALUE); + } + + @Test + public void testReadNonEmptyShuffleSourceReadingNoValues() throws Exception { + runTestReadShuffleSource(KVS, ValuesToRead.READ_NO_VALUES); + } + + @Test + public void testReadNonEmptyShuffleSourceSkippingValues() throws Exception { + runTestReadShuffleSource(KVS, ValuesToRead.SKIP_VALUES); + } + + static byte[] fabricatePosition(int shard, byte[] key) throws Exception { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(os); + dos.writeInt(shard); + if (key != null) { + dos.writeInt(Arrays.hashCode(key)); + } + return os.toByteArray(); + } + + @Test + public void testReadFromEmptyShuffleSourceAndUpdateStopPosition() + throws Exception { + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleSource shuffleSource = + new GroupingShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); + TestShuffleReader shuffleReader = new TestShuffleReader(); + try (Source.SourceIterator>>> iter = + shuffleSource.iterator(shuffleReader)) { + + Position proposedStopPosition = new Position(); + String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); + proposedStopPosition.setShufflePosition(stop); + + // Cannot update stop position since all input was consumed. + Assert.assertEquals(null, iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + } + } + + @Test + public void testReadFromShuffleSourceAndFailToUpdateStopPosition() + throws Exception { + BatchModeExecutionContext context = new BatchModeExecutionContext(); + final int kFirstShard = 0; + + TestShuffleReader shuffleReader = new TestShuffleReader(); + final int kNumRecords = 2; + for (int i = 0; i < kNumRecords; ++i) { + byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); + shuffleReader.addEntry(new ShuffleEntry( + fabricatePosition(kFirstShard, key), key, null, key)); + } + + // Note that TestShuffleReader start/end positions are in the + // space of keys not the positions (TODO: should probably always + // use positions instead). 
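+    // The source's end position is fabricated beyond the last record, so every
+    // stop-position proposal made below is expected to be rejected, i.e.
+    // updateStopPosition should return null each time.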
+ String stop = encodeBase64URLSafeString( + fabricatePosition(kNumRecords, null)); + GroupingShuffleSource shuffleSource = + new GroupingShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, stop, + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); + + try (Source.SourceIterator>>> iter = + shuffleSource.iterator(shuffleReader)) { + + Position proposedStopPosition = new Position(); + proposedStopPosition.setShufflePosition( + encodeBase64URLSafeString(fabricatePosition(kNumRecords + 1, null))); + + // Cannot update the stop position since the value provided is + // past the current stop position. + Assert.assertEquals(null, iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + + int i = 0; + for (; iter.hasNext(); ++i) { + KV> elem = iter.next().getValue(); + if (i == 0) { + // First record + byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); + proposedStopPosition.setShufflePosition( + encodeBase64URLSafeString(fabricatePosition(kFirstShard, key))); + // Cannot update stop position since it is identical with + // the position of the record that was just returned. + Assert.assertEquals(null, iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + + proposedStopPosition.setShufflePosition( + encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); + // Cannot update stop position since it comes before current position + Assert.assertEquals(null, iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + } + } + Assert.assertEquals(kNumRecords, i); + + proposedStopPosition.setShufflePosition( + encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); + // Cannot update stop position since all input was consumed. + Assert.assertEquals(null, iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + } + } + + @Test + public void testReadFromShuffleSourceAndUpdateStopPosition() + throws Exception { + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleSource shuffleSource = + new GroupingShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); + + TestShuffleReader shuffleReader = new TestShuffleReader(); + final int kNumRecords = 10; + final int kFirstShard = 0; + final int kSecondShard = 1; + + // Setting up two shards with kNumRecords each; keys are unique + // (hence groups of values for the same key are singletons) + // therefore each record comes with a unique position constructed. 
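+    // Records 0..kNumRecords-1 go to kFirstShard and the remainder to
+    // kSecondShard, so the boundary between the two shards is a valid stop
+    // position that the iterator accepts further down.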
+ for (int i = 0; i < kNumRecords; ++i) { + byte[] keyByte = CoderUtils.encodeToByteArray( + BigEndianIntegerCoder.of(), i); + ShuffleEntry entry = new ShuffleEntry( + fabricatePosition(kFirstShard, keyByte), keyByte, null, keyByte); + shuffleReader.addEntry(entry); + } + + for (int i = kNumRecords; i < 2 * kNumRecords; ++i) { + byte[] keyByte = CoderUtils.encodeToByteArray( + BigEndianIntegerCoder.of(), i); + + ShuffleEntry entry = new ShuffleEntry( + fabricatePosition(kSecondShard, keyByte), keyByte, null, keyByte); + shuffleReader.addEntry(entry); + } + + int i = 0; + try (Source.SourceIterator>>> iter = + shuffleSource.iterator(shuffleReader)) { + + Position proposedStopPosition = new Position(); + + Assert.assertNull(iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + + // Stop at the shard boundary + String stop = encodeBase64URLSafeString(fabricatePosition(kSecondShard, null)); + proposedStopPosition.setShufflePosition(stop); + + Assert.assertEquals( + stop, + sourcePositionToCloudPosition( + iter.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))) + .getShufflePosition()); + + while (iter.hasNext()) { + Assert.assertTrue(iter.hasNext()); + Assert.assertTrue(iter.hasNext()); + + KV> elem = iter.next().getValue(); + int key = elem.getKey(); + Assert.assertEquals(key, i); + + Iterable valuesIterable = elem.getValue(); + Iterator valuesIterator = valuesIterable.iterator(); + + int j = 0; + while (valuesIterator.hasNext()) { + Assert.assertTrue(valuesIterator.hasNext()); + Assert.assertTrue(valuesIterator.hasNext()); + + int value = valuesIterator.next(); + Assert.assertEquals(value, i); + ++j; + } + Assert.assertEquals(j, 1); + ++i; + } + + ApproximateProgress progress = + sourceProgressToCloudProgress(iter.getProgress()); + Assert.assertEquals(stop, progress.getPosition().getShufflePosition()); + } + Assert.assertEquals(i, kNumRecords); + } + + @Test + public void testGetApproximateProgress() throws Exception { + // Store the positions of all KVs returned. + List positionsList = new ArrayList(); + + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleSource shuffleSource = + new GroupingShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); + + TestShuffleReader shuffleReader = new TestShuffleReader(); + final int kNumRecords = 10; + + for (int i = 0; i < kNumRecords; ++i) { + byte[] position = fabricatePosition(i, null); + byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); + positionsList.add(position); + ShuffleEntry entry = new ShuffleEntry(position, keyByte, null, keyByte); + shuffleReader.addEntry(entry); + } + + try (Source.SourceIterator>>> sourceIterator = + shuffleSource.iterator(shuffleReader)) { + Integer i = 0; + while (sourceIterator.hasNext()) { + Assert.assertTrue(sourceIterator.hasNext()); + ApproximateProgress progress = sourceProgressToCloudProgress(sourceIterator.getProgress()); + Assert.assertNotNull(progress.getPosition().getShufflePosition()); + + // Compare returned position with the expected position. 
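+      // getProgress() is queried before next(), so the reported shuffle
+      // position should be that of the record about to be returned,
+      // i.e. positionsList.get(i).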
+ Assert.assertEquals(ByteArrayShufflePosition.of(positionsList.get(i)).encodeBase64(), + progress.getPosition().getShufflePosition()); + + WindowedValue>> elem = sourceIterator.next(); + Assert.assertEquals(i, elem.getValue().getKey()); + i++; + } + Assert.assertFalse(sourceIterator.hasNext()); + + ApproximateProgress finalProgress = + sourceProgressToCloudProgress(sourceIterator.getProgress()); + Assert.assertEquals(1.0, + (float) finalProgress.getPercentComplete(), 0.000000001); + Assert.assertEquals(Duration.ZERO, fromCloudDuration(finalProgress.getRemainingTime())); + } + } + + private ApproximateProgress createApproximateProgress( + com.google.api.services.dataflow.model.Position position) { + return new ApproximateProgress().setPosition(position); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java new file mode 100644 index 0000000000000..64cf4f5520217 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.InMemorySourceTest.encodedElements; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addStringList; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for InMemorySourceFactory. 
+ */ +@RunWith(JUnit4.class) +public class InMemorySourceFactoryTest { + static com.google.api.services.dataflow.model.Source createInMemoryCloudSource( + List elements, + Long start, + Long end, + Coder coder) + throws Exception { + List encodedElements = encodedElements(elements, coder); + + CloudObject spec = CloudObject.forClassName("InMemorySource"); + addStringList(spec, PropertyNames.ELEMENTS, encodedElements); + + if (start != null) { + addLong(spec, PropertyNames.START_INDEX, start); + } + if (end != null) { + addLong(spec, PropertyNames.END_INDEX, end); + } + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(coder.asCloudObject()); + + return cloudSource; + } + + void runTestCreateInMemorySource(List elements, + Long start, + Long end, + int expectedStart, + int expectedEnd, + Coder coder) + throws Exception { + com.google.api.services.dataflow.model.Source cloudSource = + createInMemoryCloudSource(elements, start, end, coder); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), cloudSource, + new BatchModeExecutionContext()); + Assert.assertThat(source, new IsInstanceOf(InMemorySource.class)); + InMemorySource inMemorySource = (InMemorySource) source; + Assert.assertEquals(encodedElements(elements, coder), + inMemorySource.encodedElements); + Assert.assertEquals(expectedStart, inMemorySource.startIndex); + Assert.assertEquals(expectedEnd, inMemorySource.endIndex); + Assert.assertEquals(coder, inMemorySource.coder); + } + + @Test + public void testCreatePlainInMemorySource() throws Exception { + runTestCreateInMemorySource( + Arrays.asList("hi", "there", "bob"), + null, null, + 0, 3, + StringUtf8Coder.of()); + } + + @Test + public void testCreateRichInMemorySource() throws Exception { + runTestCreateInMemorySource( + Arrays.asList(33, 44, 55, 66, 77, 88), + 1L, 3L, + 1, 3, + BigEndianIntegerCoder.of()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java new file mode 100644 index 0000000000000..d7574c517b4e8 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for InMemorySource. + */ +@RunWith(JUnit4.class) +public class InMemorySourceTest { + static List encodedElements(List elements, Coder coder) + throws Exception { + List encodedElements = new ArrayList<>(); + for (T element : elements) { + byte[] encodedElement = encodeToByteArray(coder, element); + String encodedElementString = byteArrayToJsonString(encodedElement); + encodedElements.add(encodedElementString); + } + return encodedElements; + } + + void runTestReadInMemorySource(List elements, + Long startIndex, + Long endIndex, + List expectedElements, + List expectedSizes, + Coder coder) + throws Exception { + InMemorySource inMemorySource = new InMemorySource<>( + encodedElements(elements, coder), startIndex, endIndex, coder); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(inMemorySource); + List actualElements = new ArrayList<>(); + try (Source.SourceIterator iterator = inMemorySource.iterator()) { + for (long i = inMemorySource.startIndex; iterator.hasNext(); i++) { + Assert.assertEquals( + new ApproximateProgress().setPosition(makeIndexPosition(i)), + sourceProgressToCloudProgress(iterator.getProgress())); + actualElements.add(iterator.next()); + } + } + Assert.assertEquals(expectedElements, actualElements); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testReadAllElements() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + null, + null, + Arrays.asList(33, 44, 55, 66, 77, 88), + Arrays.asList(4, 4, 4, 4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStart() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + 2L, + null, + Arrays.asList(55, 66, 77, 88), + Arrays.asList(4, 4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsToEnd() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + null, + 3L, + Arrays.asList(33, 44, 55), + Arrays.asList(4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartToEnd() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + 2L, + 5L, + Arrays.asList(55, 66, 77), + Arrays.asList(4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void 
testReadElementsOffEnd() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + null, + 30L, + Arrays.asList(33, 44, 55, 66, 77, 88), + Arrays.asList(4, 4, 4, 4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartPastEnd() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + 20L, + null, + Arrays.asList(), + Arrays.asList(), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartToEndEmptyRange() throws Exception { + runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), + 2L, + 2L, + Arrays.asList(), + Arrays.asList(), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadNoElements() throws Exception { + runTestReadInMemorySource(Arrays.asList(), + null, + null, + Arrays.asList(), + Arrays.asList(), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadNoElementsFromStartToEndEmptyRange() throws Exception { + runTestReadInMemorySource(Arrays.asList(), + 0L, + 0L, + Arrays.asList(), + Arrays.asList(), + BigEndianIntegerCoder.of()); + } + + @Test + public void testUpdatePosition() throws Exception { + List elements = Arrays.asList(33, 44, 55, 66, 77, 88); + final long start = 1L; + final long stop = 3L; + final long end = 4L; + + Coder coder = BigEndianIntegerCoder.of(); + InMemorySource inMemorySource = new InMemorySource<>( + encodedElements(elements, coder), start, end, coder); + + // Illegal proposed stop position. + try (Source.SourceIterator iterator = inMemorySource.iterator()) { + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress(new ApproximateProgress()))); + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress( + new ApproximateProgress().setPosition(makeIndexPosition(null))))); + } + + // Successful update. + try (InMemorySource.InMemorySourceIterator iterator = + (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { + Assert.assertEquals( + makeIndexPosition(stop), + sourcePositionToCloudPosition( + iterator.updateStopPosition( + cloudProgressToSourceProgress( + new ApproximateProgress().setPosition(makeIndexPosition(stop)))))); + Assert.assertEquals(stop, iterator.endPosition); + Assert.assertEquals(44, iterator.next().intValue()); + Assert.assertEquals(55, iterator.next().intValue()); + Assert.assertFalse(iterator.hasNext()); + } + + // Proposed stop position is before the current position, no update. + try (InMemorySource.InMemorySourceIterator iterator = + (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { + Assert.assertEquals(44, iterator.next().intValue()); + Assert.assertEquals(55, iterator.next().intValue()); + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress( + new ApproximateProgress().setPosition(makeIndexPosition(stop))))); + Assert.assertEquals((int) end, iterator.endPosition); + Assert.assertTrue(iterator.hasNext()); + } + + // Proposed stop position is after the current stop (end) position, no update. 
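+    // updateStopPosition should return null and leave endPosition at the
+    // original end index, as asserted inside the try block below.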
+ try (InMemorySource.InMemorySourceIterator iterator = + (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { + Assert.assertNull( + iterator.updateStopPosition( + cloudProgressToSourceProgress( + new ApproximateProgress().setPosition(makeIndexPosition(end + 1))))); + Assert.assertEquals((int) end, iterator.endPosition); + } + } + + private Position makeIndexPosition(Long index) { + Position position = new Position(); + if (index != null) { + position.setRecordIndex(index); + } + return position; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java new file mode 100644 index 0000000000000..fae22797ef89c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -0,0 +1,567 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.hamcrest.core.IsInstanceOf.instanceOf; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertThat; + +import com.google.api.services.dataflow.model.FlattenInstruction; +import com.google.api.services.dataflow.model.InstructionInput; +import com.google.api.services.dataflow.model.InstructionOutput; +import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.ParDoInstruction; +import com.google.api.services.dataflow.model.ParallelInstruction; +import com.google.api.services.dataflow.model.PartialGroupByKeyInstruction; +import com.google.api.services.dataflow.model.ReadInstruction; +import com.google.api.services.dataflow.model.WriteInstruction; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSink; +import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSinkFactory; +import com.google.cloud.dataflow.sdk.runners.worker.SourceFactoryTest.TestSource; +import com.google.cloud.dataflow.sdk.runners.worker.SourceFactoryTest.TestSourceFactory; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import 
com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.FlattenOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; +import com.google.cloud.dataflow.sdk.util.common.worker.Operation; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; +import com.google.cloud.dataflow.sdk.util.common.worker.WriteOperation; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Tests for MapTaskExecutorFactory. + */ +@RunWith(JUnit4.class) +public class MapTaskExecutorFactoryTest { + @Test + public void testCreateMapTaskExecutor() throws Exception { + List instructions = + Arrays.asList( + createReadInstruction("Read"), + createParDoInstruction(0, 0, "DoFn1"), + createParDoInstruction(0, 0, "DoFn2"), + createFlattenInstruction(1, 0, 2, 0, "Flatten"), + createWriteInstruction(3, 0, "Write")); + + MapTask mapTask = new MapTask(); + mapTask.setStageName("test"); + mapTask.setInstructions(instructions); + + CounterSet counterSet = null; + try (MapTaskExecutor executor = + MapTaskExecutorFactory.create( + PipelineOptionsFactory.create(), + mapTask, + new BatchModeExecutionContext())) { + + @SuppressWarnings("unchecked") + List operations = (List) executor.operations; + assertThat( + operations, + CoreMatchers.hasItems( + new IsInstanceOf(ReadOperation.class), + new IsInstanceOf(ParDoOperation.class), + new IsInstanceOf(ParDoOperation.class), + new IsInstanceOf(FlattenOperation.class), + new IsInstanceOf(WriteOperation.class))); + counterSet = executor.getOutputCounters(); + } + + assertEquals( + new CounterSet( + Counter.longs("read_output_name-ElementCount", SUM) + .resetToValue(0L), + Counter.longs("read_output_name-MeanByteCount", MEAN) + .resetToValue(0, 0L), + Counter.longs("Read-ByteCount", SUM).resetToValue(0L), + Counter.longs("test-Read-start-msecs", SUM) + .resetToValue(0L), + Counter.longs("test-Read-read-msecs", SUM) + .resetToValue(0L), + Counter.longs("test-Read-process-msecs", SUM) + .resetToValue(0L), + Counter.longs("test-Read-finish-msecs", SUM) + .resetToValue(0L), + Counter.longs("DoFn1_output-ElementCount", SUM) + .resetToValue(0L), + Counter.longs("DoFn1_output-MeanByteCount", MEAN) + .resetToValue(0, 0L), + Counter.longs("test-DoFn1-start-msecs", SUM).resetToValue(0L), + Counter.longs("test-DoFn1-process-msecs", SUM).resetToValue(0L), + Counter.longs("test-DoFn1-finish-msecs", SUM).resetToValue(0L), + Counter.longs("DoFn2_output-ElementCount", SUM) + .resetToValue(0L), + Counter.longs("DoFn2_output-MeanByteCount", MEAN) + .resetToValue(0, 0L), + Counter.longs("test-DoFn2-start-msecs", SUM).resetToValue(0L), + Counter.longs("test-DoFn2-process-msecs", SUM).resetToValue(0L), + 
Counter.longs("test-DoFn2-finish-msecs", SUM).resetToValue(0L), + Counter.longs("flatten_output_name-ElementCount", SUM) + .resetToValue(0L), + Counter.longs("flatten_output_name-MeanByteCount", MEAN) + .resetToValue(0, 0L), + Counter.longs("test-Flatten-start-msecs", SUM).resetToValue(0L), + Counter.longs("test-Flatten-process-msecs", SUM).resetToValue(0L), + Counter.longs("test-Flatten-finish-msecs", SUM).resetToValue(0L), + Counter.longs("Write-ByteCount", SUM) + .resetToValue(0L), + Counter.longs("test-Write-start-msecs", SUM).resetToValue(0L), + Counter.longs("test-Write-process-msecs", SUM).resetToValue(0L), + Counter.longs("test-Write-finish-msecs", SUM).resetToValue(0L), + Counter.longs("test-other-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-other-msecs")).getAggregate(false))), + counterSet); + } + + @Test + public void testExecutionContextPlumbing() throws Exception { + List instructions = + Arrays.asList( + createReadInstruction("Read"), + createParDoInstruction(0, 0, "DoFn1"), + createParDoInstruction(1, 0, "DoFn2"), + createWriteInstruction(2, 0, "Write")); + + MapTask mapTask = new MapTask(); + mapTask.setInstructions(instructions); + + BatchModeExecutionContext context = new BatchModeExecutionContext(); + + try (MapTaskExecutor executor = + MapTaskExecutorFactory.create( + PipelineOptionsFactory.create(), mapTask, context)) { + executor.execute(); + } + + List stepNames = new ArrayList<>(); + for (ExecutionContext.StepContext stepContext + : context.getAllStepContexts()) { + stepNames.add(stepContext.getStepName()); + } + assertThat(stepNames, CoreMatchers.hasItems("DoFn1", "DoFn2")); + } + + static ParallelInstruction createReadInstruction(String name) { + CloudObject spec = CloudObject.forClass(TestSourceFactory.class); + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(CloudObject.forClass(StringUtf8Coder.class)); + + ReadInstruction readInstruction = new ReadInstruction(); + readInstruction.setSource(cloudSource); + + InstructionOutput output = new InstructionOutput(); + output.setName("read_output_name"); + output.setCodec(CloudObject.forClass(StringUtf8Coder.class)); + + ParallelInstruction instruction = new ParallelInstruction(); + instruction.setSystemName(name); + instruction.setRead(readInstruction); + instruction.setOutputs(Arrays.asList(output)); + + return instruction; + } + + @Test + public void testCreateReadOperation() throws Exception { + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, + counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation( + PipelineOptionsFactory.create(), + createReadInstruction("Read"), + new BatchModeExecutionContext(), + Collections.emptyList(), + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + assertThat(operation, new IsInstanceOf(ReadOperation.class)); + ReadOperation readOperation = (ReadOperation) operation; + + assertEquals(readOperation.receivers.length, 1); + assertEquals(readOperation.receivers[0].getReceiverCount(), 0); + assertEquals(readOperation.initializationState, + Operation.InitializationState.UNSTARTED); + assertThat(readOperation.source, new IsInstanceOf(TestSource.class)); + + assertEquals( + new CounterSet( + Counter.longs("test-Read-start-msecs", SUM) + .resetToValue(0L), + 
Counter.longs("read_output_name-MeanByteCount", MEAN) + .resetToValue(0, 0L), + Counter.longs("Read-ByteCount", SUM).resetToValue(0L), + Counter.longs("test-Read-finish-msecs", SUM) + .resetToValue(0L), + Counter.longs("test-Read-read-msecs", SUM), + Counter.longs("test-Read-process-msecs", SUM), + Counter.longs("read_output_name-ElementCount", SUM).resetToValue(0L)), + counterSet); + } + + static ParallelInstruction createWriteInstruction( + int producerIndex, + int producerOutputNum, + String systemName) { + InstructionInput cloudInput = new InstructionInput(); + cloudInput.setProducerInstructionIndex(producerIndex); + cloudInput.setOutputNum(producerOutputNum); + + CloudObject spec = CloudObject.forClass(TestSinkFactory.class); + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(CloudObject.forClass(StringUtf8Coder.class)); + + WriteInstruction writeInstruction = new WriteInstruction(); + writeInstruction.setInput(cloudInput); + writeInstruction.setSink(cloudSink); + + ParallelInstruction instruction = new ParallelInstruction(); + instruction.setWrite(writeInstruction); + instruction.setSystemName(systemName); + + return instruction; + } + + @Test + public void testCreateWriteOperation() throws Exception { + List priorOperations = Arrays.asList(new Operation[]{ + new TestOperation(3), + new TestOperation(5), + new TestOperation(1) }); + + int producerIndex = 1; + int producerOutputNum = 2; + + ParallelInstruction instruction = + createWriteInstruction(producerIndex, producerOutputNum, "WriteOperation"); + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, + counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation( + PipelineOptionsFactory.create(), + instruction, + new BatchModeExecutionContext(), + priorOperations, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + assertThat(operation, new IsInstanceOf(WriteOperation.class)); + WriteOperation writeOperation = (WriteOperation) operation; + + assertEquals(writeOperation.receivers.length, 0); + assertEquals(writeOperation.initializationState, + Operation.InitializationState.UNSTARTED); + assertThat(writeOperation.sink, + new IsInstanceOf(TestSink.class)); + + assertSame( + writeOperation, + priorOperations.get(producerIndex).receivers[producerOutputNum] + .getOnlyReceiver()); + + assertEquals( + new CounterSet( + Counter.longs("WriteOperation-ByteCount", SUM) + .resetToValue(0L), + Counter.longs("test-WriteOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-WriteOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-WriteOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-finish-msecs")).getAggregate(false))), + counterSet); + } + + static class TestDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { } + } + + static ParallelInstruction createParDoInstruction( + int producerIndex, + int producerOutputNum, + String systemName) { + InstructionInput cloudInput = new InstructionInput(); + cloudInput.setProducerInstructionIndex(producerIndex); + 
cloudInput.setOutputNum(producerOutputNum); + + TestDoFn fn = new TestDoFn(); + + String serializedFn = + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(fn)); + + CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); + addString(cloudUserFn, PropertyNames.SERIALIZED_FN, serializedFn); + + ParDoInstruction parDoInstruction = new ParDoInstruction(); + parDoInstruction.setInput(cloudInput); + parDoInstruction.setNumOutputs(1); + parDoInstruction.setUserFn(cloudUserFn); + + InstructionOutput output = new InstructionOutput(); + output.setName(systemName + "_output"); + output.setCodec(CloudObject.forClass(StringUtf8Coder.class)); + + ParallelInstruction instruction = new ParallelInstruction(); + instruction.setParDo(parDoInstruction); + instruction.setOutputs(Arrays.asList(output)); + instruction.setSystemName(systemName); + return instruction; + } + + @Test + public void testCreateParDoOperation() throws Exception { + List priorOperations = Arrays.asList(new Operation[]{ + new TestOperation(3), + new TestOperation(5), + new TestOperation(1) }); + + int producerIndex = 1; + int producerOutputNum = 2; + + ParallelInstruction instruction = + createParDoInstruction(producerIndex, producerOutputNum, "DoFn"); + + BatchModeExecutionContext context = new BatchModeExecutionContext(); + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, + counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation( + PipelineOptionsFactory.create(), + instruction, + context, + priorOperations, + counterPrefix, + counterSet.getAddCounterMutator(), stateSampler); + assertThat(operation, new IsInstanceOf(ParDoOperation.class)); + ParDoOperation parDoOperation = (ParDoOperation) operation; + + assertEquals(parDoOperation.receivers.length, 1); + assertEquals(parDoOperation.receivers[0].getReceiverCount(), 0); + assertEquals(parDoOperation.initializationState, + Operation.InitializationState.UNSTARTED); + assertThat(parDoOperation.fn, + new IsInstanceOf(NormalParDoFn.class)); + NormalParDoFn normalParDoFn = (NormalParDoFn) parDoOperation.fn; + + assertThat(normalParDoFn.fn, + new IsInstanceOf(TestDoFn.class)); + + assertSame( + parDoOperation, + priorOperations.get(producerIndex).receivers[producerOutputNum] + .getOnlyReceiver()); + + assertEquals(context, normalParDoFn.executionContext); + } + + static ParallelInstruction createPartialGroupByKeyInstruction( + int producerIndex, + int producerOutputNum) { + InstructionInput cloudInput = new InstructionInput(); + cloudInput.setProducerInstructionIndex(producerIndex); + cloudInput.setOutputNum(producerOutputNum); + + PartialGroupByKeyInstruction pgbkInstruction = + new PartialGroupByKeyInstruction(); + pgbkInstruction.setInput(cloudInput); + pgbkInstruction.setInputElementCodec( + makeCloudEncoding(FullWindowedValueCoder.class.getName(), + makeCloudEncoding("KvCoder", + makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("BigEndianIntegerCoder")), + IntervalWindow.getCoder().asCloudObject())); + + InstructionOutput output = new InstructionOutput(); + output.setName("pgbk_output_name"); + output.setCodec(makeCloudEncoding( + "KvCoder", + makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding( + "IterableCoder", + makeCloudEncoding("BigEndianIntegerCoder")))); + + ParallelInstruction instruction = new ParallelInstruction(); + instruction.setPartialGroupByKey(pgbkInstruction); + 
instruction.setOutputs(Arrays.asList(output)); + + return instruction; + } + + @Test + public void testCreatePartialGroupByKeyOperation() throws Exception { + List priorOperations = Arrays.asList(new Operation[]{ + new TestOperation(3), + new TestOperation(5), + new TestOperation(1) }); + + int producerIndex = 1; + int producerOutputNum = 2; + + ParallelInstruction instruction = + createPartialGroupByKeyInstruction(producerIndex, producerOutputNum); + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, + counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation( + PipelineOptionsFactory.create(), + instruction, + new BatchModeExecutionContext(), + priorOperations, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + assertThat(operation, instanceOf(PartialGroupByKeyOperation.class)); + PartialGroupByKeyOperation pgbkOperation = + (PartialGroupByKeyOperation) operation; + + assertEquals(pgbkOperation.receivers.length, 1); + assertEquals(pgbkOperation.receivers[0].getReceiverCount(), 0); + assertEquals(pgbkOperation.initializationState, + Operation.InitializationState.UNSTARTED); + + assertSame( + pgbkOperation, + priorOperations.get(producerIndex).receivers[producerOutputNum] + .getOnlyReceiver()); + } + + static ParallelInstruction createFlattenInstruction( + int producerIndex1, + int producerOutputNum1, + int producerIndex2, + int producerOutputNum2, + String systemName) { + List cloudInputs = new ArrayList<>(); + + InstructionInput cloudInput1 = new InstructionInput(); + cloudInput1.setProducerInstructionIndex(producerIndex1); + cloudInput1.setOutputNum(producerOutputNum1); + cloudInputs.add(cloudInput1); + + InstructionInput cloudInput2 = new InstructionInput(); + cloudInput2.setProducerInstructionIndex(producerIndex2); + cloudInput2.setOutputNum(producerOutputNum2); + cloudInputs.add(cloudInput2); + + FlattenInstruction flattenInstruction = new FlattenInstruction(); + flattenInstruction.setInputs(cloudInputs); + + InstructionOutput output = new InstructionOutput(); + output.setName("flatten_output_name"); + output.setCodec(makeCloudEncoding(StringUtf8Coder.class.getName())); + + ParallelInstruction instruction = new ParallelInstruction(); + instruction.setFlatten(flattenInstruction); + instruction.setOutputs(Arrays.asList(output)); + instruction.setSystemName(systemName); + + return instruction; + } + + @Test + public void testCreateFlattenOperation() throws Exception { + List priorOperations = Arrays.asList(new Operation[]{ + new TestOperation(3), + new TestOperation(5), + new TestOperation(1) }); + + int producerIndex1 = 1; + int producerOutputNum1 = 2; + int producerIndex2 = 0; + int producerOutputNum2 = 1; + + ParallelInstruction instruction = + createFlattenInstruction(producerIndex1, producerOutputNum1, + producerIndex2, producerOutputNum2, "Flatten"); + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, + counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation( + PipelineOptionsFactory.create(), + instruction, + new BatchModeExecutionContext(), + priorOperations, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + assertThat(operation, new IsInstanceOf(FlattenOperation.class)); + FlattenOperation flattenOperation = (FlattenOperation) operation; + + 
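+    // Flatten merges its two inputs into a single output stream, so the
+    // assertions below expect exactly one receiver and expect both producer
+    // outputs to be wired to this same operation.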
assertEquals(flattenOperation.receivers.length, 1); + assertEquals(flattenOperation.receivers[0].getReceiverCount(), 0); + assertEquals(flattenOperation.initializationState, + Operation.InitializationState.UNSTARTED); + + assertSame( + flattenOperation, + priorOperations.get(producerIndex1).receivers[producerOutputNum1] + .getOnlyReceiver()); + assertSame( + flattenOperation, + priorOperations.get(producerIndex2).receivers[producerOutputNum2] + .getOnlyReceiver()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java new file mode 100644 index 0000000000000..f94ab8339f9d1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java @@ -0,0 +1,331 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; +import static org.hamcrest.core.AnyOf.anyOf; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.IsInstanceOf.instanceOf; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.UserCodeException; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.Receiver; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Tests for NormalParDoFn. 
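+ *
+ * NormalParDoFn drives a user DoFn through startBundle, processElement and
+ * finishBundle, delivering the main output and each declared side output to
+ * its own Receiver.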
+ */
+@RunWith(JUnit4.class)
+public class NormalParDoFnTest {
+  static class TestDoFn extends DoFn<Integer, String> {
+    enum State { UNSTARTED, STARTED, PROCESSING, FINISHED }
+    State state = State.UNSTARTED;
+
+    List<TupleTag<String>> sideOutputTupleTags;
+
+    public TestDoFn(List<String> sideOutputTags) {
+      sideOutputTupleTags = new ArrayList<>();
+      for (String sideOutputTag : sideOutputTags) {
+        sideOutputTupleTags.add(new TupleTag<String>(sideOutputTag));
+      }
+    }
+
+    @Override
+    public void startBundle(Context c) {
+      assertEquals(State.UNSTARTED, state);
+      state = State.STARTED;
+      outputToAll(c, "started");
+    }
+
+    @Override
+    public void processElement(ProcessContext c) {
+      assertThat(state, anyOf(equalTo(State.STARTED),
+                              equalTo(State.PROCESSING)));
+      state = State.PROCESSING;
+      outputToAll(c, "processing: " + c.element());
+    }
+
+    @Override
+    public void finishBundle(Context c) {
+      assertThat(state, anyOf(equalTo(State.STARTED),
+                              equalTo(State.PROCESSING)));
+      state = State.FINISHED;
+      outputToAll(c, "finished");
+    }
+
+    private void outputToAll(Context c, String value) {
+      c.output(value);
+      for (TupleTag<String> sideOutputTupleTag : sideOutputTupleTags) {
+        c.sideOutput(sideOutputTupleTag,
+                     sideOutputTupleTag.getId() + ": " + value);
+      }
+    }
+  }
+
+  static class TestErrorDoFn extends DoFn<Integer, String> {
+
+    // Used to test nested stack traces.
+    private void nestedFunctionBeta(String s) {
+      throw new RuntimeException(s);
+    }
+
+    private void nestedFunctionAlpha(String s) {
+      nestedFunctionBeta(s);
+    }
+
+    @Override
+    public void startBundle(Context c) {
+      nestedFunctionAlpha("test error in initialize");
+    }
+
+    @Override
+    public void processElement(ProcessContext c) {
+      nestedFunctionBeta("test error in process");
+    }
+
+    @Override
+    public void finishBundle(Context c) {
+      throw new RuntimeException("test error in finalize");
+    }
+  }
+
+  static class TestReceiver implements Receiver {
+    List<Object> receivedElems = new ArrayList<>();
+
+    @Override
+    public void process(Object outputElem) {
+      receivedElems.add(outputElem);
+    }
+  }
+
+  @Test
+  public void testNormalParDoFn() throws Exception {
+    List<String> sideOutputTags = Arrays.asList("tag1", "tag2", "tag3");
+
+    TestDoFn fn = new TestDoFn(sideOutputTags);
+    TestReceiver receiver = new TestReceiver();
+    TestReceiver receiver1 = new TestReceiver();
+    TestReceiver receiver2 = new TestReceiver();
+    TestReceiver receiver3 = new TestReceiver();
+
+    PTuple sideInputValues = PTuple.empty();
+
+    List<String> outputTags = new ArrayList<>();
+    outputTags.add("output");
+    outputTags.addAll(sideOutputTags);
+    NormalParDoFn normalParDoFn =
+        new NormalParDoFn(PipelineOptionsFactory.create(),
+                          fn, sideInputValues, outputTags, "doFn",
+                          new BatchModeExecutionContext(),
+                          (new CounterSet()).getAddCounterMutator());
+
+    normalParDoFn.startBundle(receiver, receiver1, receiver2, receiver3);
+
+    normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(3));
+    normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(42));
+    normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(666));
+
+    normalParDoFn.finishBundle();
+
+    Object[] expectedReceivedElems = {
+        WindowedValue.valueInGlobalWindow("started"),
+        WindowedValue.valueInGlobalWindow("processing: 3"),
+        WindowedValue.valueInGlobalWindow("processing: 42"),
+        WindowedValue.valueInGlobalWindow("processing: 666"),
+        WindowedValue.valueInGlobalWindow("finished"),
+    };
+    assertArrayEquals(expectedReceivedElems, receiver.receivedElems.toArray());
+
+    Object[] expectedReceivedElems1 = {
+        WindowedValue.valueInGlobalWindow("tag1: started"),
+
WindowedValue.valueInGlobalWindow("tag1: processing: 3"), + WindowedValue.valueInGlobalWindow("tag1: processing: 42"), + WindowedValue.valueInGlobalWindow("tag1: processing: 666"), + WindowedValue.valueInGlobalWindow("tag1: finished"), + }; + assertArrayEquals(expectedReceivedElems1, receiver1.receivedElems.toArray()); + + Object[] expectedReceivedElems2 = { + WindowedValue.valueInGlobalWindow("tag2: started"), + WindowedValue.valueInGlobalWindow("tag2: processing: 3"), + WindowedValue.valueInGlobalWindow("tag2: processing: 42"), + WindowedValue.valueInGlobalWindow("tag2: processing: 666"), + WindowedValue.valueInGlobalWindow("tag2: finished"), + }; + assertArrayEquals(expectedReceivedElems2, receiver2.receivedElems.toArray()); + + Object[] expectedReceivedElems3 = { + WindowedValue.valueInGlobalWindow("tag3: started"), + WindowedValue.valueInGlobalWindow("tag3: processing: 3"), + WindowedValue.valueInGlobalWindow("tag3: processing: 42"), + WindowedValue.valueInGlobalWindow("tag3: processing: 666"), + WindowedValue.valueInGlobalWindow("tag3: finished"), + }; + assertArrayEquals(expectedReceivedElems3, receiver3.receivedElems.toArray()); + } + + @Test + public void testUnexpectedNumberOfReceivers() throws Exception { + TestDoFn fn = new TestDoFn(Collections.emptyList()); + TestReceiver receiver = new TestReceiver(); + + PTuple sideInputValues = PTuple.empty(); + List outputTags = Arrays.asList("output"); + NormalParDoFn normalParDoFn = + new NormalParDoFn(PipelineOptionsFactory.create(), + fn, sideInputValues, outputTags, "doFn", + new BatchModeExecutionContext(), + (new CounterSet()).getAddCounterMutator()); + + try { + normalParDoFn.startBundle(); + fail("should have failed"); + } catch (Throwable exn) { + assertThat(exn.toString(), + containsString("unexpected number of receivers")); + } + try { + normalParDoFn.startBundle(receiver, receiver); + fail("should have failed"); + } catch (Throwable exn) { + assertThat(exn.toString(), + containsString("unexpected number of receivers")); + } + } + + private List stackTraceFrameStrings(Throwable t) { + List stack = new ArrayList<>(); + for (StackTraceElement frame : t.getStackTrace()) { + // Make sure that the frame has the expected name. + stack.add(frame.toString()); + } + return stack; + } + + @Test + public void testErrorPropagation() throws Exception { + TestErrorDoFn fn = new TestErrorDoFn(); + TestReceiver receiver = new TestReceiver(); + + PTuple sideInputValues = PTuple.empty(); + List outputTags = Arrays.asList("output"); + NormalParDoFn normalParDoFn = + new NormalParDoFn(PipelineOptionsFactory.create(), + fn, sideInputValues, outputTags, "doFn", + new BatchModeExecutionContext(), + (new CounterSet()).getAddCounterMutator()); + + try { + normalParDoFn.startBundle(receiver); + fail("should have failed"); + } catch (Exception exn) { + // Because we're calling this from inside the SDK and not from a + // user's program (e.g. through Pipeline.run), the error should + // be thrown as a UserCodeException. The cause of the + // UserCodeError shouldn't contain any of the stack from within + // the SDK, since we don't want to overwhelm users with stack + // frames outside of their control. 
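+      // Here the cause is the RuntimeException thrown by
+      // TestErrorDoFn.nestedFunctionBeta, so only frames from the user's own
+      // methods should appear in its stack trace.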
+ assertThat(exn, instanceOf(UserCodeException.class)); + // Stack trace of the cause should contain three frames: + // TestErrorDoFn.nestedFunctionBeta + // TestErrorDoFn.nestedFunctionAlpha + // TestErrorDoFn.startBundle + assertThat(stackTraceFrameStrings(exn.getCause()), contains( + containsString("TestErrorDoFn.nestedFunctionBeta"), + containsString("TestErrorDoFn.nestedFunctionAlpha"), + containsString("TestErrorDoFn.startBundle"))); + assertThat(exn.toString(), + containsString("test error in initialize")); + } + + try { + normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(3)); + fail("should have failed"); + } catch (Exception exn) { + // Exception should be a UserCodeException since we're calling + // from inside the SDK. + assertThat(exn, instanceOf(UserCodeException.class)); + // Stack trace of the cause should contain two frames: + // TestErrorDoFn.nestedFunctionBeta + // TestErrorDoFn.processElement + assertThat(stackTraceFrameStrings(exn.getCause()), contains( + containsString("TestErrorDoFn.nestedFunctionBeta"), + containsString("TestErrorDoFn.processElement"))); + assertThat(exn.toString(), containsString("test error in process")); + } + + try { + normalParDoFn.finishBundle(); + fail("should have failed"); + } catch (Exception exn) { + // Exception should be a UserCodeException since we're calling + // from inside the SDK. + assertThat(exn, instanceOf(UserCodeException.class)); + // Stack trace should only contain a single frame: + // TestErrorDoFn.finishBundle + assertThat(stackTraceFrameStrings(exn.getCause()), contains( + containsString("TestErrorDoFn.finishBundle"))); + assertThat(exn.toString(), containsString("test error in finalize")); + } + } + + @Test + public void testUndeclaredSideOutputs() throws Exception { + TestDoFn fn = new TestDoFn(Arrays.asList("declared", "undecl1", "undecl2", "undecl3")); + CounterSet counters = new CounterSet(); + NormalParDoFn normalParDoFn = + new NormalParDoFn(PipelineOptionsFactory.create(), fn, PTuple.empty(), + Arrays.asList("output", "declared"), "doFn", + new BatchModeExecutionContext(), + counters.getAddCounterMutator()); + + normalParDoFn.startBundle(new TestReceiver(), new TestReceiver()); + normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(5)); + normalParDoFn.finishBundle(); + + assertEquals( + new CounterSet( + Counter.longs("implicit-undecl1-ElementCount", SUM) + .resetToValue(3L), + Counter.longs("implicit-undecl2-ElementCount", SUM) + .resetToValue(3L), + Counter.longs("implicit-undecl3-ElementCount", SUM) + .resetToValue(3L)), + counters); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCodeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCodeTest.java new file mode 100644 index 0000000000000..6f467ba1173af --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/OrderedCodeTest.java @@ -0,0 +1,504 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.common.io.BaseEncoding; +import com.google.common.primitives.Bytes; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for OrderedCode. + */ +@RunWith(JUnit4.class) +public class OrderedCodeTest { + @Test + public void testWriteInfinity() { + OrderedCode orderedCode = new OrderedCode(); + try { + orderedCode.readInfinity(); + fail("Expected IllegalArgumentException."); + } catch (IllegalArgumentException e) { + // expected + } + orderedCode.writeInfinity(); + assertTrue(orderedCode.readInfinity()); + try { + orderedCode.readInfinity(); + fail("Expected IllegalArgumentException."); + } catch (IllegalArgumentException e) { + // expected + } + } + + @Test + public void testWriteBytes() { + byte[] first = { 'a', 'b', 'c'}; + byte[] second = { 'd', 'e', 'f'}; + byte[] last = { 'x', 'y', 'z'}; + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeBytes(first); + byte[] firstEncoded = orderedCode.getEncodedBytes(); + assertArrayEquals(orderedCode.readBytes(), first); + + orderedCode.writeBytes(first); + orderedCode.writeBytes(second); + orderedCode.writeBytes(last); + byte[] allEncoded = orderedCode.getEncodedBytes(); + assertArrayEquals(orderedCode.readBytes(), first); + assertArrayEquals(orderedCode.readBytes(), second); + assertArrayEquals(orderedCode.readBytes(), last); + + orderedCode = new OrderedCode(firstEncoded); + orderedCode.writeBytes(second); + orderedCode.writeBytes(last); + assertArrayEquals(orderedCode.getEncodedBytes(), allEncoded); + assertArrayEquals(orderedCode.readBytes(), first); + assertArrayEquals(orderedCode.readBytes(), second); + assertArrayEquals(orderedCode.readBytes(), last); + + orderedCode = new OrderedCode(allEncoded); + assertArrayEquals(orderedCode.readBytes(), first); + assertArrayEquals(orderedCode.readBytes(), second); + assertArrayEquals(orderedCode.readBytes(), last); + } + + @Test + public void testWriteNumIncreasing() { + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeNumIncreasing(0); + orderedCode.writeNumIncreasing(1); + orderedCode.writeNumIncreasing(Long.MIN_VALUE); + orderedCode.writeNumIncreasing(Long.MAX_VALUE); + assertEquals(orderedCode.readNumIncreasing(), 0); + assertEquals(orderedCode.readNumIncreasing(), 1); + assertEquals(orderedCode.readNumIncreasing(), Long.MIN_VALUE); + assertEquals(orderedCode.readNumIncreasing(), Long.MAX_VALUE); + } + + /** + * Assert that encoding the specified long via + * {@link OrderedCode#writeSignedNumIncreasing(long)} results in the bytes + * represented by the specified string of hex digits. + * E.g. assertSignedNumIncreasingEncodingEquals("3fbf", -65) asserts that + * -65 is encoded as { (byte) 0x3f, (byte) 0xbf }. + */ + private static void assertSignedNumIncreasingEncodingEquals( + String expectedHexEncoding, long num) { + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeSignedNumIncreasing(num); + assertEquals( + "Unexpected encoding for " + num, + expectedHexEncoding, + BaseEncoding.base16().lowerCase().encode(orderedCode.getEncodedBytes())); + } + + /** + * Assert that encoding various long values via + * {@link OrderedCode#writeSignedNumIncreasing(long)} produces the expected + * bytes. 
Expected byte sequences were generated via the c++ (authoritative) + * implementation of OrderedCode::WriteSignedNumIncreasing. + */ + @Test + public void testSignedNumIncreasing_write() { + assertSignedNumIncreasingEncodingEquals( + "003f8000000000000000", Long.MIN_VALUE); + assertSignedNumIncreasingEncodingEquals( + "003f8000000000000001", Long.MIN_VALUE + 1); + assertSignedNumIncreasingEncodingEquals( + "077fffffff", Integer.MIN_VALUE - 1L); + assertSignedNumIncreasingEncodingEquals("0780000000", Integer.MIN_VALUE); + assertSignedNumIncreasingEncodingEquals( + "0780000001", Integer.MIN_VALUE + 1); + assertSignedNumIncreasingEncodingEquals("3fbf", -65); + assertSignedNumIncreasingEncodingEquals("40", -64); + assertSignedNumIncreasingEncodingEquals("41", -63); + assertSignedNumIncreasingEncodingEquals("7d", -3); + assertSignedNumIncreasingEncodingEquals("7e", -2); + assertSignedNumIncreasingEncodingEquals("7f", -1); + assertSignedNumIncreasingEncodingEquals("80", 0); + assertSignedNumIncreasingEncodingEquals("81", 1); + assertSignedNumIncreasingEncodingEquals("82", 2); + assertSignedNumIncreasingEncodingEquals("83", 3); + assertSignedNumIncreasingEncodingEquals("bf", 63); + assertSignedNumIncreasingEncodingEquals("c040", 64); + assertSignedNumIncreasingEncodingEquals("c041", 65); + assertSignedNumIncreasingEncodingEquals( + "f87ffffffe", Integer.MAX_VALUE - 1); + assertSignedNumIncreasingEncodingEquals("f87fffffff", Integer.MAX_VALUE); + assertSignedNumIncreasingEncodingEquals( + "f880000000", Integer.MAX_VALUE + 1L); + assertSignedNumIncreasingEncodingEquals( + "ffc07ffffffffffffffe", Long.MAX_VALUE - 1); + assertSignedNumIncreasingEncodingEquals( + "ffc07fffffffffffffff", Long.MAX_VALUE); + } + + /** + * Convert a string of hex digits (e.g. "3fbf") to a byte[] + * (e.g. { (byte) 0x3f, (byte) 0xbf }). + */ + private static byte[] bytesFromHexString(String hexDigits) { + return BaseEncoding.base16().lowerCase().decode(hexDigits); + } + + /** + * Assert that decoding (via {@link OrderedCode#readSignedNumIncreasing()}) + * the bytes represented by the specified string of hex digits results in the + * expected long value. + * E.g. assertDecodedSignedNumIncreasingEquals(-65, "3fbf") asserts that the + * byte array { (byte) 0x3f, (byte) 0xbf } is decoded as -65. + */ + private static void assertDecodedSignedNumIncreasingEquals( + long expectedNum, String encodedHexString) { + OrderedCode orderedCode = + new OrderedCode(bytesFromHexString(encodedHexString)); + assertEquals( + "Unexpected value when decoding 0x" + encodedHexString, + expectedNum, + orderedCode.readSignedNumIncreasing()); + assertFalse( + "Unexpected encoded bytes remain after decoding 0x" + encodedHexString, + orderedCode.hasRemainingEncodedBytes()); + } + + /** + * Assert that decoding various sequences of bytes via + * {@link OrderedCode#readSignedNumIncreasing()} produces the expected long + * value. + * Input byte sequences were generated via the c++ (authoritative) + * implementation of OrderedCode::WriteSignedNumIncreasing. 
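+   * As in the write test above, the single-byte range is visible in the data:
+   * every value in [-64, 63] is encoded as the single byte (0x80 + value),
+   * e.g. 0x40 -> -64, 0x7f -> -1, 0x80 -> 0, 0xbf -> 63, so unsigned byte
+   * order matches numeric order.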
+ */ + @Test + public void testSignedNumIncreasing_read() { + assertDecodedSignedNumIncreasingEquals( + Long.MIN_VALUE, "003f8000000000000000"); + assertDecodedSignedNumIncreasingEquals( + Long.MIN_VALUE + 1, "003f8000000000000001"); + assertDecodedSignedNumIncreasingEquals( + Integer.MIN_VALUE - 1L, "077fffffff"); + assertDecodedSignedNumIncreasingEquals(Integer.MIN_VALUE, "0780000000"); + assertDecodedSignedNumIncreasingEquals(Integer.MIN_VALUE + 1, "0780000001"); + assertDecodedSignedNumIncreasingEquals(-65, "3fbf"); + assertDecodedSignedNumIncreasingEquals(-64, "40"); + assertDecodedSignedNumIncreasingEquals(-63, "41"); + assertDecodedSignedNumIncreasingEquals(-3, "7d"); + assertDecodedSignedNumIncreasingEquals(-2, "7e"); + assertDecodedSignedNumIncreasingEquals(-1, "7f"); + assertDecodedSignedNumIncreasingEquals(0, "80"); + assertDecodedSignedNumIncreasingEquals(1, "81"); + assertDecodedSignedNumIncreasingEquals(2, "82"); + assertDecodedSignedNumIncreasingEquals(3, "83"); + assertDecodedSignedNumIncreasingEquals(63, "bf"); + assertDecodedSignedNumIncreasingEquals(64, "c040"); + assertDecodedSignedNumIncreasingEquals(65, "c041"); + assertDecodedSignedNumIncreasingEquals(Integer.MAX_VALUE - 1, "f87ffffffe"); + assertDecodedSignedNumIncreasingEquals(Integer.MAX_VALUE, "f87fffffff"); + assertDecodedSignedNumIncreasingEquals( + Integer.MAX_VALUE + 1L, "f880000000"); + assertDecodedSignedNumIncreasingEquals( + Long.MAX_VALUE - 1, "ffc07ffffffffffffffe"); + assertDecodedSignedNumIncreasingEquals( + Long.MAX_VALUE, "ffc07fffffffffffffff"); + } + + /** + * Assert that encoding (via + * {@link OrderedCode#writeSignedNumIncreasing(long)}) the specified long + * value and then decoding (via {@link OrderedCode#readSignedNumIncreasing()}) + * results in the original value. + */ + private static void assertSignedNumIncreasingWriteAndReadIsLossless( + long num) { + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeSignedNumIncreasing(num); + assertEquals( + "Unexpected result when decoding writeSignedNumIncreasing(" + num + ")", + num, + orderedCode.readSignedNumIncreasing()); + assertFalse("Unexpected remaining encoded bytes after decoding " + num, + orderedCode.hasRemainingEncodedBytes()); + } + + /** + * Assert that for various long values, encoding (via + * {@link OrderedCode#writeSignedNumIncreasing(long)}) and then decoding (via + * {@link OrderedCode#readSignedNumIncreasing()}) results in the original + * value. 
+ */ + @Test + public void testSignedNumIncreasing_writeAndRead() { + assertSignedNumIncreasingWriteAndReadIsLossless(Long.MIN_VALUE); + assertSignedNumIncreasingWriteAndReadIsLossless(Long.MIN_VALUE + 1); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MIN_VALUE - 1L); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MIN_VALUE); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MIN_VALUE + 1); + assertSignedNumIncreasingWriteAndReadIsLossless(-65); + assertSignedNumIncreasingWriteAndReadIsLossless(-64); + assertSignedNumIncreasingWriteAndReadIsLossless(-63); + assertSignedNumIncreasingWriteAndReadIsLossless(-3); + assertSignedNumIncreasingWriteAndReadIsLossless(-2); + assertSignedNumIncreasingWriteAndReadIsLossless(-1); + assertSignedNumIncreasingWriteAndReadIsLossless(0); + assertSignedNumIncreasingWriteAndReadIsLossless(1); + assertSignedNumIncreasingWriteAndReadIsLossless(2); + assertSignedNumIncreasingWriteAndReadIsLossless(3); + assertSignedNumIncreasingWriteAndReadIsLossless(63); + assertSignedNumIncreasingWriteAndReadIsLossless(64); + assertSignedNumIncreasingWriteAndReadIsLossless(65); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MAX_VALUE - 1); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MAX_VALUE); + assertSignedNumIncreasingWriteAndReadIsLossless(Integer.MAX_VALUE + 1L); + assertSignedNumIncreasingWriteAndReadIsLossless(Long.MAX_VALUE - 1); + assertSignedNumIncreasingWriteAndReadIsLossless(Long.MAX_VALUE); + } + + @Test + public void testLog2Floor_Positive() { + OrderedCode orderedCode = new OrderedCode(); + assertEquals(0, orderedCode.log2Floor(1)); + assertEquals(1, orderedCode.log2Floor(2)); + assertEquals(1, orderedCode.log2Floor(3)); + assertEquals(2, orderedCode.log2Floor(4)); + assertEquals(5, orderedCode.log2Floor(63)); + assertEquals(6, orderedCode.log2Floor(64)); + assertEquals(62, orderedCode.log2Floor(Long.MAX_VALUE)); + } + + /** + * OrderedCode.log2Floor(long) is defined to return -1 given an input of zero. + */ + @Test + public void testLog2Floor_zero() { + OrderedCode orderedCode = new OrderedCode(); + assertEquals(-1, orderedCode.log2Floor(0)); + } + + @Test + public void testLog2Floor_negative() { + OrderedCode orderedCode = new OrderedCode(); + try { + orderedCode.log2Floor(-1); + fail("Expected an IllegalArgumentException."); + } catch (IllegalArgumentException expected) { + // Expected! 
+ } + } + + @Test + public void testGetSignedEncodingLength() { + OrderedCode orderedCode = new OrderedCode(); + assertEquals(10, orderedCode.getSignedEncodingLength(Long.MIN_VALUE)); + assertEquals(10, orderedCode.getSignedEncodingLength(~(1L << 62))); + assertEquals(9, orderedCode.getSignedEncodingLength(~(1L << 62) + 1)); + assertEquals(3, orderedCode.getSignedEncodingLength(-8193)); + assertEquals(2, orderedCode.getSignedEncodingLength(-8192)); + assertEquals(2, orderedCode.getSignedEncodingLength(-65)); + assertEquals(1, orderedCode.getSignedEncodingLength(-64)); + assertEquals(1, orderedCode.getSignedEncodingLength(-2)); + assertEquals(1, orderedCode.getSignedEncodingLength(-1)); + assertEquals(1, orderedCode.getSignedEncodingLength(0)); + assertEquals(1, orderedCode.getSignedEncodingLength(1)); + assertEquals(1, orderedCode.getSignedEncodingLength(63)); + assertEquals(2, orderedCode.getSignedEncodingLength(64)); + assertEquals(2, orderedCode.getSignedEncodingLength(8191)); + assertEquals(3, orderedCode.getSignedEncodingLength(8192)); + assertEquals(9, orderedCode.getSignedEncodingLength((1L << 62)) - 1); + assertEquals(10, orderedCode.getSignedEncodingLength(1L << 62)); + assertEquals(10, orderedCode.getSignedEncodingLength(Long.MAX_VALUE)); + } + + @Test + public void testWriteTrailingBytes() { + byte[] escapeChars = new byte[] { OrderedCode.ESCAPE1, + OrderedCode.NULL_CHARACTER, OrderedCode.SEPARATOR, OrderedCode.ESCAPE2, + OrderedCode.INFINITY, OrderedCode.FF_CHARACTER}; + byte[] anotherArray = new byte[] { 'a', 'b', 'c', 'd', 'e' }; + + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeTrailingBytes(escapeChars); + assertArrayEquals(orderedCode.getEncodedBytes(), escapeChars); + assertArrayEquals(orderedCode.readTrailingBytes(), escapeChars); + try { + orderedCode.readInfinity(); + fail("Expected IllegalArgumentException."); + } catch (IllegalArgumentException e) { + // expected + } + + orderedCode = new OrderedCode(); + orderedCode.writeTrailingBytes(anotherArray); + assertArrayEquals(orderedCode.getEncodedBytes(), anotherArray); + assertArrayEquals(orderedCode.readTrailingBytes(), anotherArray); + } + + @Test + public void testMixedWrite() { + byte[] first = { 'a', 'b', 'c'}; + byte[] second = { 'd', 'e', 'f'}; + byte[] last = { 'x', 'y', 'z'}; + byte[] escapeChars = new byte[] { OrderedCode.ESCAPE1, + OrderedCode.NULL_CHARACTER, OrderedCode.SEPARATOR, OrderedCode.ESCAPE2, + OrderedCode.INFINITY, OrderedCode.FF_CHARACTER}; + + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeBytes(first); + orderedCode.writeBytes(second); + orderedCode.writeBytes(last); + orderedCode.writeInfinity(); + orderedCode.writeNumIncreasing(0); + orderedCode.writeNumIncreasing(1); + orderedCode.writeNumIncreasing(Long.MIN_VALUE); + orderedCode.writeNumIncreasing(Long.MAX_VALUE); + orderedCode.writeSignedNumIncreasing(0); + orderedCode.writeSignedNumIncreasing(1); + orderedCode.writeSignedNumIncreasing(Long.MIN_VALUE); + orderedCode.writeSignedNumIncreasing(Long.MAX_VALUE); + orderedCode.writeTrailingBytes(escapeChars); + byte[] allEncoded = orderedCode.getEncodedBytes(); + assertArrayEquals(orderedCode.readBytes(), first); + assertArrayEquals(orderedCode.readBytes(), second); + assertFalse(orderedCode.readInfinity()); + assertArrayEquals(orderedCode.readBytes(), last); + assertTrue(orderedCode.readInfinity()); + assertEquals(orderedCode.readNumIncreasing(), 0); + assertEquals(orderedCode.readNumIncreasing(), 1); + assertFalse(orderedCode.readInfinity()); + 
assertEquals(orderedCode.readNumIncreasing(), Long.MIN_VALUE); + assertEquals(orderedCode.readNumIncreasing(), Long.MAX_VALUE); + assertEquals(orderedCode.readSignedNumIncreasing(), 0); + assertEquals(orderedCode.readSignedNumIncreasing(), 1); + assertFalse(orderedCode.readInfinity()); + assertEquals(orderedCode.readSignedNumIncreasing(), Long.MIN_VALUE); + assertEquals(orderedCode.readSignedNumIncreasing(), Long.MAX_VALUE); + assertArrayEquals(orderedCode.getEncodedBytes(), escapeChars); + assertArrayEquals(orderedCode.readTrailingBytes(), escapeChars); + + orderedCode = new OrderedCode(allEncoded); + assertArrayEquals(orderedCode.readBytes(), first); + assertArrayEquals(orderedCode.readBytes(), second); + assertFalse(orderedCode.readInfinity()); + assertArrayEquals(orderedCode.readBytes(), last); + assertTrue(orderedCode.readInfinity()); + assertEquals(orderedCode.readNumIncreasing(), 0); + assertEquals(orderedCode.readNumIncreasing(), 1); + assertFalse(orderedCode.readInfinity()); + assertEquals(orderedCode.readNumIncreasing(), Long.MIN_VALUE); + assertEquals(orderedCode.readNumIncreasing(), Long.MAX_VALUE); + assertEquals(orderedCode.readSignedNumIncreasing(), 0); + assertEquals(orderedCode.readSignedNumIncreasing(), 1); + assertFalse(orderedCode.readInfinity()); + assertEquals(orderedCode.readSignedNumIncreasing(), Long.MIN_VALUE); + assertEquals(orderedCode.readSignedNumIncreasing(), Long.MAX_VALUE); + assertArrayEquals(orderedCode.getEncodedBytes(), escapeChars); + assertArrayEquals(orderedCode.readTrailingBytes(), escapeChars); + } + + @Test + public void testEdgeCases() { + byte[] ffChar = {OrderedCode.FF_CHARACTER}; + byte[] nullChar = {OrderedCode.NULL_CHARACTER}; + + byte[] separatorEncoded = {OrderedCode.ESCAPE1, OrderedCode.SEPARATOR}; + byte[] ffCharEncoded = {OrderedCode.ESCAPE1, OrderedCode.NULL_CHARACTER}; + byte[] nullCharEncoded = {OrderedCode.ESCAPE2, OrderedCode.FF_CHARACTER}; + byte[] infinityEncoded = {OrderedCode.ESCAPE2, OrderedCode.INFINITY}; + + OrderedCode orderedCode = new OrderedCode(); + orderedCode.writeBytes(ffChar); + orderedCode.writeBytes(nullChar); + orderedCode.writeInfinity(); + assertArrayEquals(orderedCode.getEncodedBytes(), + Bytes.concat(ffCharEncoded, separatorEncoded, + nullCharEncoded, separatorEncoded, + infinityEncoded)); + assertArrayEquals(orderedCode.readBytes(), ffChar); + assertArrayEquals(orderedCode.readBytes(), nullChar); + assertTrue(orderedCode.readInfinity()); + + orderedCode = new OrderedCode( + Bytes.concat(ffCharEncoded, separatorEncoded)); + assertArrayEquals(orderedCode.readBytes(), ffChar); + + orderedCode = new OrderedCode( + Bytes.concat(nullCharEncoded, separatorEncoded)); + assertArrayEquals(orderedCode.readBytes(), nullChar); + + byte[] invalidEncodingForRead = {OrderedCode.ESCAPE2, OrderedCode.ESCAPE2, + OrderedCode.ESCAPE1, OrderedCode.SEPARATOR}; + orderedCode = new OrderedCode(invalidEncodingForRead); + try { + orderedCode.readBytes(); + fail("Should have failed."); + } catch (Exception e) { + // Expected + } + assertTrue(orderedCode.hasRemainingEncodedBytes()); + } + + @Test + public void testHasRemainingEncodedBytes() { + byte[] bytes = { 'a', 'b', 'c'}; + long number = 12345; + + // Empty + OrderedCode orderedCode = new OrderedCode(); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + // First and only field of each type. 
+ orderedCode.writeBytes(bytes); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertArrayEquals(orderedCode.readBytes(), bytes); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + orderedCode.writeNumIncreasing(number); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertEquals(orderedCode.readNumIncreasing(), number); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + orderedCode.writeSignedNumIncreasing(number); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertEquals(orderedCode.readSignedNumIncreasing(), number); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + orderedCode.writeInfinity(); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertTrue(orderedCode.readInfinity()); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + orderedCode.writeTrailingBytes(bytes); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertArrayEquals(orderedCode.readTrailingBytes(), bytes); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + + // Two fields of same type. + orderedCode.writeBytes(bytes); + orderedCode.writeBytes(bytes); + assertTrue(orderedCode.hasRemainingEncodedBytes()); + assertArrayEquals(orderedCode.readBytes(), bytes); + assertArrayEquals(orderedCode.readBytes(), bytes); + assertFalse(orderedCode.hasRemainingEncodedBytes()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java new file mode 100644 index 0000000000000..05a3864d9bd4d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for ParDoFnFactory. 
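+ *
+ * ParDoFnFactory deserializes the user's DoFn from the "serialized_fn"
+ * property of the CloudObject spec and wraps it in a NormalParDoFn; specs
+ * naming an unknown user-fn kind are rejected.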
+ */ +@RunWith(JUnit4.class) +public class ParDoFnFactoryTest { + static class TestDoFn extends DoFn { + final String stringState; + final long longState; + + TestDoFn(String stringState, long longState) { + this.stringState = stringState; + this.longState = longState; + } + + @Override + public void processElement(ProcessContext c) { + throw new RuntimeException("not expecting to call this"); + } + } + + @Test + public void testCreateNormalParDoFn() throws Exception { + String stringState = "some state"; + long longState = 42L; + + TestDoFn fn = new TestDoFn(stringState, longState); + + String serializedFn = + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(fn)); + + CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); + addString(cloudUserFn, "serialized_fn", serializedFn); + + String tag = "output"; + MultiOutputInfo multiOutputInfo = new MultiOutputInfo(); + multiOutputInfo.setTag(tag); + List multiOutputInfos = + Arrays.asList(multiOutputInfo); + + BatchModeExecutionContext context = new BatchModeExecutionContext(); + CounterSet counters = new CounterSet(); + StateSampler stateSampler = new StateSampler( + "test", counters.getAddCounterMutator()); + ParDoFn parDoFn = ParDoFnFactory.create( + PipelineOptionsFactory.create(), + cloudUserFn, "name", null, multiOutputInfos, 1, + context, counters.getAddCounterMutator(), stateSampler); + + Assert.assertThat(parDoFn, new IsInstanceOf(NormalParDoFn.class)); + NormalParDoFn normalParDoFn = (NormalParDoFn) parDoFn; + + DoFn actualDoFn = normalParDoFn.fn; + Assert.assertThat(actualDoFn, new IsInstanceOf(TestDoFn.class)); + TestDoFn actualTestDoFn = (TestDoFn) actualDoFn; + + Assert.assertEquals(stringState, actualTestDoFn.stringState); + Assert.assertEquals(longState, actualTestDoFn.longState); + + Assert.assertEquals(context, normalParDoFn.executionContext); + } + + @Test + public void testCreateUnknownParDoFn() throws Exception { + CloudObject cloudUserFn = CloudObject.forClassName("UnknownKindOfDoFn"); + try { + CounterSet counters = new CounterSet(); + StateSampler stateSampler = new StateSampler( + "test", counters.getAddCounterMutator()); + ParDoFnFactory.create(PipelineOptionsFactory.create(), + cloudUserFn, "name", null, null, 1, + new BatchModeExecutionContext(), + counters.getAddCounterMutator(), + stateSampler); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + CoreMatchers.containsString( + "unable to create a ParDoFn")); + } + } + + // TODO: Test side inputs. +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java new file mode 100644 index 0000000000000..be8c972c5944a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.runners.worker;
+
+import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
+import com.google.cloud.dataflow.sdk.coders.Coder;
+import com.google.cloud.dataflow.sdk.coders.KvCoder;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.util.WindowedValue;
+import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils;
+import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry;
+import com.google.cloud.dataflow.sdk.util.common.worker.Sink;
+import com.google.cloud.dataflow.sdk.util.common.worker.Source;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.common.collect.Lists;
+
+import org.joda.time.Instant;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Tests for PartitioningShuffleSource.
+ */
+@RunWith(JUnit4.class)
+public class PartitioningShuffleSourceTest {
+  static final List<WindowedValue<KV<Integer, String>>> NO_KVS = Collections.emptyList();
+
+  static final Instant timestamp = new Instant(123000);
+  static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000));
+
+  static final List<WindowedValue<KV<Integer, String>>> KVS = Arrays.asList(
+      WindowedValue.of(KV.of(1, "in 1a"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(1, "in 1b"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(2, "in 2a"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(2, "in 2b"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(3, "in 3"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(4, "in 4a"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(4, "in 4b"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(4, "in 4c"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(4, "in 4d"), timestamp, Lists.newArrayList(window)),
+      WindowedValue.of(KV.of(5, "in 5"), timestamp, Lists.newArrayList(window)));
+
+  void runTestReadShuffleSource(List<WindowedValue<KV<Integer, String>>> expected)
+      throws Exception {
+    Coder<WindowedValue<KV<Integer, String>>> elemCoder = WindowedValue.getFullCoder(
+        KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()),
+        IntervalWindow.getCoder());
+
+    // Write to shuffle with PARTITION_KEYS ShuffleSink.
+    ShuffleSink<KV<Integer, String>> shuffleSink = new ShuffleSink<>(
+        PipelineOptionsFactory.create(),
+        null, ShuffleSink.ShuffleKind.PARTITION_KEYS,
+        elemCoder);
+
+    TestShuffleWriter shuffleWriter = new TestShuffleWriter();
+
+    List<Long> actualSizes = new ArrayList<>();
+    try (Sink.SinkWriter<WindowedValue<KV<Integer, String>>> shuffleSinkWriter =
+             shuffleSink.writer(shuffleWriter)) {
+      for (WindowedValue<KV<Integer, String>> value : expected) {
+        actualSizes.add(shuffleSinkWriter.add(value));
+      }
+    }
+    List<ShuffleEntry> records = shuffleWriter.getRecords();
+    Assert.assertEquals(expected.size(), records.size());
+    Assert.assertEquals(shuffleWriter.getSizes(), actualSizes);
+
+    // Read from shuffle with PartitioningShuffleSource.
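+    // The source is configured with the same windowed KV coder as the sink,
+    // so each ShuffleEntry written above can be decoded back into a
+    // WindowedValue<KV<Integer, String>>; the expected sizes recorded below
+    // are the raw entry lengths.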
+ PartitioningShuffleSource shuffleSource = + new PartitioningShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + elemCoder); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(shuffleSource); + + TestShuffleReader shuffleReader = new TestShuffleReader(); + List expectedSizes = new ArrayList<>(); + for (ShuffleEntry record : records) { + expectedSizes.add(record.length()); + shuffleReader.addEntry(record); + } + + List>> actual = new ArrayList<>(); + try (Source.SourceIterator>> iter = + shuffleSource.iterator(shuffleReader)) { + while (iter.hasNext()) { + Assert.assertTrue(iter.hasNext()); + actual.add(iter.next()); + } + Assert.assertFalse(iter.hasNext()); + try { + iter.next(); + Assert.fail("should have failed"); + } catch (NoSuchElementException exn) { + // As expected. + } + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testReadEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(NO_KVS); + } + + @Test + public void testReadNonEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(KVS); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java new file mode 100644 index 0000000000000..4b8901af34b57 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.encodeBase64String; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for ShuffleSinkFactory. 
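+ *
+ * The factory reads the "shuffle_kind" property of the sink spec (ungrouped,
+ * partition_keys, group_keys, group_keys_and_sort_values) and derives the
+ * key, value and sort coders from the windowed element codec.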
+ */ +@RunWith(JUnit4.class) +public class ShuffleSinkFactoryTest { + ShuffleSink runTestCreateShuffleSinkHelper(byte[] shuffleWriterConfig, + String shuffleKind, + CloudObject encoding, + FullWindowedValueCoder coder) + throws Exception { + CloudObject spec = CloudObject.forClassName("ShuffleSink"); + addString(spec, "shuffle_writer_config", encodeBase64String(shuffleWriterConfig)); + addString(spec, "shuffle_kind", shuffleKind); + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(encoding); + + Sink sink = SinkFactory.create(PipelineOptionsFactory.create(), + cloudSink, + new BatchModeExecutionContext()); + Assert.assertThat(sink, new IsInstanceOf(ShuffleSink.class)); + ShuffleSink shuffleSink = (ShuffleSink) sink; + Assert.assertArrayEquals(shuffleWriterConfig, + shuffleSink.shuffleWriterConfig); + Assert.assertEquals(coder, shuffleSink.windowedElemCoder); + return shuffleSink; + } + + void runTestCreateUngroupingShuffleSink(byte[] shuffleWriterConfig, + CloudObject encoding, + FullWindowedValueCoder coder) + throws Exception { + ShuffleSink shuffleSink = runTestCreateShuffleSinkHelper( + shuffleWriterConfig, "ungrouped", encoding, coder); + Assert.assertEquals(ShuffleSink.ShuffleKind.UNGROUPED, + shuffleSink.shuffleKind); + Assert.assertFalse(shuffleSink.shardByKey); + Assert.assertFalse(shuffleSink.groupValues); + Assert.assertFalse(shuffleSink.sortValues); + Assert.assertNull(shuffleSink.keyCoder); + Assert.assertNull(shuffleSink.valueCoder); + Assert.assertNull(shuffleSink.sortKeyCoder); + Assert.assertNull(shuffleSink.sortValueCoder); + } + + void runTestCreatePartitioningShuffleSink(byte[] shuffleWriterConfig, + Coder keyCoder, + Coder valueCoder) + throws Exception { + FullWindowedValueCoder coder = (FullWindowedValueCoder) WindowedValue.getFullCoder( + KvCoder.of(keyCoder, valueCoder), IntervalWindow.getCoder()); + ShuffleSink shuffleSink = runTestCreateShuffleSinkHelper( + shuffleWriterConfig, "partition_keys", coder.asCloudObject(), coder); + Assert.assertEquals(ShuffleSink.ShuffleKind.PARTITION_KEYS, + shuffleSink.shuffleKind); + Assert.assertTrue(shuffleSink.shardByKey); + Assert.assertFalse(shuffleSink.groupValues); + Assert.assertFalse(shuffleSink.sortValues); + Assert.assertEquals(keyCoder, shuffleSink.keyCoder); + Assert.assertEquals(valueCoder, shuffleSink.valueCoder); + Assert.assertEquals(FullWindowedValueCoder.of(valueCoder, + IntervalWindow.getCoder()), + shuffleSink.windowedValueCoder); + Assert.assertNull(shuffleSink.sortKeyCoder); + Assert.assertNull(shuffleSink.sortValueCoder); + } + + void runTestCreateGroupingShuffleSink(byte[] shuffleWriterConfig, + Coder keyCoder, + Coder valueCoder) + throws Exception { + FullWindowedValueCoder coder = (FullWindowedValueCoder) WindowedValue.getFullCoder( + KvCoder.of(keyCoder, valueCoder), IntervalWindow.getCoder()); + ShuffleSink shuffleSink = runTestCreateShuffleSinkHelper( + shuffleWriterConfig, "group_keys", coder.asCloudObject(), coder); + Assert.assertEquals(ShuffleSink.ShuffleKind.GROUP_KEYS, + shuffleSink.shuffleKind); + Assert.assertTrue(shuffleSink.shardByKey); + Assert.assertTrue(shuffleSink.groupValues); + Assert.assertFalse(shuffleSink.sortValues); + Assert.assertEquals(keyCoder, shuffleSink.keyCoder); + Assert.assertEquals(valueCoder, shuffleSink.valueCoder); + Assert.assertNull(shuffleSink.windowedValueCoder); + Assert.assertNull(shuffleSink.sortKeyCoder); + 
Assert.assertNull(shuffleSink.sortValueCoder); + } + + void runTestCreateGroupingSortingShuffleSink(byte[] shuffleWriterConfig, + Coder keyCoder, + Coder sortKeyCoder, + Coder sortValueCoder) + throws Exception { + FullWindowedValueCoder coder = (FullWindowedValueCoder) WindowedValue.getFullCoder( + KvCoder.of(keyCoder, KvCoder.of(sortKeyCoder, sortValueCoder)), + IntervalWindow.getCoder()); + ShuffleSink shuffleSink = runTestCreateShuffleSinkHelper( + shuffleWriterConfig, "group_keys_and_sort_values", coder.asCloudObject(), coder); + Assert.assertEquals(ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES, + shuffleSink.shuffleKind); + Assert.assertTrue(shuffleSink.shardByKey); + Assert.assertTrue(shuffleSink.groupValues); + Assert.assertTrue(shuffleSink.sortValues); + Assert.assertEquals(keyCoder, shuffleSink.keyCoder); + Assert.assertEquals(KvCoder.of(sortKeyCoder, sortValueCoder), + shuffleSink.valueCoder); + Assert.assertEquals(sortKeyCoder, shuffleSink.sortKeyCoder); + Assert.assertEquals(sortValueCoder, shuffleSink.sortValueCoder); + Assert.assertNull(shuffleSink.windowedValueCoder); + } + + @Test + public void testCreateUngroupingShuffleSink() throws Exception { + FullWindowedValueCoder coder = (FullWindowedValueCoder) WindowedValue.getFullCoder( + StringUtf8Coder.of(), IntervalWindow.getCoder()); + runTestCreateUngroupingShuffleSink( + new byte[]{(byte) 0xE1}, + coder.asCloudObject(), + coder); + } + + @Test + public void testCreatePartitionShuffleSink() throws Exception { + runTestCreatePartitioningShuffleSink( + new byte[]{(byte) 0xE2}, + BigEndianIntegerCoder.of(), + StringUtf8Coder.of()); + } + + @Test + public void testCreateGroupingShuffleSink() throws Exception { + runTestCreateGroupingShuffleSink( + new byte[]{(byte) 0xE2}, + BigEndianIntegerCoder.of(), + WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder())); + } + + @Test + public void testCreateGroupingSortingShuffleSink() throws Exception { + runTestCreateGroupingSortingShuffleSink( + new byte[]{(byte) 0xE3}, + BigEndianIntegerCoder.of(), + StringUtf8Coder.of(), + VoidCoder.of()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java new file mode 100644 index 0000000000000..3e390b8966afa --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.InstantCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink.SinkWriter; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.Lists; + +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Tests for ShuffleSink. + */ +@RunWith(JUnit4.class) +public class ShuffleSinkTest { + static final List> NO_KVS = Collections.emptyList(); + + static final List> KVS = Arrays.asList( + KV.of(1, "in 1a"), + KV.of(1, "in 1b"), + KV.of(2, "in 2a"), + KV.of(2, "in 2b"), + KV.of(3, "in 3"), + KV.of(4, "in 4a"), + KV.of(4, "in 4b"), + KV.of(4, "in 4c"), + KV.of(4, "in 4d"), + KV.of(5, "in 5")); + + static final List>> NO_SORTING_KVS = + Collections.emptyList(); + + static final List>> SORTING_KVS = + Arrays.asList( + KV.of(1, KV.of("in 1a", 3)), + KV.of(1, KV.of("in 1b", 9)), + KV.of(2, KV.of("in 2a", 2)), + KV.of(2, KV.of("in 2b", 77)), + KV.of(3, KV.of("in 3", 33)), + KV.of(4, KV.of("in 4a", -123)), + KV.of(4, KV.of("in 4b", 0)), + KV.of(4, KV.of("in 4c", -1)), + KV.of(4, KV.of("in 4d", 1)), + KV.of(5, KV.of("in 5", 666))); + + static final Instant timestamp = new Instant(123000); + static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + + void runTestWriteUngroupingShuffleSink(List expected) + throws Exception { + Coder> windowedValueCoder = + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindow().windowCoder()); + ShuffleSink shuffleSink = new ShuffleSink<>( + PipelineOptionsFactory.create(), + null, ShuffleSink.ShuffleKind.UNGROUPED, + windowedValueCoder); + + TestShuffleWriter shuffleWriter = new TestShuffleWriter(); + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter> shuffleSinkWriter = + shuffleSink.writer(shuffleWriter)) { + for (Integer value : expected) { + actualSizes.add(shuffleSinkWriter.add(WindowedValue.valueInGlobalWindow(value))); + } + } + + List records = shuffleWriter.getRecords(); + + List actual = new ArrayList<>(); + for (ShuffleEntry record : records) { + // Ignore the key. 
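+      // For ShuffleKind.UNGROUPED the key carries no data this test cares
+      // about; only the value bytes are decoded, as a WindowedValue<Integer>
+      // in the global window.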
+ byte[] valueBytes = record.getValue(); + WindowedValue value = CoderUtils.decodeFromByteArray(windowedValueCoder, valueBytes); + Assert.assertEquals(Lists.newArrayList(GlobalWindow.Window.INSTANCE), value.getWindows()); + actual.add(value.getValue()); + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + } + + void runTestWriteGroupingShuffleSink( + List> expected) + throws Exception { + ShuffleSink> shuffleSink = new ShuffleSink<>( + PipelineOptionsFactory.create(), + null, ShuffleSink.ShuffleKind.GROUP_KEYS, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), + IntervalWindow.getCoder())); + + TestShuffleWriter shuffleWriter = new TestShuffleWriter(); + List actualSizes = new ArrayList<>(); + try (SinkWriter>> shuffleSinkWriter = + shuffleSink.writer(shuffleWriter)) { + for (KV kv : expected) { + actualSizes.add(shuffleSinkWriter.add( + WindowedValue.of(KV.of(kv.getKey(), kv.getValue()), + timestamp, + Lists.newArrayList(window)))); + } + } + + List records = shuffleWriter.getRecords(); + + List> actual = new ArrayList<>(); + for (ShuffleEntry record : records) { + byte[] keyBytes = record.getKey(); + byte[] valueBytes = record.getValue(); + Assert.assertEquals(timestamp, + CoderUtils.decodeFromByteArray(InstantCoder.of(), record.getSecondaryKey())); + + Integer key = + CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), + keyBytes); + String valueElem = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), valueBytes); + + actual.add(KV.of(key, valueElem)); + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + } + + void runTestWriteGroupingSortingShuffleSink( + List>> expected) + throws Exception { + ShuffleSink>> shuffleSink = + new ShuffleSink<>( + PipelineOptionsFactory.create(), + null, + ShuffleSink.ShuffleKind.GROUP_KEYS_AND_SORT_VALUES, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), + KvCoder.of(StringUtf8Coder.of(), + BigEndianIntegerCoder.of())), + new GlobalWindow().windowCoder())); + + TestShuffleWriter shuffleWriter = new TestShuffleWriter(); + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter>>> shuffleSinkWriter = + shuffleSink.writer(shuffleWriter)) { + for (KV> kv : expected) { + actualSizes.add(shuffleSinkWriter.add(WindowedValue.valueInGlobalWindow(kv))); + } + } + + List records = shuffleWriter.getRecords(); + + List>> actual = new ArrayList<>(); + for (ShuffleEntry record : records) { + byte[] keyBytes = record.getKey(); + byte[] valueBytes = record.getValue(); + byte[] sortKeyBytes = record.getSecondaryKey(); + + Integer key = + CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), + keyBytes); + String sortKey = + CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), + sortKeyBytes); + Integer sortValue = CoderUtils.decodeFromByteArray(BigEndianIntegerCoder.of(), valueBytes); + + actual.add(KV.of(key, KV.of(sortKey, sortValue))); + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + } + + @Test + public void testWriteEmptyUngroupingShuffleSink() throws Exception { + runTestWriteUngroupingShuffleSink(TestUtils.NO_INTS); + } + + @Test + public void testWriteNonEmptyUngroupingShuffleSink() throws Exception { + runTestWriteUngroupingShuffleSink(TestUtils.INTS); + } + + @Test + public void testWriteEmptyGroupingShuffleSink() throws Exception { + runTestWriteGroupingShuffleSink(NO_KVS); + } + + @Test + public void 
testWriteNonEmptyGroupingShuffleSink() throws Exception { + runTestWriteGroupingShuffleSink(KVS); + } + + @Test + public void testWriteEmptyGroupingSortingShuffleSink() throws Exception { + runTestWriteGroupingSortingShuffleSink(NO_SORTING_KVS); + } + + @Test + public void testWriteNonEmptyGroupingSortingShuffleSink() throws Exception { + runTestWriteGroupingSortingShuffleSink(SORTING_KVS); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java new file mode 100644 index 0000000000000..75fc7479687e7 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.encodeBase64String; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for UngroupedShuffleSourceFactory, GroupingShuffleSourceFactory, + * and PartitioningShuffleSourceFactory. 
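+ *
+ * <p>Each test builds a cloud {@code Source} spec by hand and checks that
+ * {@code SourceFactory} produces the expected shuffle source class with the right
+ * shuffle positions and coders. A sketch of the spec shape (field names exactly as
+ * used by {@code runTestCreateShuffleSource} below):
+ *
+ * <pre>{@code
+ * CloudObject spec = CloudObject.forClassName("GroupingShuffleSource");
+ * addString(spec, "shuffle_reader_config", encodeBase64String(shuffleReaderConfig));
+ * addString(spec, "start_shuffle_position", "aaa");  // optional
+ * addString(spec, "end_shuffle_position", "zzz");    // optional
+ * }</pre>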
+ */ +@RunWith(JUnit4.class) +public class ShuffleSourceFactoryTest { + + T runTestCreateShuffleSource(byte[] shuffleReaderConfig, + @Nullable String start, + @Nullable String end, + CloudObject encoding, + BatchModeExecutionContext context, + Class shuffleSourceClass) + throws Exception { + CloudObject spec = CloudObject.forClassName(shuffleSourceClass.getSimpleName()); + addString(spec, "shuffle_reader_config", encodeBase64String(shuffleReaderConfig)); + if (start != null) { + addString(spec, "start_shuffle_position", start); + } + if (end != null) { + addString(spec, "end_shuffle_position", end); + } + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Source source = SourceFactory.create( + PipelineOptionsFactory.create(), cloudSource, context); + Assert.assertThat(source, new IsInstanceOf(shuffleSourceClass)); + T shuffleSource = (T) source; + return shuffleSource; + } + + void runTestCreateUngroupedShuffleSource(byte[] shuffleReaderConfig, + @Nullable String start, + @Nullable String end, + CloudObject encoding, + Coder coder) throws Exception { + UngroupedShuffleSource shuffleSource = + runTestCreateShuffleSource(shuffleReaderConfig, + start, + end, + encoding, + new BatchModeExecutionContext(), + UngroupedShuffleSource.class); + Assert.assertArrayEquals(shuffleReaderConfig, + shuffleSource.shuffleReaderConfig); + Assert.assertEquals(start, shuffleSource.startShufflePosition); + Assert.assertEquals(end, shuffleSource.stopShufflePosition); + + Assert.assertEquals(coder, shuffleSource.coder); + } + + void runTestCreateGroupingShuffleSource(byte[] shuffleReaderConfig, + @Nullable String start, + @Nullable String end, + CloudObject encoding, + Coder keyCoder, + Coder valueCoder) throws Exception { + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleSource shuffleSource = + runTestCreateShuffleSource(shuffleReaderConfig, + start, + end, + encoding, + context, + GroupingShuffleSource.class); + Assert.assertArrayEquals(shuffleReaderConfig, + shuffleSource.shuffleReaderConfig); + Assert.assertEquals(start, shuffleSource.startShufflePosition); + Assert.assertEquals(end, shuffleSource.stopShufflePosition); + + Assert.assertEquals(keyCoder, shuffleSource.keyCoder); + Assert.assertEquals(valueCoder, shuffleSource.valueCoder); + Assert.assertEquals(context, shuffleSource.executionContext); + } + + void runTestCreatePartitioningShuffleSource(byte[] shuffleReaderConfig, + @Nullable String start, + @Nullable String end, + CloudObject encoding, + Coder keyCoder, + Coder windowedValueCoder) throws Exception { + PartitioningShuffleSource shuffleSource = + runTestCreateShuffleSource(shuffleReaderConfig, + start, + end, + encoding, + new BatchModeExecutionContext(), + PartitioningShuffleSource.class); + Assert.assertArrayEquals(shuffleReaderConfig, + shuffleSource.shuffleReaderConfig); + Assert.assertEquals(start, shuffleSource.startShufflePosition); + Assert.assertEquals(end, shuffleSource.stopShufflePosition); + + Assert.assertEquals(keyCoder, shuffleSource.keyCoder); + Assert.assertEquals(windowedValueCoder, shuffleSource.windowedValueCoder); + } + + @Test + public void testCreatePlainUngroupedShuffleSource() throws Exception { + runTestCreateUngroupedShuffleSource( + new byte[]{(byte) 0xE1}, null, null, + makeCloudEncoding("StringUtf8Coder"), + StringUtf8Coder.of()); + } + + @Test + public void testCreateRichUngroupedShuffleSource() 
throws Exception { + runTestCreateUngroupedShuffleSource( + new byte[]{(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding("BigEndianIntegerCoder"), + BigEndianIntegerCoder.of()); + } + + @Test + public void testCreatePlainGroupingShuffleSource() throws Exception { + runTestCreateGroupingShuffleSource( + new byte[]{(byte) 0xE1}, null, null, + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding( + "IterableCoder", + makeCloudEncoding("StringUtf8Coder"))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + StringUtf8Coder.of()); + } + + @Test + public void testCreateRichGroupingShuffleSource() throws Exception { + runTestCreateGroupingShuffleSource( + new byte[]{(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding( + "IterableCoder", + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("VoidCoder")))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + KvCoder.of(StringUtf8Coder.of(), VoidCoder.of())); + } + + @Test + public void testCreatePlainPartitioningShuffleSource() throws Exception { + runTestCreatePartitioningShuffleSource( + new byte[]{(byte) 0xE1}, null, null, + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding("StringUtf8Coder")), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + FullWindowedValueCoder.of(StringUtf8Coder.of(), IntervalWindow.getCoder())); + } + + @Test + public void testCreateRichPartitioningShuffleSource() throws Exception { + runTestCreatePartitioningShuffleSource( + new byte[]{(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("VoidCoder"))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + FullWindowedValueCoder.of(KvCoder.of(StringUtf8Coder.of(), VoidCoder.of()), + IntervalWindow.getCoder())); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java new file mode 100644 index 0000000000000..ea879335ec02a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.hamcrest.Matchers.emptyIterable; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsInstanceOf.instanceOf; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for SideInputUtils. + */ +@RunWith(JUnit4.class) +public class SideInputUtilsTest { + SideInputInfo createSingletonSideInputInfo( + com.google.api.services.dataflow.model.Source sideInputSource) { + SideInputInfo sideInputInfo = new SideInputInfo(); + sideInputInfo.setSources(Arrays.asList(sideInputSource)); + sideInputInfo.setKind(CloudObject.forClassName("singleton")); + return sideInputInfo; + } + + SideInputInfo createCollectionSideInputInfo( + com.google.api.services.dataflow.model.Source... sideInputSources) { + SideInputInfo sideInputInfo = new SideInputInfo(); + sideInputInfo.setSources(Arrays.asList(sideInputSources)); + sideInputInfo.setKind(CloudObject.forClassName("collection")); + return sideInputInfo; + } + + com.google.api.services.dataflow.model.Source createSideInputSource(Integer... ints) + throws Exception { + return InMemorySourceFactoryTest.createInMemoryCloudSource( + Arrays.asList(ints), + null, null, + BigEndianIntegerCoder.of()); + } + + void assertThatContains(Object actual, Object... 
expected) { + assertThat(actual, instanceOf(Iterable.class)); + Iterable iter = (Iterable) actual; + if (expected.length == 0) { + assertThat(iter, is(emptyIterable())); + } else { + assertThat(iter, contains(expected)); + } + } + + @Test + public void testReadSingletonSideInput() throws Exception { + SideInputInfo sideInputInfo = + createSingletonSideInputInfo(createSideInputSource(42)); + + assertEquals(42, + SideInputUtils.readSideInput(PipelineOptionsFactory.create(), + sideInputInfo, + new BatchModeExecutionContext())); + } + + @Test + public void testReadEmptyCollectionSideInput() throws Exception { + SideInputInfo sideInputInfo = + createCollectionSideInputInfo(createSideInputSource()); + + assertThatContains( + SideInputUtils.readSideInput(PipelineOptionsFactory.create(), + sideInputInfo, + new BatchModeExecutionContext())); + } + + @Test + public void testReadCollectionSideInput() throws Exception { + SideInputInfo sideInputInfo = + createCollectionSideInputInfo(createSideInputSource(3, 4, 5, 6)); + + assertThatContains( + SideInputUtils.readSideInput(PipelineOptionsFactory.create(), + sideInputInfo, + new BatchModeExecutionContext()), + 3, 4, 5, 6); + } + + @Test + public void testReadCollectionShardedSideInput() throws Exception { + SideInputInfo sideInputInfo = + createCollectionSideInputInfo( + createSideInputSource(3), + createSideInputSource(), + createSideInputSource(4, 5), + createSideInputSource(6), + createSideInputSource()); + + assertThatContains( + SideInputUtils.readSideInput(PipelineOptionsFactory.create(), + sideInputInfo, + new BatchModeExecutionContext()), + 3, 4, 5, 6); + } + + @Test + public void testReadSingletonSideInputValue() throws Exception { + CloudObject sideInputKind = CloudObject.forClassName("singleton"); + Object elem = "hi"; + List elems = Arrays.asList(elem); + assertEquals(elem, + SideInputUtils.readSideInputValue(sideInputKind, elems)); + } + + @Test + public void testReadCollectionSideInputValue() throws Exception { + CloudObject sideInputKind = CloudObject.forClassName("collection"); + List elems = Arrays.asList("hi", "there", "bob"); + assertEquals(elems, + SideInputUtils.readSideInputValue(sideInputKind, elems)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactoryTest.java new file mode 100644 index 0000000000000..66e72545cb717 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactoryTest.java @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for SinkFactory. + */ +@RunWith(JUnit4.class) +public class SinkFactoryTest { + static class TestSinkFactory { + public static TestSink create(PipelineOptions options, + CloudObject o, + Coder coder, + ExecutionContext executionContext) { + return new TestSink(); + } + } + + static class TestSink extends Sink { + @Override + public SinkWriter writer() { + return new TestSinkWriter(); + } + + /** A sink writer that drops its input values, for testing. */ + class TestSinkWriter implements SinkWriter { + @Override + public long add(Integer outputElem) { + return 4; + } + + @Override + public void close() { + } + } + } + + @Test + public void testCreatePredefinedSink() throws Exception { + CloudObject spec = CloudObject.forClassName("TextSink"); + addString(spec, "filename", "/path/to/file.txt"); + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(makeCloudEncoding("StringUtf8Coder")); + + Sink sink = SinkFactory.create(PipelineOptionsFactory.create(), + cloudSink, + new BatchModeExecutionContext()); + Assert.assertThat(sink, new IsInstanceOf(TextSink.class)); + } + + @Test + public void testCreateUserDefinedSink() throws Exception { + CloudObject spec = CloudObject.forClass(TestSinkFactory.class); + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(makeCloudEncoding("BigEndianIntegerCoder")); + + Sink sink = SinkFactory.create(PipelineOptionsFactory.create(), + cloudSink, + new BatchModeExecutionContext()); + Assert.assertThat(sink, new IsInstanceOf(TestSink.class)); + } + + @Test + public void testCreateUnknownSink() throws Exception { + CloudObject spec = CloudObject.forClassName("UnknownSink"); + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(makeCloudEncoding("StringUtf8Coder")); + try { + SinkFactory.create(PipelineOptionsFactory.create(), + cloudSink, + new BatchModeExecutionContext()); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + CoreMatchers.containsString( + "unable to create a sink")); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java new file mode 100644 index 0000000000000..4b4665b55869d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java @@ -0,0 
+1,124 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.NoSuchElementException; + +/** + * Tests for SourceFactory. + */ +@RunWith(JUnit4.class) +public class SourceFactoryTest { + static class TestSourceFactory { + public static TestSource create(PipelineOptions options, + CloudObject o, + Coder coder, + ExecutionContext executionContext) { + return new TestSource(); + } + } + + static class TestSource extends Source { + @Override + public SourceIterator iterator() { + return new TestSourceIterator(); + } + + /** A source iterator that produces no values, for testing. 
*/ + class TestSourceIterator extends AbstractSourceIterator { + @Override + public boolean hasNext() { return false; } + + @Override + public Integer next() { + throw new NoSuchElementException(); + } + + @Override + public void close() { + } + } + } + + @Test + public void testCreatePredefinedSource() throws Exception { + CloudObject spec = CloudObject.forClassName("TextSource"); + addString(spec, "filename", "/path/to/file.txt"); + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(makeCloudEncoding("StringUtf8Coder")); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + Assert.assertThat(source, new IsInstanceOf(TextSource.class)); + } + + @Test + public void testCreateUserDefinedSource() throws Exception { + CloudObject spec = CloudObject.forClass(TestSourceFactory.class); + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(makeCloudEncoding("BigEndianIntegerCoder")); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + Assert.assertThat(source, new IsInstanceOf(TestSource.class)); + } + + @Test + public void testCreateUnknownSource() throws Exception { + CloudObject spec = CloudObject.forClassName("UnknownSource"); + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(makeCloudEncoding("StringUtf8Coder")); + try { + SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + CoreMatchers.containsString( + "unable to create a source")); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReader.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReader.java new file mode 100644 index 0000000000000..4d5e85881be91 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReader.java @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.Reiterator; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.ShufflePosition; +// TODO: Decide how we want to handle this Guava dependency. 
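+// UnsignedBytes.lexicographicalComparator() is used as SHUFFLE_KEY_COMPARATOR below, so
+// shuffle keys sort byte-wise with each byte treated as unsigned (both for the TreeMap
+// of records and for the range checks in advanceKey()).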
+import com.google.common.primitives.UnsignedBytes; + +import org.junit.Assert; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NoSuchElementException; +import java.util.TreeMap; + +/** + * A fake implementation of a ShuffleEntryReader, for testing. + */ +public class TestShuffleReader implements ShuffleEntryReader { + static final Comparator SHUFFLE_KEY_COMPARATOR = + UnsignedBytes.lexicographicalComparator(); + final NavigableMap> records; + + public TestShuffleReader(NavigableMap> records) { + this.records = records; + } + + public TestShuffleReader() { + this(new TreeMap>(SHUFFLE_KEY_COMPARATOR)); + } + + public void addEntry(String key, String value) { + addEntry(key.getBytes(), value.getBytes()); + } + + public void addEntry(byte[] key, byte[] value) { + addEntry(new ShuffleEntry(key, null, value)); + } + + public void addEntry(ShuffleEntry entry) { + List values = records.get(entry.getKey()); + if (values == null) { + values = new ArrayList<>(); + records.put(entry.getKey(), values); + } + values.add(entry); + } + + public Iterator read() { + return read((byte[]) null, (byte[]) null); + } + + @Override + public Reiterator read(ShufflePosition startPosition, + ShufflePosition endPosition) { + return read(ByteArrayShufflePosition.getPosition(startPosition), + ByteArrayShufflePosition.getPosition(endPosition)); + } + + public Reiterator read(String startKey, String endKey) { + return read(startKey == null ? null : startKey.getBytes(), + endKey == null ? null : endKey.getBytes()); + } + + public Reiteratorread(byte[] startKey, byte[] endKey) { + return new ShuffleReaderIterator(startKey, endKey); + } + + class ShuffleReaderIterator implements Reiterator { + final Iterator>> recordsIter; + final byte[] startKey; + final byte[] endKey; + byte[] currentKey; + Map.Entry> currentRecord; + ListIterator currentValuesIter; + + public ShuffleReaderIterator(byte[] startKey, byte[] endKey) { + this.recordsIter = records.entrySet().iterator(); + this.startKey = startKey; + this.endKey = endKey; + advanceKey(); + } + + private ShuffleReaderIterator(ShuffleReaderIterator it) { + if (it.currentKey != null) { + this.recordsIter = + records.tailMap(it.currentKey, false).entrySet().iterator(); + } else { + this.recordsIter = null; + } + this.startKey = it.startKey; + this.endKey = it.endKey; + this.currentKey = it.currentKey; + this.currentRecord = it.currentRecord; + if (it.currentValuesIter != null) { + this.currentValuesIter = + it.currentRecord.getValue().listIterator( + it.currentValuesIter.nextIndex()); + } else { + this.currentValuesIter = null; + } + } + + @Override + public boolean hasNext() { + return currentKey != null; + } + + @Override + public ShuffleEntry next() { + if (currentKey == null) { + throw new NoSuchElementException(); + } + ShuffleEntry resultValue = currentValuesIter.next(); + Assert.assertTrue(Arrays.equals(currentKey, resultValue.getKey())); + if (!currentValuesIter.hasNext()) { + advanceKey(); + } + return resultValue; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public Reiterator copy() { + return new ShuffleReaderIterator(this); + } + + private void advanceKey() { + while (recordsIter.hasNext()) { + currentRecord = recordsIter.next(); + currentKey = currentRecord.getKey(); + if (startKey != null && + 
SHUFFLE_KEY_COMPARATOR.compare(currentKey, startKey) < 0) { + // This key is before the start of the range. Keep looking. + continue; + } + if (endKey != null && + SHUFFLE_KEY_COMPARATOR.compare(currentKey, endKey) >= 0) { + // This key is at or after the end of the range. Stop looking. + break; + } + // In range. + currentValuesIter = currentRecord.getValue().listIterator(); + return; + } + currentKey = null; + currentValuesIter = null; + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReaderTest.java new file mode 100644 index 0000000000000..87935a7bb3d86 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleReaderTest.java @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; + +/** + * Tests of TestShuffleReader. 
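+ *
+ * <p>Each test adds entries in shuffled order and reads them back, optionally through a
+ * {@code [startKey, endKey)} range; the reader returns entries in unsigned byte-wise key
+ * order, with values under the same key in the order they were added. A minimal usage
+ * sketch (keys and values taken from the constants below):
+ *
+ * <pre>{@code
+ * TestShuffleReader reader = new TestShuffleReader();
+ * reader.addEntry("ddd", "in 1");
+ * reader.addEntry("mm", "in 5");
+ * Iterator<ShuffleEntry> iter = reader.read("ddd", "mmm");  // half-open key range
+ * }</pre>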
+ */ +@RunWith(JUnit4.class) +public class TestShuffleReaderTest { + static final String START_KEY = "ddd"; + static final String END_KEY = "mmm"; + + static final List> NO_ENTRIES = + Collections.emptyList(); + + static final List> IN_RANGE_ENTRIES = + Arrays.>asList( + new SimpleEntry<>("ddd", "in 1"), + new SimpleEntry<>("ddd", "in 1"), + new SimpleEntry<>("ddd", "in 1"), + new SimpleEntry<>("dddd", "in 2"), + new SimpleEntry<>("dddd", "in 2"), + new SimpleEntry<>("de", "in 3"), + new SimpleEntry<>("ee", "in 4"), + new SimpleEntry<>("ee", "in 4"), + new SimpleEntry<>("ee", "in 4"), + new SimpleEntry<>("ee", "in 4"), + new SimpleEntry<>("mm", "in 5")); + static final List> BEFORE_RANGE_ENTRIES = + Arrays.>asList( + new SimpleEntry<>("", "out 1"), + new SimpleEntry<>("dd", "out 2")); + static final List> AFTER_RANGE_ENTRIES = + Arrays.>asList( + new SimpleEntry<>("mmm", "out 3"), + new SimpleEntry<>("mmm", "out 3"), + new SimpleEntry<>("mmmm", "out 4"), + new SimpleEntry<>("mn", "out 5"), + new SimpleEntry<>("zzz", "out 6")); + static final List> OUT_OF_RANGE_ENTRIES = + new ArrayList<>(); + static { + OUT_OF_RANGE_ENTRIES.addAll(BEFORE_RANGE_ENTRIES); + OUT_OF_RANGE_ENTRIES.addAll(AFTER_RANGE_ENTRIES); + } + static final List> ALL_ENTRIES = new ArrayList<>(); + static { + ALL_ENTRIES.addAll(BEFORE_RANGE_ENTRIES); + ALL_ENTRIES.addAll(IN_RANGE_ENTRIES); + ALL_ENTRIES.addAll(AFTER_RANGE_ENTRIES); + } + + void runTest(List> expected, + List> outOfRange, + String startKey, + String endKey) { + TestShuffleReader shuffleReader = new TestShuffleReader(); + List> expectedCopy = new ArrayList<>(expected); + expectedCopy.addAll(outOfRange); + Collections.shuffle(expectedCopy); + for (Map.Entry entry : expectedCopy) { + shuffleReader.addEntry(entry.getKey(), entry.getValue()); + } + Iterator iter = shuffleReader.read(startKey, endKey); + List> actual = new ArrayList<>(); + while (iter.hasNext()) { + ShuffleEntry entry = iter.next(); + actual.add(new SimpleEntry<>(new String(entry.getKey()), + new String(entry.getValue()))); + } + try { + iter.next(); + Assert.fail("should have failed"); + } catch (NoSuchElementException exn) { + // Success. + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testEmpty() { + runTest(NO_ENTRIES, NO_ENTRIES, null, null); + } + + @Test + public void testEmptyWithRange() { + runTest(NO_ENTRIES, NO_ENTRIES, START_KEY, END_KEY); + } + + @Test + public void testNonEmpty() { + runTest(ALL_ENTRIES, NO_ENTRIES, null, null); + } + + @Test + public void testNonEmptyWithAllInRange() { + runTest(IN_RANGE_ENTRIES, NO_ENTRIES, START_KEY, END_KEY); + } + + @Test + public void testNonEmptyWithSomeOutOfRange() { + runTest(IN_RANGE_ENTRIES, OUT_OF_RANGE_ENTRIES, START_KEY, END_KEY); + } + + @Test + public void testNonEmptyWithAllOutOfRange() { + runTest(NO_ENTRIES, OUT_OF_RANGE_ENTRIES, START_KEY, END_KEY); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleWriter.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleWriter.java new file mode 100644 index 0000000000000..4fde0bbcdbaa1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TestShuffleWriter.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; + +import java.util.ArrayList; +import java.util.List; + +/** + * A fake implementation of a ShuffleEntryWriter, for testing. + */ +public class TestShuffleWriter implements ShuffleEntryWriter { + final List records = new ArrayList<>(); + final List sizes = new ArrayList<>(); + boolean closed = false; + + public TestShuffleWriter() { } + + @Override + public long put(ShuffleEntry entry) { + if (closed) { + throw new AssertionError("shuffle writer already closed"); + } + records.add(entry); + + long size = entry.length(); + sizes.add(size); + return size; + } + + @Override + public void close() { + if (closed) { + throw new AssertionError("shuffle writer already closed"); + } + closed = true; + } + + /** Returns the key/value records that were written to this ShuffleWriter. */ + public List getRecords() { + if (!closed) { + throw new AssertionError("shuffle writer not closed"); + } + return records; + } + + /** Returns the sizes in bytes of the records that were written to this ShuffleWriter. */ + public List getSizes() { + if (!closed) { + throw new AssertionError("shuffle writer not closed"); + } + return sizes; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java new file mode 100644 index 0000000000000..9f9e63090a6e5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for TextSinkFactory. + */ +@RunWith(JUnit4.class) +public class TextSinkFactoryTest { + void runTestCreateTextSink(String filename, + @Nullable Boolean appendTrailingNewlines, + @Nullable String header, + @Nullable String footer, + CloudObject encoding, + Coder coder) + throws Exception { + CloudObject spec = CloudObject.forClassName("TextSink"); + addString(spec, PropertyNames.FILENAME, filename); + if (appendTrailingNewlines != null) { + addBoolean(spec, PropertyNames.APPEND_TRAILING_NEWLINES, appendTrailingNewlines); + } + if (header != null) { + addString(spec, PropertyNames.HEADER, header); + } + if (footer != null) { + addString(spec, PropertyNames.FOOTER, footer); + } + + com.google.api.services.dataflow.model.Sink cloudSink = + new com.google.api.services.dataflow.model.Sink(); + cloudSink.setSpec(spec); + cloudSink.setCodec(encoding); + + Sink sink = SinkFactory.create(PipelineOptionsFactory.create(), + cloudSink, + new BatchModeExecutionContext()); + Assert.assertThat(sink, new IsInstanceOf(TextSink.class)); + TextSink textSink = (TextSink) sink; + Assert.assertEquals(filename, textSink.namePrefix); + Assert.assertEquals( + appendTrailingNewlines == null ? true : appendTrailingNewlines, + textSink.appendTrailingNewlines); + Assert.assertEquals(header, textSink.header); + Assert.assertEquals(footer, textSink.footer); + Assert.assertEquals(coder, textSink.coder); + } + + @Test + public void testCreatePlainTextSink() throws Exception { + runTestCreateTextSink( + "/path/to/file.txt", null, null, null, + makeCloudEncoding("StringUtf8Coder"), + StringUtf8Coder.of()); + } + + @Test + public void testCreateRichTextSink() throws Exception { + runTestCreateTextSink( + "gs://bucket/path/to/file2.txt", false, "$$$", "***", + makeCloudEncoding("TextualIntegerCoder"), + TextualIntegerCoder.of()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkTest.java new file mode 100644 index 0000000000000..d1b8b436a2510 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkTest.java @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * Tests for TextSink. + */ +@RunWith(JUnit4.class) +public class TextSinkTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + void runTestWriteFile(List elems, + @Nullable String header, + @Nullable String footer, + Coder coder) throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + TextSink> textSink = TextSink.createForTest( + tmpFile.getPath(), true, header, footer, coder); + List expected = new ArrayList<>(); + List actualSizes = new ArrayList<>(); + if (header != null) { + expected.add(header); + } + try (Sink.SinkWriter> writer = textSink.writer()) { + for (T elem : elems) { + actualSizes.add((int) writer.add(WindowedValue.valueInGlobalWindow(elem))); + byte[] encodedElem = CoderUtils.encodeToByteArray(coder, elem); + String line = new String(encodedElem); + expected.add(line); + } + } + if (footer != null) { + expected.add(footer); + } + + BufferedReader reader = new BufferedReader(new FileReader(tmpFile)); + List actual = new ArrayList<>(); + List expectedSizes = new ArrayList<>(); + for (;;) { + String line = reader.readLine(); + if (line == null) { + break; + } + actual.add(line); + expectedSizes.add(line.length() + TextSink.NEWLINE.length); + } + if (header != null) { + expectedSizes.remove(0); + } + if (footer != null) { + expectedSizes.remove(expectedSizes.size() - 1); + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(expectedSizes, actualSizes); + } + + @Test + public void testWriteEmptyFile() throws Exception { + runTestWriteFile(Collections.emptyList(), null, null, + StringUtf8Coder.of()); + } + + @Test + public void testWriteEmptyFileWithHeaderAndFooter() throws Exception { + runTestWriteFile(Collections.emptyList(), "the head", "the foot", + StringUtf8Coder.of()); + } + + @Test + public void testWriteNonEmptyFile() throws Exception { + List lines = Arrays.asList( + "", + " hi there ", + "bob", + "", + " ", + "--zowie!--", + ""); + runTestWriteFile(lines, null, null, StringUtf8Coder.of()); + } + + @Test + public void testWriteNonEmptyFileWithHeaderAndFooter() throws Exception { + List lines = Arrays.asList( + "", + " hi there ", + "bob", + "", + " ", + "--zowie!--", + ""); + runTestWriteFile(lines, "the head", "the foot", 
StringUtf8Coder.of()); + } + + @Test + public void testWriteNonEmptyNonStringFile() throws Exception { + runTestWriteFile(TestUtils.INTS, null, null, TextualIntegerCoder.of()); + } + + // TODO: sharded filenames + // TODO: not appending newlines + // TODO: writing to GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java new file mode 100644 index 0000000000000..2fa50b567e781 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for TextSourceFactory. + */ +@RunWith(JUnit4.class) +public class TextSourceFactoryTest { + void runTestCreateTextSource(String filename, + @Nullable Boolean stripTrailingNewlines, + @Nullable Long start, + @Nullable Long end, + CloudObject encoding, + Coder coder) + throws Exception { + CloudObject spec = CloudObject.forClassName("TextSource"); + addString(spec, "filename", filename); + if (stripTrailingNewlines != null) { + addBoolean(spec, "strip_trailing_newlines", stripTrailingNewlines); + } + if (start != null) { + addLong(spec, "start_offset", start); + } + if (end != null) { + addLong(spec, "end_offset", end); + } + + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Source source = SourceFactory.create(PipelineOptionsFactory.create(), + cloudSource, + new BatchModeExecutionContext()); + Assert.assertThat(source, new IsInstanceOf(TextSource.class)); + TextSource textSource = (TextSource) source; + Assert.assertEquals(filename, textSource.filename); + Assert.assertEquals( + stripTrailingNewlines == null ? 
true : stripTrailingNewlines, + textSource.stripTrailingNewlines); + Assert.assertEquals(start, textSource.startPosition); + Assert.assertEquals(end, textSource.endPosition); + Assert.assertEquals(coder, textSource.coder); + } + + @Test + public void testCreatePlainTextSource() throws Exception { + runTestCreateTextSource( + "/path/to/file.txt", null, null, null, + makeCloudEncoding("StringUtf8Coder"), + StringUtf8Coder.of()); + } + + @Test + public void testCreateRichTextSource() throws Exception { + runTestCreateTextSource( + "gs://bucket/path/to/file2.txt", false, 200L, 500L, + makeCloudEncoding("TextualIntegerCoder"), + TextualIntegerCoder.of()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java new file mode 100644 index 0000000000000..8aee7aaf00529 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java @@ -0,0 +1,581 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static org.hamcrest.Matchers.greaterThan; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.runners.worker.TextSource.TextFileIterator; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Tests for TextSource. 
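+ *
+ * <p>The tests cover newline handling with and without stripping, reading from explicit
+ * start/end offsets within a file, multi-byte UTF-8 content, non-String coders, progress
+ * reporting, and dynamic stop-position updates. A typical source under test is
+ * constructed like this (a sketch; the real tests point at files in a
+ * {@code TemporaryFolder}):
+ *
+ * <pre>{@code
+ * // filename, stripTrailingNewlines, startOffset, endOffset, coder
+ * TextSource<String> source =
+ *     new TextSource<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of());
+ * }</pre>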
+ */ +@RunWith(JUnit4.class) +public class TextSourceTest { + private static final String[] fileContent = {"First line\n", + "Second line\r\n", + "Third line"}; + private static final long TOTAL_BYTES_COUNT; + + static { + long sumLen = 0L; + for (String s : fileContent) { + sumLen += s.length(); + } + TOTAL_BYTES_COUNT = sumLen; + } + + @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private File initTestFile() throws IOException { + File tmpFile = tmpFolder.newFile(); + FileOutputStream output = new FileOutputStream(tmpFile); + for (String s : fileContent) { + output.write(s.getBytes()); + } + output.close(); + + return tmpFile; + } + + @Test + public void testReadEmptyFile() throws Exception { + TextSource textSource = new TextSource<>( + "/dev/null", true, null, null, StringUtf8Coder.of()); + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertFalse(iterator.hasNext()); + } + } + + @Test + public void testStrippedNewlines() throws Exception { + testNewlineHandling("\r", true); + testNewlineHandling("\r\n", true); + testNewlineHandling("\n", true); + } + + @Test + public void testStrippedNewlinesAtEndOfReadBuffer() throws Exception { + boolean stripNewLines = true; + StringBuilder payload = new StringBuilder(); + for (int i = 0; i < TextSource.BUF_SIZE - 2; ++i) { + payload.append('a'); + } + String[] lines = {payload.toString(), payload.toString()}; + testStringPayload(lines , "\r", stripNewLines); + testStringPayload(lines , "\r\n", stripNewLines); + testStringPayload(lines , "\n", stripNewLines); + } + + @Test + public void testUnstrippedNewlines() throws Exception { + testNewlineHandling("\r", false); + testNewlineHandling("\r\n", false); + testNewlineHandling("\n", false); + } + + @Test + public void testUnstrippedNewlinesAtEndOfReadBuffer() throws Exception { + boolean stripNewLines = false; + StringBuilder payload = new StringBuilder(); + for (int i = 0; i < TextSource.BUF_SIZE - 2; ++i) { + payload.append('a'); + } + String[] lines = {payload.toString(), payload.toString()}; + testStringPayload(lines , "\r", stripNewLines); + testStringPayload(lines , "\r\n", stripNewLines); + testStringPayload(lines , "\n", stripNewLines); + } + + @Test + public void testStartPosition() throws Exception { + File tmpFile = initTestFile(); + + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertEquals("Second line\r\n", iterator.next()); + Assert.assertEquals("Third line", iterator.next()); + Assert.assertFalse(iterator.hasNext()); + // The first '1' in the array represents the reading of '\n' between first and + // second line, to confirm that we are reading from the beginning of a record. + Assert.assertEquals(Arrays.asList(1, 13, 10), observer.getActualSizes()); + } + } + + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertEquals("Third line", iterator.next()); + Assert.assertFalse(iterator.hasNext()); + // The first '5' in the array represents the reading of a portion of the second + // line, which had to be read to find the beginning of the third line. 
+ Assert.assertEquals(Arrays.asList(5, 10), observer.getActualSizes()); + } + } + + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertEquals("First line", iterator.next()); + Assert.assertEquals("Second line", iterator.next()); + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); + } + } + + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertEquals("Second line", iterator.next()); + Assert.assertFalse(iterator.hasNext()); + // The first '11' in the array represents the reading of the entire first + // line, which had to be read to find the beginning of the second line. + Assert.assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); + } + } + } + + @Test + public void testUtf8Handling() throws Exception { + File tmpFile = tmpFolder.newFile(); + FileOutputStream output = new FileOutputStream(tmpFile); + // first line: €\n + // second line: ¢\n + output.write(new byte[]{(byte) 0xE2, (byte) 0x82, (byte) 0xAC, '\n', + (byte) 0xC2, (byte) 0xA2, '\n'}); + output.close(); + + { + // 3L is after the first line if counting codepoints, but within + // the first line if counting chars. So correct behavior is to return + // just one line, since offsets are in chars, not codepoints. + TextSource textSource = new TextSource<>( + tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertArrayEquals("€".getBytes("UTF-8"), + iterator.next().getBytes("UTF-8")); + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(Arrays.asList(4), observer.getActualSizes()); + } + } + + { + // Starting location is mid-way into a codepoint. + // Ensures we don't fail when skipping over an incomplete codepoint. + TextSource textSource = new TextSource<>( + tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (Source.SourceIterator iterator = textSource.iterator()) { + Assert.assertArrayEquals("¢".getBytes("UTF-8"), + iterator.next().getBytes("UTF-8")); + Assert.assertFalse(iterator.hasNext()); + // The first '3' in the array represents the reading of a portion of the first + // line, which had to be read to find the beginning of the second line. 
+ Assert.assertEquals(Arrays.asList(3, 3), observer.getActualSizes()); + } + } + } + + private void testNewlineHandling(String separator, boolean stripNewlines) + throws Exception { + File tmpFile = tmpFolder.newFile(); + PrintStream writer = + new PrintStream( + new FileOutputStream(tmpFile)); + List expected = Arrays.asList( + "", + " hi there ", + "bob", + "", + " ", + "--zowie!--", + ""); + List expectedSizes = new ArrayList<>(); + for (String line : expected) { + writer.print(line); + writer.print(separator); + expectedSizes.add(line.length() + separator.length()); + } + writer.close(); + + TextSource textSource = new TextSource<>( + tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + List actual = new ArrayList<>(); + try (Source.SourceIterator iterator = textSource.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); + } + } + + if (stripNewlines) { + Assert.assertEquals(expected, actual); + } else { + List unstripped = new LinkedList<>(); + for (String s : expected) { + unstripped.add(s + separator); + } + Assert.assertEquals(unstripped, actual); + } + + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + private void testStringPayload( + String[] lines, String separator, boolean stripNewlines) + throws Exception { + File tmpFile = tmpFolder.newFile(); + List expected = new ArrayList<>(); + PrintStream writer = + new PrintStream( + new FileOutputStream(tmpFile)); + for (String line : lines) { + writer.print(line); + writer.print(separator); + expected.add(stripNewlines ? line : line + separator); + } + writer.close(); + + TextSource textSource = new TextSource<>( + tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + List actual = new ArrayList<>(); + try (Source.SourceIterator iterator = textSource.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); + } + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() + throws Exception { + String line = "a\n"; + boolean stripNewlines = false; + File tmpFile = tmpFolder.newFile(); + List expected = new ArrayList<>(); + PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); + // Write 5x the size of the buffer and 10 extra trailing bytes + for (long bytesWritten = 0; + bytesWritten < TextSource.BUF_SIZE * 3 + 10; ) { + writer.print(line); + expected.add(line); + bytesWritten += line.length(); + } + writer.close(); + Long fileSize = tmpFile.length(); + + TextSource textSource = new TextSource<>( + tmpFile.getPath(), stripNewlines, + null, fileSize, StringUtf8Coder.of()); + + List actual = new ArrayList<>(); + Source.SourceIterator iterator = textSource.iterator(); + while (iterator.hasNext()) { + actual.add(iterator.next()); + iterator = iterator.copy(); + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testNonStringCoders() throws Exception { + File tmpFile = tmpFolder.newFile(); + PrintStream writer = + new PrintStream( + new FileOutputStream(tmpFile)); + List expected = TestUtils.INTS; + List expectedSizes = new ArrayList<>(); + for (Integer elem : expected) { + byte[] encodedElem = + CoderUtils.encodeToByteArray(TextualIntegerCoder.of(), elem); + writer.print(elem); + writer.print("\n"); + expectedSizes.add(1 + 
encodedElem.length); + } + writer.close(); + + TextSource textSource = new TextSource<>( + tmpFile.getPath(), true, null, null, TextualIntegerCoder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + List actual = new ArrayList<>(); + try (Source.SourceIterator iterator = textSource.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); + } + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testGetApproximatePosition() throws Exception { + File tmpFile = initTestFile(); + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of()); + + try (Source.SourceIterator iterator = textSource.iterator()) { + ApproximateProgress progress = + sourceProgressToCloudProgress(iterator.getProgress()); + Assert.assertEquals(0L, + progress.getPosition().getByteOffset().longValue()); + iterator.next(); + progress = sourceProgressToCloudProgress(iterator.getProgress()); + Assert.assertEquals(11L, + progress.getPosition().getByteOffset().longValue()); + iterator.next(); + progress = sourceProgressToCloudProgress(iterator.getProgress()); + Assert.assertEquals(24L, + progress.getPosition().getByteOffset().longValue()); + iterator.next(); + progress = sourceProgressToCloudProgress(iterator.getProgress()); + Assert.assertEquals(34L, + progress.getPosition().getByteOffset().longValue()); + Assert.assertFalse(iterator.hasNext()); + } + } + + @Test + public void testUpdateStopPosition() throws Exception { + final long end = 10L; // in the first line + final long stop = 14L; // in the middle of the second line + File tmpFile = initTestFile(); + + com.google.api.services.dataflow.model.Position proposedStopPosition = + new com.google.api.services.dataflow.model.Position(); + + // Illegal proposed stop position, no update. + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, null, null, + StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + } + } + + proposedStopPosition.setByteOffset(stop); + + // Successful update. + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, null, null, + StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + Assert.assertNull(iterator.getEndOffset()); + Assert.assertEquals( + stop, + sourcePositionToCloudPosition( + iterator.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))) + .getByteOffset().longValue()); + Assert.assertEquals(stop, iterator.getEndOffset().longValue()); + Assert.assertEquals(fileContent[0], iterator.next()); + Assert.assertEquals(fileContent[1], iterator.next()); + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(Arrays.asList(fileContent[0].length(), + fileContent[1].length()), + observer.getActualSizes()); + } + } + + // Proposed stop position is before the current position, no update. 
+ { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, null, null, + StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + Assert.assertEquals(fileContent[0], iterator.next()); + Assert.assertEquals(fileContent[1], iterator.next()); + Assert.assertThat(sourceProgressToCloudProgress(iterator.getProgress()) + .getPosition().getByteOffset(), + greaterThan(stop)); + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertNull(iterator.getEndOffset()); + Assert.assertTrue(iterator.hasNext()); + Assert.assertEquals(fileContent[2], iterator.next()); + Assert.assertEquals(Arrays.asList(fileContent[0].length(), + fileContent[1].length(), + fileContent[2].length()), + observer.getActualSizes()); + } + } + + // Proposed stop position is after the current stop (end) position, no update. + { + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, null, end, StringUtf8Coder.of()); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(textSource); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + Assert.assertEquals(fileContent[0], iterator.next()); + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals(end, iterator.getEndOffset().longValue()); + Assert.assertFalse(iterator.hasNext()); + Assert.assertEquals(Arrays.asList(fileContent[0].length()), + observer.getActualSizes()); + } + } + } + + @Test + public void testUpdateStopPositionExhaustive() throws Exception { + File tmpFile = initTestFile(); + + // Checks for every possible position in the file, that either we fail to + // "updateStop" at it, or we succeed and then reading both halves together + // yields the original file with no missed records or duplicates. + for (long start = 0; start < TOTAL_BYTES_COUNT - 1; start++) { + for (long end = start + 1; end < TOTAL_BYTES_COUNT; end++) { + for (long stop = start; stop <= end; stop++) { + stopPositionTestInternal(start, end, + stop, tmpFile); + } + } + } + + // Test with null start/end positions. + for (long stop = 0L; stop < TOTAL_BYTES_COUNT; stop++) { + stopPositionTestInternal(null, null, stop, tmpFile); + } + } + + private void stopPositionTestInternal(Long startOffset, + Long endOffset, + Long stopOffset, + File tmpFile) throws Exception { + String readWithoutSplit; + String readWithSplit1, readWithSplit2; + StringBuilder accumulatedRead = new StringBuilder(); + + // Read from source without split attempts. + TextSource textSource = new TextSource<>( + tmpFile.getPath(), false, startOffset, endOffset, + StringUtf8Coder.of()); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + while (iterator.hasNext()) { + accumulatedRead.append((String) iterator.next()); + } + readWithoutSplit = accumulatedRead.toString(); + } + + // Read the first half of the split. 
+ textSource = new TextSource<>( + tmpFile.getPath(), false, startOffset, stopOffset, + StringUtf8Coder.of()); + accumulatedRead = new StringBuilder(); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + while (iterator.hasNext()) { + accumulatedRead.append((String) iterator.next()); + } + readWithSplit1 = accumulatedRead.toString(); + } + + // Read the second half of the split. + textSource = new TextSource<>( + tmpFile.getPath(), false, stopOffset, endOffset, + StringUtf8Coder.of()); + accumulatedRead = new StringBuilder(); + + try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + while (iterator.hasNext()) { + accumulatedRead.append((String) iterator.next()); + } + readWithSplit2 = accumulatedRead.toString(); + } + + Assert.assertEquals(readWithoutSplit, readWithSplit1 + readWithSplit2); + } + + private ApproximateProgress createApproximateProgress( + com.google.api.services.dataflow.model.Position position) { + return new ApproximateProgress().setPosition(position); + } + + // TODO: sharded filenames + // TODO: reading from GCS +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java new file mode 100644 index 0000000000000..3a360d8d24add --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.common.collect.Lists; + +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Tests for UngroupedShuffleSource. 
+ */ +@RunWith(JUnit4.class) +public class UngroupedShuffleSourceTest { + static final Instant timestamp = new Instant(123000); + static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + + byte[] asShuffleKey(long seqNum) throws Exception { + return CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum); + } + + byte[] asShuffleValue(Integer value) throws Exception { + return CoderUtils.encodeToByteArray( + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder()), + WindowedValue.of(value, timestamp, Lists.newArrayList(window))); + } + + void runTestReadShuffleSource(List expected) throws Exception { + UngroupedShuffleSource> shuffleSource = + new UngroupedShuffleSource<>( + PipelineOptionsFactory.create(), + null, null, null, + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder())); + ExecutorTestUtils.TestSourceObserver observer = + new ExecutorTestUtils.TestSourceObserver(shuffleSource); + + TestShuffleReader shuffleReader = new TestShuffleReader(); + List expectedSizes = new ArrayList<>(); + long seqNum = 0; + for (Integer value : expected) { + byte[] shuffleKey = asShuffleKey(seqNum++); + byte[] shuffleValue = asShuffleValue(value); + shuffleReader.addEntry(shuffleKey, shuffleValue); + + ShuffleEntry record = new ShuffleEntry(shuffleKey, null, shuffleValue); + expectedSizes.add(record.length()); + } + + List actual = new ArrayList<>(); + try (Source.SourceIterator> iter = + shuffleSource.iterator(shuffleReader)) { + while (iter.hasNext()) { + Assert.assertTrue(iter.hasNext()); + Assert.assertTrue(iter.hasNext()); + WindowedValue elem = iter.next(); + actual.add(elem.getValue()); + } + Assert.assertFalse(iter.hasNext()); + Assert.assertFalse(iter.hasNext()); + try { + iter.next(); + Assert.fail("should have failed"); + } catch (NoSuchElementException exn) { + // As expected. + } + } + + Assert.assertEquals(expected, actual); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testReadEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(TestUtils.NO_INTS); + } + + @Test + public void testReadNonEmptyShuffleSource() throws Exception { + runTestReadShuffleSource(TestUtils.INTS); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java new file mode 100644 index 0000000000000..065092aeaab14 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker.logging; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.testing.RestoreMappedDiagnosticContext; +import com.google.common.collect.ImmutableMap; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.MDC; + +import java.util.logging.Level; +import java.util.logging.LogRecord; + +/** Unit tests for {@link DataflowWorkerLoggingFormatter}. */ +@RunWith(JUnit4.class) +public class DataflowWorkerLoggingFormatterTest { + @Rule public TestRule restoreMDC = new RestoreMappedDiagnosticContext(); + + @Test + public void testWithUnsetValuesInMDC() { + assertEquals( + "1970-01-01T00:00:00.001Z INFO unknown unknown unknown 2 LoggerName " + + "test.message\n", + new DataflowWorkerLoggingFormatter().format( + createLogRecord("test.message", null))); + } + + @Test + public void testWithMessage() { + MDC.setContextMap(ImmutableMap.of( + "dataflow.jobId", "testJobId", + "dataflow.workerId", "testWorkerId", + "dataflow.workId", "testWorkId")); + assertEquals( + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " + + "test.message\n", + new DataflowWorkerLoggingFormatter().format( + createLogRecord("test.message", null))); + } + + @Test + public void testWithMessageAndException() { + MDC.setContextMap(ImmutableMap.of( + "dataflow.jobId", "testJobId", + "dataflow.workerId", "testWorkerId", + "dataflow.workId", "testWorkId")); + assertEquals( + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " + + "test.message\n" + + "java.lang.Throwable: exception.test.message\n" + + "\tat declaringClass1.method1(file1.java:1)\n" + + "\tat declaringClass2.method2(file2.java:1)\n" + + "\tat declaringClass3.method3(file3.java:1)\n", + new DataflowWorkerLoggingFormatter().format( + createLogRecord("test.message", createThrowable()))); + } + + @Test + public void testWithException() { + MDC.setContextMap(ImmutableMap.of( + "dataflow.jobId", "testJobId", + "dataflow.workerId", "testWorkerId", + "dataflow.workId", "testWorkId")); + assertEquals( + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null\n" + + "java.lang.Throwable: exception.test.message\n" + + "\tat declaringClass1.method1(file1.java:1)\n" + + "\tat declaringClass2.method2(file2.java:1)\n" + + "\tat declaringClass3.method3(file3.java:1)\n", + new DataflowWorkerLoggingFormatter().format( + createLogRecord(null, createThrowable()))); + } + + @Test + public void testWithoutExceptionOrMessage() { + MDC.setContextMap(ImmutableMap.of( + "dataflow.jobId", "testJobId", + "dataflow.workerId", "testWorkerId", + "dataflow.workId", "testWorkId")); + assertEquals( + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null\n", + new DataflowWorkerLoggingFormatter().format( + createLogRecord(null, null))); + } + + /** + * @return A throwable with a fixed stack trace. + */ + private Throwable createThrowable() { + Throwable throwable = new Throwable("exception.test.message"); + throwable.setStackTrace(new StackTraceElement[]{ + new StackTraceElement("declaringClass1", "method1", "file1.java", 1), + new StackTraceElement("declaringClass2", "method2", "file2.java", 1), + new StackTraceElement("declaringClass3", "method3", "file3.java", 1), + }); + return throwable; + } + + /** + * Creates and returns a LogRecord with a given message and throwable. 
+ * + * @param message The message to place in the {@link LogRecord} + * @param throwable The throwable to place in the {@link LogRecord} + * @return A {@link LogRecord} with the given message and throwable. + */ + private LogRecord createLogRecord(String message, Throwable throwable) { + LogRecord logRecord = new LogRecord(Level.INFO, message); + logRecord.setLoggerName("LoggerName"); + logRecord.setMillis(1L); + logRecord.setThreadID(2); + logRecord.setThrown(throwable); + return logRecord; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java new file mode 100644 index 0000000000000..71e51f430d88d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker.logging; + +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.util.List; +import java.util.logging.ConsoleHandler; +import java.util.logging.FileHandler; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.LogManager; +import java.util.logging.Logger; + +/** Unit tests for {@link DataflowWorkerLoggingInitializer}. 
*/ +@RunWith(JUnit4.class) +public class DataflowWorkerLoggingInitializerTest { + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + + @Mock LogManager mockLogManager; + @Mock Logger mockRootLogger; + @Mock Handler mockHandler; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + when(mockLogManager.getLogger("")).thenReturn(mockRootLogger); + when(mockRootLogger.getHandlers()).thenReturn(new Handler[]{ mockHandler }); + } + + @Test + public void testWithDefaults() { + ArgumentCaptor argument = ArgumentCaptor.forClass(Handler.class); + + new DataflowWorkerLoggingInitializer().initialize(mockLogManager); + verify(mockLogManager).getLogger(""); + verify(mockLogManager).reset(); + verify(mockRootLogger).getHandlers(); + verify(mockRootLogger).removeHandler(mockHandler); + verify(mockRootLogger).setLevel(Level.INFO); + verify(mockRootLogger, times(2)).addHandler(argument.capture()); + verifyNoMoreInteractions(mockLogManager, mockRootLogger); + + List handlers = argument.getAllValues(); + assertTrue(isConsoleHandler(handlers.get(0), Level.INFO)); + assertTrue(isFileHandler(handlers.get(1), Level.INFO)); + } + + @Test + public void testWithOverrides() { + ArgumentCaptor argument = ArgumentCaptor.forClass(Handler.class); + System.setProperty("dataflow.worker.logging.level", "WARNING"); + + new DataflowWorkerLoggingInitializer().initialize(mockLogManager); + verify(mockLogManager).getLogger(""); + verify(mockLogManager).reset(); + verify(mockRootLogger).getHandlers(); + verify(mockRootLogger).removeHandler(mockHandler); + verify(mockRootLogger).setLevel(Level.WARNING); + verify(mockRootLogger, times(2)).addHandler(argument.capture()); + verifyNoMoreInteractions(mockLogManager, mockRootLogger); + + List handlers = argument.getAllValues(); + assertTrue(isConsoleHandler(handlers.get(0), Level.WARNING)); + assertTrue(isFileHandler(handlers.get(1), Level.WARNING)); + } + + private boolean isConsoleHandler(Handler handler, Level level) { + return handler instanceof ConsoleHandler + && level.equals(handler.getLevel()) + && handler.getFormatter() instanceof DataflowWorkerLoggingFormatter; + } + + private boolean isFileHandler(Handler handler, Level level) { + return handler instanceof FileHandler + && level.equals(handler.getLevel()) + && handler.getFormatter() instanceof DataflowWorkerLoggingFormatter; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java new file mode 100644 index 0000000000000..3f4e33d63268a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.fail; + +import com.google.common.collect.Lists; + +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; +import org.junit.rules.ExternalResource; +import org.junit.rules.TestRule; + +import java.util.Collection; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; + +/** + * This {@link TestRule} enables the ability to capture JUL logging events during test execution and + * assert expectations that they contain certain messages (with or without {@link Throwable}) at + * certain log levels. For logs generated via the SLF4J logging frontend, the JUL backend must be + * used. + */ +public class ExpectedLogs extends ExternalResource { + /** + * Returns a {@link TestRule} which captures logs for the given class. + * + * @param klass The class to capture logs for. + * @return A {@link ExpectedLogs} test rule. + */ + public static ExpectedLogs none(Class klass) { + return new ExpectedLogs(klass); + } + + /** + * Expect a logging event at the trace level with the given message. + * + * @param substring The message to match against. + */ + public void expectTrace(String substring) { + expect(Level.FINEST, substring); + } + + /** + * Expect a logging event at the trace level with the given message and throwable. + * + * @param substring The message to match against. + * @param t The throwable to match against. + */ + public void expectTrace(String substring, Throwable t) { + expect(Level.FINEST, substring, t); + } + + /** + * Expect a logging event at the debug level with the given message. + * + * @param substring The message to match against. + */ + public void expectDebug(String substring) { + expect(Level.FINE, substring); + } + + /** + * Expect a logging event at the debug level with the given message and throwable. + * + * @param message The message to match against. + * @param t The throwable to match against. + */ + public void expectDebug(String message, Throwable t) { + expect(Level.FINE, message, t); + } + + /** + * Expect a logging event at the info level with the given message. + * @param substring The message to match against. + */ + public void expectInfo(String substring) { + expect(Level.INFO, substring); + } + + /** + * Expect a logging event at the info level with the given message and throwable. + * + * @param message The message to match against. + * @param t The throwable to match against. + */ + public void expectInfo(String message, Throwable t) { + expect(Level.INFO, message, t); + } + + /** + * Expect a logging event at the warn level with the given message. + * + * @param substring The message to match against. + */ + public void expectWarn(String substring) { + expect(Level.WARNING, substring); + } + + /** + * Expect a logging event at the warn level with the given message and throwable. + * + * @param substring The message to match against. + * @param t The throwable to match against. + */ + public void expectWarn(String substring, Throwable t) { + expect(Level.WARNING, substring, t); + } + + /** + * Expect a logging event at the error level with the given message. + * + * @param substring The message to match against. + */ + public void expectError(String substring) { + expect(Level.SEVERE, substring); + } + + /** + * Expect a logging event at the error level with the given message and throwable. 
+ * + * @param substring The message to match against. + * @param t The throwable to match against. + */ + public void expectError(String substring, Throwable t) { + expect(Level.SEVERE, substring, t); + } + + private void expect(final Level level, final String substring) { + expectations.add(new TypeSafeMatcher<LogRecord>() { + @Override + public void describeTo(Description description) { + description.appendText(String.format( + "Expected log message of level [%s] containing message [%s]", level, substring)); + } + + @Override + protected boolean matchesSafely(LogRecord item) { + return level.equals(item.getLevel()) + && item.getMessage().contains(substring); + } + }); + } + + private void expect(final Level level, final String substring, final Throwable throwable) { + expectations.add(new TypeSafeMatcher<LogRecord>() { + @Override + public void describeTo(Description description) { + description.appendText(String.format( + "Expected log message of level [%s] containing message [%s] with exception [%s] " + + "containing message [%s]", + level, substring, throwable.getClass(), throwable.getMessage())); + } + + @Override + protected boolean matchesSafely(LogRecord item) { + return level.equals(item.getLevel()) + && item.getMessage().contains(substring) + && item.getThrown().getClass().equals(throwable.getClass()) + && item.getThrown().getMessage().contains(throwable.getMessage()); + } + }); + } + + @Override + protected void before() throws Throwable { + previousLevel = log.getLevel(); + log.setLevel(Level.ALL); + log.addHandler(logSaver); + } + + @Override + protected void after() { + log.removeHandler(logSaver); + log.setLevel(previousLevel); + Collection<Matcher<LogRecord>> missingExpectations = Lists.newArrayList(); + FOUND: for (Matcher<LogRecord> expectation : expectations) { + for (LogRecord log : logSaver.getLogs()) { + if (expectation.matches(log)) { + continue FOUND; + } + } + missingExpectations.add(expectation); + } + + if (!missingExpectations.isEmpty()) { + fail(String.format("Missed logging expectations: %s", missingExpectations)); + } + } + + private final Logger log; + private final LogSaver logSaver; + private final Collection<Matcher<LogRecord>> expectations; + private Level previousLevel; + + private ExpectedLogs(Class<?> klass) { + log = Logger.getLogger(klass.getName()); + logSaver = new LogSaver(); + expectations = Lists.newArrayList(); + } + + /** + * A JUL logging {@link Handler} that records all logging events which are passed to it. + */ + private static class LogSaver extends Handler { + Collection<LogRecord> logRecords = Lists.newArrayList(); + + public Collection<LogRecord> getLogs() { + return logRecords; + } + + @Override + public void publish(LogRecord record) { + logRecords.add(record); + } + + @Override + public void flush() {} + + @Override + public void close() throws SecurityException {} + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java new file mode 100644 index 0000000000000..4d9cd0e76639a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License.
You may obtain a copy of + the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Random; + +/** Tests for {@link ExpectedLogs}. */ +@RunWith(JUnit4.class) +public class ExpectedLogsTest { + private static final Logger LOG = LoggerFactory.getLogger(ExpectedLogsTest.class); + + private ExpectedLogs expectedLogs; + + @Before + public void setUp() { + expectedLogs = ExpectedLogs.none(ExpectedLogsTest.class); + } + + @Test + public void testWhenNoExpectations() throws Throwable { + expectedLogs.before(); + LOG.error(generateRandomString()); + expectedLogs.after(); + } + + @Test + public void testWhenExpectationIsMatchedFully() throws Throwable { + String expected = generateRandomString(); + expectedLogs.before(); + expectedLogs.expectError(expected); + LOG.error(expected); + expectedLogs.after(); + } + + + @Test + public void testWhenExpectationIsMatchedPartially() throws Throwable { + String expected = generateRandomString(); + expectedLogs.before(); + expectedLogs.expectError(expected); + LOG.error("Extra stuff around expected " + expected + " blah"); + expectedLogs.after(); + } + + @Test + public void testWhenExpectationIsMatchedWithExceptionBeingLogged() throws Throwable { + String expected = generateRandomString(); + expectedLogs.before(); + expectedLogs.expectError(expected); + LOG.error(expected, new IOException()); + expectedLogs.after(); + } + + @Test(expected = AssertionError.class) + public void testWhenExpectationIsNotMatched() throws Throwable { + String expected = generateRandomString(); + expectedLogs.before(); + expectedLogs.expectError(expected); + expectedLogs.after(); + } + + @Test + public void testLogCaptureOccursAtLowestLogLevel() throws Throwable { + String expected = generateRandomString(); + expectedLogs.before(); + expectedLogs.expectTrace(expected); + LOG.trace(expected); + expectedLogs.after(); + } + + // Generates random strings of 10 lowercase characters. + private static String generateRandomString() { + Random random = new Random(); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 10; i++) { + builder.append((char) ('a' + random.nextInt(26))); + } + return builder.toString(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeper.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeper.java new file mode 100644 index 0000000000000..e9fa9839e7374 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeper.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License.
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.api.client.util.NanoClock; +import com.google.api.client.util.Sleeper; + +import org.junit.rules.ExternalResource; +import org.junit.rules.TestRule; + +/** + * This object quickly moves time forward based upon how much it has been asked to sleep, + * without actually sleeping, to simulate the backoff. + */ +public class FastNanoClockAndSleeper extends ExternalResource + implements NanoClock, Sleeper, TestRule { + private long fastNanoTime; + + @Override + public long nanoTime() { + return fastNanoTime; + } + + @Override + protected void before() throws Throwable { + fastNanoTime = NanoClock.SYSTEM.nanoTime(); + } + + @Override + public void sleep(long millis) throws InterruptedException { + fastNanoTime += millis * 1000000L; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeperTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeperTest.java new file mode 100644 index 0000000000000..3c9275f54a23d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/FastNanoClockAndSleeperTest.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.concurrent.TimeUnit; + +/** Tests for {@link FastNanoClockAndSleeper}. 
*/ +@RunWith(JUnit4.class) +public class FastNanoClockAndSleeperTest { + @Rule public FastNanoClockAndSleeper fastNanoClockAndSleeper = new FastNanoClockAndSleeper(); + + @Test + public void testClockAndSleeper() throws Exception { + long sleepTimeMs = TimeUnit.SECONDS.toMillis(30); + long sleepTimeNano = TimeUnit.MILLISECONDS.toNanos(sleepTimeMs); + long fakeTimeNano = fastNanoClockAndSleeper.nanoTime(); + long startTimeNano = System.nanoTime(); + fastNanoClockAndSleeper.sleep(sleepTimeMs); + long maxTimeNano = startTimeNano + TimeUnit.SECONDS.toNanos(1); + // Verify that actual time didn't progress as much as was requested + assertTrue(System.nanoTime() < maxTimeNano); + // Verify that the fake time did go up by the amount requested + assertEquals(fakeTimeNano + sleepTimeNano, fastNanoClockAndSleeper.nanoTime()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProvider.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProvider.java new file mode 100644 index 0000000000000..675d7ac113611 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProvider.java @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import org.joda.time.DateTimeUtils; +import org.joda.time.format.ISODateTimeFormat; +import org.junit.rules.ExternalResource; +import org.junit.rules.TestRule; + +/** + * This {@link TestRule} resets the date time provider in Joda to the system date + * time provider after tests. + */ +public class ResetDateTimeProvider extends ExternalResource { + public void setDateTimeFixed(String iso8601) { + setDateTimeFixed(ISODateTimeFormat.dateTime().parseMillis(iso8601)); + } + + public void setDateTimeFixed(long millis) { + DateTimeUtils.setCurrentMillisFixed(millis); + } + + @Override + protected void after() { + DateTimeUtils.setCurrentMillisSystem(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProviderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProviderTest.java new file mode 100644 index 0000000000000..5aa96835676c9 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ResetDateTimeProviderTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import org.joda.time.DateTimeUtils; +import org.joda.time.format.ISODateTimeFormat; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link ResetDateTimeProvider}. */ +@RunWith(JUnit4.class) +public class ResetDateTimeProviderTest { + private static final String TEST_TIME = "2014-12-08T19:07:06.698Z"; + private static final long TEST_TIME_MS = + ISODateTimeFormat.dateTime().parseMillis(TEST_TIME); + + @Rule public ResetDateTimeProvider resetDateTimeProviderRule = new ResetDateTimeProvider(); + + /* + * Since these tests can run out of order, both test A and B change the provider + * and verify that the provider was reset. + */ + @Test + public void testResetA() { + assertNotEquals(TEST_TIME_MS, DateTimeUtils.currentTimeMillis()); + resetDateTimeProviderRule.setDateTimeFixed(TEST_TIME); + assertEquals(TEST_TIME_MS, DateTimeUtils.currentTimeMillis()); + } + + @Test + public void testResetB() { + assertNotEquals(TEST_TIME_MS, DateTimeUtils.currentTimeMillis()); + resetDateTimeProviderRule.setDateTimeFixed(TEST_TIME); + assertEquals(TEST_TIME_MS, DateTimeUtils.currentTimeMillis()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java new file mode 100644 index 0000000000000..f0bdb9e217044 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.common.base.MoreObjects; +import com.google.common.collect.ImmutableMap; + +import org.junit.rules.ExternalResource; +import org.slf4j.MDC; + +import java.util.Map; + +/** + * Saves and restores the current MDC for tests. + */ +public class RestoreMappedDiagnosticContext extends ExternalResource { + private Map previousValue; + + public RestoreMappedDiagnosticContext() { + } + + @Override + protected void before() throws Throwable { + previousValue = MoreObjects.firstNonNull( + MDC.getCopyOfContextMap(), + ImmutableMap.of()); + } + + @Override + protected void after() { + MDC.setContextMap(previousValue); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java new file mode 100644 index 0000000000000..c88f275f4bf65 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.slf4j.MDC; + +/** Tests for {@link RestoreMappedDiagnosticContext}. */ +@RunWith(JUnit4.class) +public class RestoreMappedDiagnosticContextTest { + @Rule public TestRule restoreMappedDiagnosticContext = new RestoreMappedDiagnosticContext(); + + /* + * Since these tests can run out of order, both test A and B verify that they + * could insert their property and that the other does not exist. + */ + @Test + public void testThatMDCIsClearedA() { + MDC.put("TestA", "TestA"); + assertNotNull(MDC.get("TestA")); + assertNull(MDC.get("TestB")); + } + + @Test + public void testThatMDCIsClearedB() { + MDC.put("TestB", "TestB"); + assertNotNull(MDC.get("TestB")); + assertNull(MDC.get("TestA")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemProperties.java new file mode 100644 index 0000000000000..ef4f3427b8891 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemProperties.java @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.common.base.Throwables; + +import org.junit.rules.ExternalResource; +import org.junit.rules.TestRule; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * Saves and restores the current system properties for tests. 
+ */ +public class RestoreSystemProperties extends ExternalResource implements TestRule { + private byte[] originalProperties; + + @Override + protected void before() throws Throwable { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + System.getProperties().store(baos, ""); + baos.close(); + originalProperties = baos.toByteArray(); + } + + @Override + protected void after() { + try (ByteArrayInputStream bais = new ByteArrayInputStream(originalProperties)) { + System.getProperties().clear(); + System.getProperties().load(bais); + } catch (IOException e) { + throw Throwables.propagate(e); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemPropertiesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemPropertiesTest.java new file mode 100644 index 0000000000000..8a4bb488922ef --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreSystemPropertiesTest.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link RestoreSystemProperties}. */ +@RunWith(JUnit4.class) +public class RestoreSystemPropertiesTest { + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + + /* + * Since these tests can run out of order, both test A and B verify that they + * could insert their property and that the other does not exist. + */ + @Test + public void testThatPropertyIsClearedA() { + System.getProperties().put("TestA", "TestA"); + assertNotNull(System.getProperty("TestA")); + assertNull(System.getProperty("TestB")); + } + + @Test + public void testThatPropertyIsClearedB() { + System.getProperties().put("TestB", "TestB"); + assertNotNull(System.getProperty("TestB")); + assertNull(System.getProperty("TestA")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java new file mode 100644 index 0000000000000..da4f66ec07758 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.common.collect.ImmutableMap; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link TestPipeline}. */ +@RunWith(JUnit4.class) +public class TestPipelineTest { + @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + + @Test + public void testCreationUsingDefaults() { + assertNotNull(TestPipeline.create()); + } + + @Test + public void testCreationOfPipelineOptions() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + String stringOptions = mapper.writeValueAsString( + ImmutableMap.of("options", + ImmutableMap.builder() + .put("runner", DataflowPipelineRunner.class.getName()) + .put("project", "testProject") + .put("apiRootUrl", "testApiRootUrl") + .put("dataflowEndpoint", "testDataflowEndpoint") + .put("tempLocation", "testTempLocation") + .put("serviceAccountName", "testServiceAccountName") + .put("serviceAccountKeyfile", "testServiceAccountKeyfile") + .put("zone", "testZone") + .put("numWorkers", "1") + .put("diskSizeGb", "2") + .put("shuffleDiskSizeGb", "3") + .build())); + System.getProperties().put("dataflowOptions", stringOptions); + TestDataflowPipelineOptions options = TestPipeline.getPipelineOptions(); + assertEquals(DataflowPipelineRunner.class, options.getRunner()); + assertEquals("TestPipelineTest", options.getAppName()); + assertEquals("testCreationOfPipelineOptions", options.getJobName()); + assertEquals("testProject", options.getProject()); + assertEquals("testApiRootUrl", options.getApiRootUrl()); + assertEquals("testDataflowEndpoint", options.getDataflowEndpoint()); + assertEquals("testTempLocation", options.getTempLocation()); + assertEquals("testServiceAccountName", options.getServiceAccountName()); + assertEquals("testServiceAccountKeyfile", options.getServiceAccountKeyfile()); + assertEquals("testZone", options.getZone()); + assertEquals(2, options.getDiskSizeGb()); + assertEquals(3, options.getShuffleDiskSizeGb()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java new file mode 100644 index 0000000000000..b0493491634c2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java @@ -0,0 +1,287 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.checkCombineFn; +import static com.google.cloud.dataflow.sdk.TestUtils.createInts; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.ApproximateQuantiles.ApproximateQuantilesCombineFn; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeDiagnosingMatcher; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for ApproximateQuantiles + */ +@RunWith(JUnit4.class) +public class ApproximateQuantilesTest { + + static final List> TABLE = Arrays.asList( + KV.of("a", 1), + KV.of("a", 2), + KV.of("a", 3), + KV.of("b", 1), + KV.of("b", 10), + KV.of("b", 10), + KV.of("b", 100) + ); + + public PCollection> createInputTable(Pipeline p) { + return p.apply(Create.of(TABLE)).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + } + + @Test + public void testQuantilesGlobally() { + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection input = intRangeCollection(p, 101); + PCollection> quantiles = + input.apply(ApproximateQuantiles.globally(5)); + + DirectPipelineRunner.EvaluationResults results = p.run(); + + DataflowAssert.that(quantiles) + .containsInAnyOrder(Arrays.asList(0, 25, 50, 75, 100)); + } + + @Test + public void testQuantilesGobally_comparable() { + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection input = intRangeCollection(p, 101); + PCollection> quantiles = + input.apply( + ApproximateQuantiles.globally(5, new DescendingIntComparator())); + + DirectPipelineRunner.EvaluationResults results = p.run(); + + DataflowAssert.that(quantiles) + .containsInAnyOrder(Arrays.asList(100, 75, 50, 25, 0)); + } + + @Test + public void testQuantilesPerKey() { + Pipeline p = TestPipeline.create(); + + PCollection> input = createInputTable(p); + PCollection>> quantiles = input.apply( + ApproximateQuantiles.perKey(2)); + + DataflowAssert.that(quantiles) + .containsInAnyOrder( + KV.of("a", Arrays.asList(1, 3)), + KV.of("b", Arrays.asList(1, 100))); + p.run(); + + } + + @Test + public void testQuantilesPerKey_reversed() { + Pipeline p = TestPipeline.create(); + + PCollection> input = createInputTable(p); + PCollection>> quantiles = input.apply( + ApproximateQuantiles.perKey( + 2, new DescendingIntComparator())); + + DataflowAssert.that(quantiles) + .containsInAnyOrder( + KV.of("a", Arrays.asList(3, 1)), + KV.of("b", Arrays.asList(100, 1))); + p.run(); + } + + @Test + public void testSingleton() { + checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + Arrays.asList(389), + Arrays.asList(389, 389, 389, 389, 389)); + } + + @Test + public void testSimpleQuantiles() { + 
checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + intRange(101), + Arrays.asList(0, 25, 50, 75, 100)); + } + + @Test + public void testUnevenQuantiles() { + checkCombineFn( + ApproximateQuantilesCombineFn.create(37), + intRange(5000), + quantileMatcher(5000, 37, 20 /* tolerance */)); + } + + @Test + public void testLargerQuantiles() { + checkCombineFn( + ApproximateQuantilesCombineFn.create(50), + intRange(10001), + quantileMatcher(10001, 50, 20 /* tolerance */)); + } + + @Test + public void testTightEpsilon() { + checkCombineFn( + ApproximateQuantilesCombineFn.create(10).withEpsilon(0.01), + intRange(10001), + quantileMatcher(10001, 10, 5 /* tolerance */)); + } + + @Test + public void testDuplicates() { + int size = 101; + List all = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + all.addAll(intRange(size)); + } + checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + all, + Arrays.asList(0, 25, 50, 75, 100)); + } + + @Test + public void testLotsOfDuplicates() { + List all = new ArrayList<>(); + all.add(1); + for (int i = 1; i < 300; i++) { + all.add(2); + } + for (int i = 300; i < 1000; i++) { + all.add(3); + } + checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + all, + Arrays.asList(1, 2, 3, 3, 3)); + } + + @Test + public void testLogDistribution() { + List all = new ArrayList<>(); + for (int i = 1; i < 1000; i++) { + all.add((int) Math.log(i)); + } + checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + all, + Arrays.asList(0, 5, 6, 6, 6)); + } + + @Test + public void testZipfianDistribution() { + List all = new ArrayList<>(); + for (int i = 1; i < 1000; i++) { + all.add(1000 / i); + } + checkCombineFn( + ApproximateQuantilesCombineFn.create(5), + all, + Arrays.asList(1, 1, 2, 4, 1000)); + } + + @Test + public void testAlternateComparator() { + List inputs = Arrays.asList( + "aa", "aaa", "aaaa", "b", "ccccc", "dddd", "zz"); + checkCombineFn( + ApproximateQuantilesCombineFn.create(3), + inputs, + Arrays.asList("aa", "b", "zz")); + checkCombineFn( + ApproximateQuantilesCombineFn.create(3, new TopTest.OrderByLength()), + inputs, + Arrays.asList("b", "aaa", "ccccc")); + } + + private Matcher> quantileMatcher( + int size, int numQuantiles, int absoluteError) { + List> quantiles = new ArrayList<>(); + quantiles.add(CoreMatchers.is(0)); + for (int k = 1; k < numQuantiles - 1; k++) { + int expected = (int) (((double) (size - 1)) * k / (numQuantiles - 1)); + quantiles.add(new Between<>( + expected - absoluteError, expected + absoluteError)); + } + quantiles.add(CoreMatchers.is(size - 1)); + return contains(quantiles); + } + + private static class Between> + extends TypeSafeDiagnosingMatcher { + private final T min; + private final T max; + private Between(T min, T max) { + this.min = min; + this.max = max; + } + @Override + public void describeTo(Description description) { + description.appendText("is between " + min + " and " + max); + } + + @Override + protected boolean matchesSafely(T item, Description mismatchDescription) { + return min.compareTo(item) <= 0 && item.compareTo(max) <= 0; + } + } + + private static class DescendingIntComparator implements + SerializableComparator { + @Override + public int compare(Integer o1, Integer o2) { + return o2.compareTo(o1); + } + } + + private PCollection intRangeCollection(Pipeline p, int size) { + return createInts(p, intRange(size)); + } + + private List intRange(int size) { + List all = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + all.add(i); + } + return all; + } +} diff --git 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java new file mode 100644 index 0000000000000..2b2ff0ac9c967 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.SerializableCoder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Tests for the ApproximateUnique aggregator transform. 
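+ *
+ * <p>The checks below lean on the error bound of roughly
+ * {@code 2 / sqrt(sampleSize)} assumed by verifyEstimate; equivalently,
+ * hitting a target relative error {@code e} takes a sample of about
+ * {@code (2 / e)^2} elements (for example, {@code e = 0.02} needs
+ * {@code (2 / 0.02)^2 = 10000}), which is the relationship that
+ * testEstimationErrorToSampleSize verifies. The transform itself is applied
+ * the same way throughout these tests:
+ *
+ * <pre>{@code
+ * PCollection<String> elements = ...;
+ * PCollection<Long> estimate =
+ *     elements.apply(ApproximateUnique.<String>globally(1000));
+ * }</pre>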
+ */ +@RunWith(JUnit4.class) +public class ApproximateUniqueTest { + + @Test + public void testEstimationErrorToSampleSize() { + assertEquals(40000, ApproximateUnique.sampleSizeFromEstimationError(0.01)); + assertEquals(10000, ApproximateUnique.sampleSizeFromEstimationError(0.02)); + assertEquals(2500, ApproximateUnique.sampleSizeFromEstimationError(0.04)); + assertEquals(1600, ApproximateUnique.sampleSizeFromEstimationError(0.05)); + assertEquals(400, ApproximateUnique.sampleSizeFromEstimationError(0.1)); + assertEquals(100, ApproximateUnique.sampleSizeFromEstimationError(0.2)); + assertEquals(25, ApproximateUnique.sampleSizeFromEstimationError(0.4)); + assertEquals(16, ApproximateUnique.sampleSizeFromEstimationError(0.5)); + } + + public PCollection createInput(Pipeline p, Iterable input, + Coder coder) { + return p.apply(Create.of(input)).setCoder(coder); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testApproximateUniqueWithSmallInput() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInput(p, Arrays.asList(1, 2, 3, 3), BigEndianIntegerCoder.of()); + + PCollection estimate = input + .apply(ApproximateUnique.globally(1000)); + + DataflowAssert.that(estimate).containsInAnyOrder(3L); + + p.run(); + } + + @Test + public void testApproximateUniqueWithDuplicates() { + runApproximateUniqueWithDuplicates(100, 100, 100); + runApproximateUniqueWithDuplicates(1000, 1000, 100); + runApproximateUniqueWithDuplicates(1500, 1000, 100); + runApproximateUniqueWithDuplicates(10000, 1000, 100); + } + + private void runApproximateUniqueWithDuplicates(int elementCount, + int uniqueCount, int sampleSize) { + + assert elementCount >= uniqueCount; + List elements = Lists.newArrayList(); + for (int i = 0; i < elementCount; i++) { + elements.add(1.0 / (i % uniqueCount + 1)); + } + Collections.shuffle(elements); + + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = createInput(p, elements, DoubleCoder.of()); + PCollection estimate = + input.apply(ApproximateUnique.globally(sampleSize)); + + EvaluationResults results = p.run(); + + verifyEstimate(uniqueCount, sampleSize, + results.getPCollection(estimate).get(0)); + } + + @Test + public void testApproximateUniqueWithSkewedDistributions() { + runApproximateUniqueWithSkewedDistributions(100, 100, 100); + runApproximateUniqueWithSkewedDistributions(10000, 10000, 100); + runApproximateUniqueWithSkewedDistributions(10000, 1000, 100); + runApproximateUniqueWithSkewedDistributions(10000, 200, 100); + } + + @Test + public void testApproximateUniqueWithSkewedDistributionsAndLargeSampleSize() { + runApproximateUniqueWithSkewedDistributions(10000, 2000, 1000); + } + + private void runApproximateUniqueWithSkewedDistributions(int elementCount, + final int uniqueCount, final int sampleSize) { + List elements = Lists.newArrayList(); + // Zipf distribution with approximately elementCount items. + double s = 1 - 1.0 * uniqueCount / elementCount; + double maxCount = Math.pow(uniqueCount, s); + for (int k = 0; k < uniqueCount; k++) { + int count = Math.max(1, (int) Math.round(maxCount * Math.pow(k, -s))); + // Element k occurs count times. 
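+      // The count falls off as k^-s and is scaled so that the counts sum to
+      // roughly elementCount, so a few low-numbered keys contribute most of
+      // the duplicates in the skewed input.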
+ for (int c = 0; c < count; c++) { + elements.add(k); + } + } + + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + createInput(p, elements, BigEndianIntegerCoder.of()); + PCollection estimate = + input.apply(ApproximateUnique.globally(sampleSize)); + + EvaluationResults results = p.run(); + + verifyEstimate(uniqueCount, sampleSize, + results.getPCollection(estimate).get(0).longValue()); + } + + @Test + public void testApproximateUniquePerKey() { + List> elements = Lists.newArrayList(); + List keys = ImmutableList.of(20, 50, 100); + int elementCount = 1000; + int sampleSize = 100; + // Use the key as the number of unique values. + for (int uniqueCount : keys) { + for (int value = 0; value < elementCount; value++) { + elements.add(KV.of(uniqueCount, value % uniqueCount)); + } + } + + DirectPipeline p = DirectPipeline.createForTest(); + PCollection> input = createInput(p, elements, + KvCoder.of(BigEndianIntegerCoder.of(), BigEndianIntegerCoder.of())); + PCollection> counts = + input.apply(ApproximateUnique.perKey(sampleSize)); + + EvaluationResults results = p.run(); + + for (KV result : results.getPCollection(counts)) { + verifyEstimate(result.getKey(), sampleSize, result.getValue()); + } + } + + /** + * Applies {@link ApproximateUnique} for different sample sizes and verifies + * that the estimation error falls within the maximum allowed error of + * {@code 2 / sqrt(sampleSize)}. + */ + @Test + public void testApproximateUniqueWithDifferentSampleSizes() { + runApproximateUniquePipeline(16); + runApproximateUniquePipeline(64); + runApproximateUniquePipeline(128); + runApproximateUniquePipeline(256); + runApproximateUniquePipeline(512); + runApproximateUniquePipeline(1000); + runApproximateUniquePipeline(1024); + try { + runApproximateUniquePipeline(15); + fail("Accepted sampleSize < 16"); + } catch (IllegalArgumentException e) { + assertTrue("Expected an exception due to sampleSize < 16", e.getMessage() + .startsWith("ApproximateUnique needs a sampleSize >= 16")); + } + } + + /** + * Applies {@code ApproximateUnique(sampleSize)} verifying that the estimation + * error falls within the maximum allowed error of {@code 2/sqrt(sampleSize)}. + */ + private void runApproximateUniquePipeline(int sampleSize) { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection collection = readPCollection(p); + + PCollection exact = collection.apply(RemoveDuplicates.create()) + .apply(Combine.globally(new CountElements())); + + PCollection approximate = + collection.apply(ApproximateUnique.globally(sampleSize)); + + EvaluationResults results = p.run(); + + verifyEstimate(results.getPCollection(exact).get(0).longValue(), sampleSize, + results.getPCollection(approximate).get(0).longValue()); + } + + /** + * Reads a large {@code PCollection}. + */ + private PCollection readPCollection(Pipeline p) { + // TODO: Read PCollection from a set of text files. + List page = TestUtils.LINES; + final int pages = 1000; + ArrayList file = new ArrayList<>(pages * page.size()); + for (int i = 0; i < pages; i++) { + file.addAll(page); + } + assert file.size() == pages * page.size(); + PCollection words = TestUtils.createStrings(p, file); + return words; + } + + /** + * Checks that the estimation error, i.e., the difference between + * {@code uniqueCount} and {@code estimate} is less than + * {@code 2 / sqrt(sampleSize}). 
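+   * For example, with {@code sampleSize = 100} the bound is
+   * {@code 2 / sqrt(100) = 20%}, so an actual count of 1000 unique values
+   * may legitimately be estimated as anything from roughly 800 to 1200.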
+ */ + private static void verifyEstimate(long uniqueCount, int sampleSize, + long estimate) { + if (uniqueCount < sampleSize) { + assertEquals("Number of hashes is less than the sample size. " + + "Estimate should be exact", uniqueCount, estimate); + } + + double error = 100.0 * Math.abs(estimate - uniqueCount) / uniqueCount; + double maxError = 100.0 * 2 / Math.sqrt(sampleSize); + + assertTrue("Estimate= " + estimate + " Actual=" + uniqueCount + " Error=" + + error + "%, MaxError=" + maxError + "%.", error < maxError); + } + + /** + * Combiner function counting the number of elements in an input PCollection. + * + * @param the type of elements in the input PCollection. + */ + private static class CountElements extends CombineFn { + + @Override + public Long[] createAccumulator() { + Long[] accumulator = new Long[1]; + accumulator[0] = 0L; + return accumulator; + } + + @Override + public void addInput(Long[] accumulator, E input) { + accumulator[0]++; + } + + @Override + public Long[] mergeAccumulators(Iterable accumulators) { + Long[] sum = new Long[1]; + sum[0] = 0L; + for (Long[] accumulator : accumulators) { + sum[0] += accumulator[0]; + } + return sum; + } + + @Override + public Long extractOutput(Long[] accumulator) { + return accumulator[0]; + } + + @Override + public Coder getAccumulatorCoder(CoderRegistry registry, + Coder inputCoder) { + return SerializableCoder.of(Long[].class); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java new file mode 100644 index 0000000000000..52b0b230a19d1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -0,0 +1,527 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.junit.Assert.assertThat; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.CustomCoder; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.SerializableCoder; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.RecordingPipelineVisitor; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.hamcrest.Matchers; +import org.joda.time.Duration; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +/** + * Tests for Combine transforms. + */ +@RunWith(JUnit4.class) +public class CombineTest { + + @SuppressWarnings("unchecked") + static final KV[] TABLE = new KV[] { + KV.of("a", 1), + KV.of("a", 1), + KV.of("a", 4), + KV.of("b", 1), + KV.of("b", 13), + }; + + @SuppressWarnings("unchecked") + static final KV[] EMPTY_TABLE = new KV[] { + }; + + static final Integer[] NUMBERS = new Integer[] { + 1, 1, 2, 3, 5, 8, 13, 21, 34, 55 + }; + + PCollection> createInput(Pipeline p, + KV[] table) { + return p.apply(Create.of(Arrays.asList(table))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + } + + private void runTestSimpleCombine(KV[] table, + int globalSum, + KV[] perKeySums) { + Pipeline p = TestPipeline.create(); + PCollection> input = createInput(p, table); + + PCollection sum = input + .apply(Values.create()) + .apply(Combine.globally(new SumInts())); + + // Java 8 will infer. 
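+    // (Presumably: under Java 8 the type arguments of Combine.perKey can be
+    // inferred at the call site, so the explicit type witness needed for
+    // Java 7 would no longer be necessary.)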
+ PCollection> sumPerKey = input + .apply(Combine.perKey(new SumInts())); + + DataflowAssert.that(sum).containsInAnyOrder(globalSum); + DataflowAssert.that(sumPerKey).containsInAnyOrder(perKeySums); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSimpleCombine() { + runTestSimpleCombine(TABLE, 20, new KV[] { + KV.of("a", 6), KV.of("b", 14) }); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSimpleCombineEmpty() { + runTestSimpleCombine(EMPTY_TABLE, 0, new KV[] { }); + } + + private void runTestBasicCombine(KV[] table, + Set globalUnique, + KV>[] perKeyUnique) { + Pipeline p = TestPipeline.create(); + p.getCoderRegistry().registerCoder(Set.class, SetCoder.class); + PCollection> input = createInput(p, table); + + PCollection> unique = input + .apply(Values.create()) + .apply(Combine.globally(new UniqueInts())); + + // Java 8 will infer. + PCollection>> uniquePerKey = input + .apply(Combine.>perKey(new UniqueInts())); + + DataflowAssert.that(unique).containsInAnyOrder(globalUnique); + DataflowAssert.that(uniquePerKey).containsInAnyOrder(perKeyUnique); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testBasicCombine() { + runTestBasicCombine(TABLE, ImmutableSet.of(1, 13, 4), new KV[] { + KV.of("a", (Set) ImmutableSet.of(1, 4)), + KV.of("b", (Set) ImmutableSet.of(1, 13)) }); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testBasicCombineEmpty() { + runTestBasicCombine(EMPTY_TABLE, ImmutableSet.of(), new KV[] { }); + } + + private void runTestAccumulatingCombine(KV[] table, + Double globalMean, + KV[] perKeyMeans) { + Pipeline p = TestPipeline.create(); + PCollection> input = createInput(p, table); + + PCollection mean = input + .apply(Values.create()) + .apply(Combine.globally(new MeanInts())); + + // Java 8 will infer. + PCollection> meanPerKey = input.apply( + Combine.perKey(new MeanInts())); + + DataflowAssert.that(mean).containsInAnyOrder(globalMean); + DataflowAssert.that(meanPerKey).containsInAnyOrder(perKeyMeans); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testWindowedCombineEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection mean = p + .apply(Create.of()).setCoder(BigEndianIntegerCoder.of()) + .apply(Window.into(FixedWindows.of(Duration.millis(1)))) + .apply(Combine.globally(new MeanInts())); + + DataflowAssert.that(mean).containsInAnyOrder(); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testAccumulatingCombine() { + runTestAccumulatingCombine(TABLE, 4.0, new KV[] { + KV.of("a", 2.0), KV.of("b", 7.0) }); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testAccumulatingCombineEmpty() { + runTestAccumulatingCombine(EMPTY_TABLE, 0.0, new KV[] { }); + } + + // Checks that Min, Max, Mean, Sum (operations which pass-through to Combine), + // provide their own top-level name. 
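+  // For example, input.apply(Min.integersPerKey()) should produce a transform
+  // whose full name starts with "Min" rather than a generic Combine.PerKey
+  // name, which is what the assertions below check.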
+ @Test + public void testCombinerNames() { + Pipeline p = TestPipeline.create(); + PCollection> input = createInput(p, TABLE); + + Combine.PerKey min = Min.integersPerKey(); + Combine.PerKey max = Max.integersPerKey(); + Combine.PerKey mean = Mean.perKey(); + Combine.PerKey sum = Sum.integersPerKey(); + + input.apply(min); + input.apply(max); + input.apply(mean); + input.apply(sum); + + p.traverseTopologically(new RecordingPipelineVisitor()); + + assertThat(p.getFullName(min), Matchers.startsWith("Min")); + assertThat(p.getFullName(max), Matchers.startsWith("Max")); + assertThat(p.getFullName(mean), Matchers.startsWith("Mean")); + assertThat(p.getFullName(sum), Matchers.startsWith("Sum")); + } + + @Test + public void testAddInputsRandomly() { + TestCounter counter = new TestCounter(); + Combine.KeyedCombineFn< + String, Integer, TestCounter.Counter, Iterable> fn = + counter.asKeyedFn(); + + List accums = DirectPipelineRunner.TestCombineDoFn.addInputsRandomly( + fn, "bob", Arrays.asList(NUMBERS), new Random(42)); + + assertThat(accums, Matchers.contains( + counter.new Counter(3, 2, 0, 0), + counter.new Counter(131, 5, 0, 0), + counter.new Counter(8, 2, 0, 0), + counter.new Counter(1, 1, 0, 0))); + } + + //////////////////////////////////////////////////////////////////////////// + // Test classes, for different kinds of combining fns. + + /** Example SerializableFunction combiner. */ + public static class SumInts + implements SerializableFunction, Integer> { + @Override + public Integer apply(Iterable input) { + int sum = 0; + for (int item : input) { + sum += item; + } + return sum; + } + } + + /** Example CombineFn. */ + public static class UniqueInts extends + Combine.CombineFn, Set> { + + @Override + public Set createAccumulator() { + return new HashSet<>(); + } + + @Override + public void addInput(Set accumulator, Integer input) { + accumulator.add(input); + } + + @Override + public Set mergeAccumulators(Iterable> accumulators) { + Set all = new HashSet<>(); + for (Set part : accumulators) { + all.addAll(part); + } + return all; + } + + @Override + public Set extractOutput(Set accumulator) { + return accumulator; + } + } + + // Note: not a deterministic encoding + private static class SetCoder extends StandardCoder> { + + public static SetCoder of(Coder elementCoder) { + return new SetCoder<>(elementCoder); + } + + @JsonCreator + public static SetCoder of( + @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) + List> components) { + Preconditions.checkArgument(components.size() == 1, + "Expecting 1 component, got " + components.size()); + return of((Coder) components.get(0)); + } + + public static List getInstanceComponents(Set exampleValue) { + return IterableCoder.getInstanceComponents(exampleValue); + } + + private final Coder> iterableCoder; + + private SetCoder(Coder elementCoder) { + iterableCoder = IterableCoder.of(elementCoder); + } + + @Override + public void encode(Set value, OutputStream outStream, Context context) + throws CoderException, IOException { + iterableCoder.encode(value, outStream, context); + } + + @Override + public Set decode(InputStream inStream, Context context) + throws CoderException, IOException { + // TODO: Eliminate extra copy if used in production. 
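+      // (The decoded Iterable is materialized and then copied element by
+      // element into a new HashSet, which is the extra copy referred to above.)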
+ return Sets.newHashSet(iterableCoder.decode(inStream, context)); + } + + @Override + public List> getCoderArguments() { + return iterableCoder.getCoderArguments(); + } + + @Override + public boolean isDeterministic() { + return false; + } + + @Override + public boolean isRegisterByteSizeObserverCheap(Set value, Context context) { + return iterableCoder.isRegisterByteSizeObserverCheap(value, context); + } + + @Override + public void registerByteSizeObserver( + Set value, ElementByteSizeObserver observer, Context context) + throws Exception { + iterableCoder.registerByteSizeObserver(value, observer, context); + } + } + + /** Example AccumulatingCombineFn. */ + public static class MeanInts extends + Combine.AccumulatingCombineFn { + private static final Coder LONG_CODER = BigEndianLongCoder.of(); + private static final Coder DOUBLE_CODER = DoubleCoder.of(); + + class CountSum extends + Combine.AccumulatingCombineFn.Accumulator { + long count = 0; + double sum = 0.0; + + CountSum(long count, double sum) { + this.count = count; + this.sum = sum; + } + + @Override + public void addInput(Integer element) { + count++; + sum += element.doubleValue(); + } + + @Override + public void mergeAccumulator(CountSum accumulator) { + count += accumulator.count; + sum += accumulator.sum; + } + + @Override + public Double extractOutput() { + return count == 0 ? 0.0 : sum / count; + } + } + + @Override + public CountSum createAccumulator() { + return new CountSum(0, 0.0); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return new CountSumCoder(); + } + + /** + * A Coder for CountSum + */ + public class CountSumCoder extends CustomCoder { + @Override + public void encode(CountSum value, OutputStream outStream, + Context context) throws CoderException, IOException { + LONG_CODER.encode(value.count, outStream, context); + DOUBLE_CODER.encode(value.sum, outStream, context); + } + + @Override + public CountSum decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + long count = LONG_CODER.decode(inStream, context); + double sum = DOUBLE_CODER.decode(inStream, context); + return new CountSum(count, sum); + } + + @Override + public boolean isDeterministic() { + return true; + } + + @Override + public boolean isRegisterByteSizeObserverCheap( + CountSum value, Context context) { + return true; + } + + @Override + public void registerByteSizeObserver( + CountSum value, ElementByteSizeObserver observer, Context context) + throws Exception { + LONG_CODER.registerByteSizeObserver(value.count, observer, context); + DOUBLE_CODER.registerByteSizeObserver(value.sum, observer, context); + } + } + } + + /** Another example AccumulatingCombineFn. 
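+   * Its accumulator records the running sum together with the number of
+   * inputs, merges and outputs it has observed, which is what lets
+   * testAddInputsRandomly assert exactly how the inputs were batched.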
*/ + public static class TestCounter extends + Combine.AccumulatingCombineFn< + Integer, TestCounter.Counter, Iterable> { + + /** An accumulator that observes its merges and outputs */ + public class Counter extends + Combine.AccumulatingCombineFn< + Integer, Counter, Iterable>.Accumulator { + + public long sum = 0; + public long inputs = 0; + public long merges = 0; + public long outputs = 0; + + public Counter(long sum, long inputs, long merges, long outputs) { + this.sum = sum; + this.inputs = inputs; + this.merges = merges; + this.outputs = outputs; + } + + @Override + public void addInput(Integer element) { + Preconditions.checkState(merges == 0); + Preconditions.checkState(outputs == 0); + + inputs++; + sum += element; + } + + @Override + public void mergeAccumulator(Counter accumulator) { + Preconditions.checkState(outputs == 0); + Preconditions.checkArgument(accumulator.outputs == 0); + + merges += accumulator.merges + 1; + inputs += accumulator.inputs; + sum += accumulator.sum; + } + + @Override + public Iterable extractOutput() { + Preconditions.checkState(outputs == 0); + + return Arrays.asList(sum, inputs, merges, outputs); + } + + @Override + public int hashCode() { + return (int) (sum * 17 + inputs * 31 + merges * 43 + outputs * 181); + } + + @Override + public boolean equals(Object otherObj) { + if (otherObj instanceof Counter) { + Counter other = (Counter) otherObj; + return (sum == other.sum + && inputs == other.inputs + && merges == other.merges + && outputs == other.outputs); + } + return false; + } + + public String toString() { + return sum + ":" + inputs + ":" + merges + ":" + outputs; + } + } + + @Override + public Counter createAccumulator() { + return new Counter(0, 0, 0, 0); + } + + @Override + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder inputCoder) { + return SerializableCoder.of(Counter.class); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java new file mode 100644 index 0000000000000..05375bd7c5364 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.createStrings; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for Count. 
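+ *
+ * <p>Count.perElement() yields one {@code KV<T, Long>} per distinct element
+ * carrying its number of occurrences, while Count.globally() yields a single
+ * {@code Long} with the total element count; for example:
+ *
+ * <pre>{@code
+ * PCollection<String> words = ...;
+ * PCollection<KV<String, Long>> perElement = words.apply(Count.<String>perElement());
+ * PCollection<Long> total = words.apply(Count.<String>globally());
+ * }</pre>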
+ */ +@RunWith(JUnit4.class) +public class CountTest { + static final String[] WORDS_ARRAY = new String[] { + "hi", "there", "hi", "hi", "sue", "bob", + "hi", "sue", "", "", "ZOW", "bob", "" }; + + static final List WORDS = Arrays.asList(WORDS_ARRAY); + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCountPerElementBasic() { + Pipeline p = TestPipeline.create(); + + PCollection input = createStrings(p, WORDS); + + PCollection> output = + input.apply(Count.perElement()); + + DataflowAssert.that(output) + .containsInAnyOrder( + KV.of("hi", 4L), + KV.of("there", 1L), + KV.of("sue", 2L), + KV.of("bob", 2L), + KV.of("", 3L), + KV.of("ZOW", 1L)); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCountPerElementEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection input = createStrings(p, NO_LINES); + + PCollection> output = + input.apply(Count.perElement()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCountGloballyBasic() { + Pipeline p = TestPipeline.create(); + + PCollection input = createStrings(p, WORDS); + + PCollection output = + input.apply(Count.globally()); + + DataflowAssert.that(output) + .containsInAnyOrder(13L); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCountGloballyEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection input = createStrings(p, NO_LINES); + + PCollection output = + input.apply(Count.globally()); + + DataflowAssert.that(output) + .containsInAnyOrder(0L); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java new file mode 100644 index 0000000000000..8202da086240c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.LINES_ARRAY; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES_ARRAY; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; + +import org.hamcrest.Matchers; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for Create. + */ +@RunWith(JUnit4.class) +public class CreateTest { + @Rule public final ExpectedException thrown = ExpectedException.none(); + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCreate() { + Pipeline p = TestPipeline.create(); + + PCollection output = + p.apply(Create.of(LINES)); + + DataflowAssert.that(output) + .containsInAnyOrder(LINES_ARRAY); + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. + @Test + public void testCreateOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection output = + p.apply(Create.of(LINES)) + .setOrdered(true); + + DataflowAssert.that(output) + .containsInOrder(LINES_ARRAY); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCreateEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection output = + p.apply(Create.of(NO_LINES)) + .setCoder(StringUtf8Coder.of()); + + DataflowAssert.that(output) + .containsInAnyOrder(NO_LINES_ARRAY); + p.run(); + } + + static class Record implements Serializable { + } + + static class Record2 extends Record { + } + + @Test + public void testPolymorphicType() throws Exception { + thrown.expect(RuntimeException.class); + thrown.expectMessage( + Matchers.containsString("unable to infer a default Coder")); + + Pipeline p = TestPipeline.create(); + + // Create won't infer a default coder in this case. 
+ p.apply(Create.of(new Record(), new Record2())); + + p.run(); + } + + @Test + public void testCreateParameterizedType() throws Exception { + Pipeline p = TestPipeline.create(); + + PCollection> output = + p.apply(Create.of( + TimestampedValue.of("a", new Instant(0)), + TimestampedValue.of("b", new Instant(0)))); + + DataflowAssert.that(output) + .containsInAnyOrder( + TimestampedValue.of("a", new Instant(0)), + TimestampedValue.of("b", new Instant(0))); + } + + private static class PrintTimestamps extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(c.element() + ":" + c.timestamp().getMillis()); + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCreateTimestamped() { + Pipeline p = TestPipeline.create(); + + List> data = Arrays.asList( + TimestampedValue.of("a", new Instant(1L)), + TimestampedValue.of("b", new Instant(2L)), + TimestampedValue.of("c", new Instant(3L))); + + PCollection output = + p.apply(Create.timestamped(data)) + .apply(ParDo.of(new PrintTimestamps())); + + DataflowAssert.that(output) + .containsInAnyOrder("a:1", "b:2", "c:3"); + p.run(); + } + + @Test + // This test fails when run on the service! + // TODO: @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCreateTimestampedEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection output = p + .apply(Create.timestamped(new ArrayList>())) + .setCoder(StringUtf8Coder.of()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + p.run(); + } + + @Test + public void testCreateTimestampedPolymorphicType() throws Exception { + thrown.expect(RuntimeException.class); + thrown.expectMessage( + Matchers.containsString("unable to infer a default Coder")); + + Pipeline p = TestPipeline.create(); + + // Create won't infer a default coder in this case. + PCollection c = p.apply(Create.timestamped( + TimestampedValue.of(new Record(), new Instant(0)), + TimestampedValue.of(new Record2(), new Instant(0)))); + + p.run(); + + + throw new RuntimeException("Coder: " + c.getCoder()); + + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java new file mode 100644 index 0000000000000..bcd14d0c6e894 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +/** + * Tests for First. + */ +@RunWith(JUnit4.class) +public class FirstTest + implements Serializable /* to allow anon inner classes */ { + // PRE: lines contains no duplicates. + void runTestFirst(final List lines, int limit, boolean ordered) { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(lines)) + .setCoder(StringUtf8Coder.of()); + + if (ordered) { + input.setOrdered(true); + } + + PCollection output = + input.apply(First.of(limit)); + + if (ordered) { + output.setOrdered(true); + } + + final int expectedSize = Math.min(limit, lines.size()); + if (ordered) { + List expected = lines.subList(0, expectedSize); + if (expected.isEmpty()) { + DataflowAssert.that(output) + .containsInAnyOrder(expected); + } else { + DataflowAssert.that(output) + .containsInOrder(expected); + } + } else { + DataflowAssert.that(output) + .satisfies(new SerializableFunction, Void>() { + @Override + public Void apply(Iterable actualIter) { + // Make sure actual is the right length, and is a + // subset of expected. + List actual = new ArrayList<>(); + for (String s : actualIter) { + actual.add(s); + } + assertEquals(expectedSize, actual.size()); + Set actualAsSet = new TreeSet<>(actual); + Set linesAsSet = new TreeSet<>(lines); + assertEquals(actual.size(), actualAsSet.size()); + assertEquals(lines.size(), linesAsSet.size()); + assertTrue(linesAsSet.containsAll(actualAsSet)); + return null; + } + }); + } + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFirst() { + runTestFirst(LINES, 0, false); + runTestFirst(LINES, LINES.size() / 2, false); + runTestFirst(LINES, LINES.size() * 2, false); + } + + @Test + // Extra tests, not worth the time to run on the real service. + public void testFirstMore() { + runTestFirst(LINES, LINES.size() - 1, false); + runTestFirst(LINES, LINES.size(), false); + runTestFirst(LINES, LINES.size() + 1, false); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. + @Test + public void testFirstOrdered() { + runTestFirst(LINES, 0, true); + runTestFirst(LINES, LINES.size() / 2, true); + runTestFirst(LINES, LINES.size() - 1, true); + runTestFirst(LINES, LINES.size(), true); + runTestFirst(LINES, LINES.size() + 1, true); + runTestFirst(LINES, LINES.size() * 2, true); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFirstEmpty() { + runTestFirst(NO_LINES, 0, false); + runTestFirst(NO_LINES, 1, false); + } + + @Test + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. 
+ public void testFirstEmptyOrdered() { + runTestFirst(NO_LINES, 0, true); + runTestFirst(NO_LINES, 1, true); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java new file mode 100644 index 0000000000000..70cc4f1eaf88d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.LINES2; +import static com.google.cloud.dataflow.sdk.TestUtils.LINES_ARRAY; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES; +import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES_ARRAY; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import org.joda.time.Duration; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.List; + +/** + * Tests for Flatten. + */ +@RunWith(JUnit4.class) +public class FlattenTest { + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFlattenPCollectionListUnordered() { + Pipeline p = TestPipeline.create(); + + List[] inputs = new List[] { + LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES }; + + PCollection output = + makePCollectionListOfStrings(false /* not ordered */, p, inputs) + .apply(Flatten.pCollections()); + + DataflowAssert.that(output).containsInAnyOrder(flatten(inputs)); + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. 
+ @Test + public void testFlattenPCollectionListOrdered() { + Pipeline p = TestPipeline.create(); + + List[] inputs = new List[] { + LINES, NO_LINES, LINES2, NO_LINES, LINES, NO_LINES }; + + PCollection output = + makePCollectionListOfStrings(true /* ordered */, p, inputs) + .apply(Flatten.pCollections()).setOrdered(true); + + DataflowAssert.that(output).containsInOrder(flatten(inputs)); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFlattenPCollectionListEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection output = + PCollectionList.empty(p) + .apply(Flatten.pCollections()).setCoder(StringUtf8Coder.of()); + + DataflowAssert.that(output).containsInAnyOrder(); + p.run(); + } + + @Test + public void testWindowingFnPropagationFailure() { + Pipeline p = TestPipeline.create(); + + PCollection input1 = + p.apply(Create.of("Input1")) + .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1)))); + PCollection input2 = + p.apply(Create.of("Input2")) + .apply(Window.into(FixedWindows.of(Duration.standardMinutes(2)))); + + try { + PCollection output = + PCollectionList.of(input1).and(input2) + .apply(Flatten.create()); + Assert.fail("Exception should have been thrown"); + } catch (IllegalStateException e) { + Assert.assertTrue(e.getMessage().startsWith( + "Inputs to Flatten had incompatible window windowingFns")); + } + } + + @Test + public void testWindowingFnPropagation() { + Pipeline p = TestPipeline.create(); + + PCollection input1 = + p.apply(Create.of("Input1")) + .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1)))); + PCollection input2 = + p.apply(Create.of("Input2")) + .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1)))); + + PCollection output = + PCollectionList.of(input1).and(input2) + .apply(Flatten.create()); + + p.run(); + + Assert.assertTrue(output.getWindowingFn().isCompatible( + FixedWindows.of(Duration.standardMinutes(1)))); + } + + @Test + public void testEqualWindowingFnPropagation() { + Pipeline p = TestPipeline.create(); + + PCollection input1 = + p.apply(Create.of("Input1")) + .apply(Window.into(Sessions.withGapDuration(Duration.standardMinutes(1)))); + PCollection input2 = + p.apply(Create.of("Input2")) + .apply(Window.into(Sessions.withGapDuration(Duration.standardMinutes(2)))); + + PCollection output = + PCollectionList.of(input1).and(input2) + .apply(Flatten.create()); + + p.run(); + + Assert.assertTrue(output.getWindowingFn().isCompatible( + Sessions.withGapDuration(Duration.standardMinutes(2)))); + } + + + PCollectionList makePCollectionListOfStrings(boolean ordered, + Pipeline p, + List... lists) { + return makePCollectionList(ordered, p, StringUtf8Coder.of(), lists); + } + + PCollectionList makePCollectionList(boolean ordered, + Pipeline p, + Coder coder, + List... lists) { + List> pcs = new ArrayList<>(); + for (List list : lists) { + PCollection pc = p.apply(Create.of(list)).setCoder(coder); + if (ordered) { + pc.setOrdered(true); + } + pcs.add(pc); + } + return PCollectionList.of(pcs); + } + + T[] flatten(List... 
lists) { + List flattened = new ArrayList<>(); + for (List list : lists) { + flattened.addAll(list); + } + return flattened.toArray((T[]) new Object[flattened.size()]); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFlattenIterables() { + Pipeline p = TestPipeline.create(); + + PCollection> input = p + .apply(Create.>of(LINES)) + .setCoder(IterableCoder.of(StringUtf8Coder.of())); + + PCollection output = + input.apply(Flatten.iterables()); + + DataflowAssert.that(output) + .containsInAnyOrder(LINES_ARRAY); + + p.run(); + } + + @Test + public void testFlattenIterablesOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection> input = p + .apply(Create.>of(LINES)) + .setCoder(IterableCoder.of(StringUtf8Coder.of())); + + PCollection output = + input.apply(Flatten.iterables()).setOrdered(true); + + DataflowAssert.that(output) + .containsInOrder(LINES_ARRAY); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testFlattenIterablesEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection> input = p + .apply(Create.>of(NO_LINES)) + .setCoder(IterableCoder.of(StringUtf8Coder.of())); + + PCollection output = + input.apply(Flatten.iterables()); + + DataflowAssert.that(output) + .containsInAnyOrder(NO_LINES_ARRAY); + + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java new file mode 100644 index 0000000000000..ebb141f38b7b6 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -0,0 +1,280 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.KvMatcher.isKv; +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.MapCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.Matchers; +import org.joda.time.Duration; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +/** + * Tests for GroupByKey. + */ +@RunWith(JUnit4.class) +public class GroupByKeyTest { + + @Rule + public ExpectedException expectedEx = ExpectedException.none(); + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testGroupByKey() { + List> ungroupedPairs = Arrays.asList( + KV.of("k1", 3), + KV.of("k5", Integer.MAX_VALUE), + KV.of("k5", Integer.MIN_VALUE), + KV.of("k2", 66), + KV.of("k1", 4), + KV.of("k2", -33), + KV.of("k3", 0)); + + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection>> output = + input.apply(GroupByKey.create()); + + DataflowAssert.that(output) + .satisfies(new AssertThatHasExpectedContentsForTestGroupByKey()); + + p.run(); + } + + static class AssertThatHasExpectedContentsForTestGroupByKey + implements SerializableFunction>>, + Void> { + @Override + public Void apply(Iterable>> actual) { + assertThat(actual, containsInAnyOrder( + isKv(is("k1"), containsInAnyOrder(3, 4)), + isKv(is("k5"), containsInAnyOrder(Integer.MAX_VALUE, + Integer.MIN_VALUE)), + isKv(is("k2"), containsInAnyOrder(66, -33)), + isKv(is("k3"), containsInAnyOrder(0)))); + return null; + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testGroupByKeyAndWindows() { + List> ungroupedPairs = Arrays.asList( + KV.of("k1", 3), // window [0, 5) + KV.of("k5", Integer.MAX_VALUE), // window [0, 5) + KV.of("k5", Integer.MIN_VALUE), // window [0, 5) + KV.of("k2", 66), // window [0, 5) + KV.of("k1", 4), // window [5, 10) + KV.of("k2", -33), // window [5, 10) + KV.of("k3", 0)); // window [5, 10) + + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.timestamped(ungroupedPairs, Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L))) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + PCollection>> output = + input.apply(Window.>into(FixedWindows.of(new Duration(5)))) + 
.apply(GroupByKey.create()); + + DataflowAssert.that(output) + .satisfies(new AssertThatHasExpectedContentsForTestGroupByKeyAndWindows()); + + p.run(); + } + + static class AssertThatHasExpectedContentsForTestGroupByKeyAndWindows + implements SerializableFunction>>, + Void> { + @Override + public Void apply(Iterable>> actual) { + assertThat(actual, containsInAnyOrder( + isKv(is("k1"), containsInAnyOrder(3)), + isKv(is("k1"), containsInAnyOrder(4)), + isKv(is("k5"), containsInAnyOrder(Integer.MAX_VALUE, + Integer.MIN_VALUE)), + isKv(is("k2"), containsInAnyOrder(66)), + isKv(is("k2"), containsInAnyOrder(-33)), + isKv(is("k3"), containsInAnyOrder(0)))); + return null; + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testGroupByKeyEmpty() { + List> ungroupedPairs = Arrays.asList(); + + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection>> output = + input.apply(GroupByKey.create()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + + p.run(); + } + + @Test + public void testGroupByKeyNonDeterministic() throws Exception { + expectedEx.expect(IllegalStateException.class); + expectedEx.expectMessage(Matchers.containsString("must be deterministic")); + + List, Integer>> ungroupedPairs = Arrays.asList(); + + Pipeline p = TestPipeline.create(); + + PCollection, Integer>> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder( + KvCoder.of(MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), + BigEndianIntegerCoder.of())); + + input.apply(GroupByKey., Integer>create()); + + p.run(); + } + + @Test + public void testIdentityWindowingFnPropagation() { + Pipeline p = TestPipeline.create(); + + List> ungroupedPairs = Arrays.asList(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into(FixedWindows.of(Duration.standardMinutes(1)))); + + PCollection>> output = + input.apply(GroupByKey.create()); + + p.run(); + + Assert.assertTrue(output.getWindowingFn().isCompatible( + FixedWindows.>of(Duration.standardMinutes(1)))); + + } + + @Test + public void testWindowingFnInvalidation() { + Pipeline p = TestPipeline.create(); + + List> ungroupedPairs = Arrays.asList(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into( + Sessions.withGapDuration(Duration.standardMinutes(1)))); + + PCollection>> output = + input.apply(GroupByKey.create()); + + p.run(); + + Assert.assertTrue( + output.getWindowingFn().isCompatible( + new InvalidWindowingFn( + "Invalid", + Sessions.>withGapDuration( + Duration.standardMinutes(1))))); + } + + @Test + public void testInvalidWindowingFn() { + Pipeline p = TestPipeline.create(); + + List> ungroupedPairs = Arrays.asList(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into( + Sessions.withGapDuration(Duration.standardMinutes(1)))); + + try { + PCollection>>> output = input + .apply(GroupByKey.create()) + .apply(GroupByKey.>create()); + Assert.fail("Exception should have been thrown"); + } catch (IllegalStateException e) { + Assert.assertTrue(e.getMessage().startsWith( + "GroupByKey must have a valid Window merge function.")); + } + } + + @Test + public void testRemerge() 
{ + Pipeline p = TestPipeline.create(); + + List> ungroupedPairs = Arrays.asList(); + + PCollection> input = + p.apply(Create.of(ungroupedPairs)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into( + Sessions.withGapDuration(Duration.standardMinutes(1)))); + + PCollection>>> middle = input + .apply(GroupByKey.create()) + .apply(Window.>>remerge()) + .apply(GroupByKey.>create()) + .apply(Window.>>>remerge()); + + p.run(); + + Assert.assertTrue( + middle.getWindowingFn().isCompatible( + Sessions.withGapDuration(Duration.standardMinutes(1)))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java new file mode 100644 index 0000000000000..1d6e233adef85 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** + * Tests for Keys transform. + */ +@RunWith(JUnit4.class) +public class KeysTest { + static final KV[] TABLE = new KV[] { + KV.of("one", 1), + KV.of("two", 2), + KV.of("three", 3), + KV.of("dup", 4), + KV.of("dup", 5) + }; + + static final KV[] EMPTY_TABLE = new KV[] { + }; + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testKeys() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection output = input.apply(Keys.create()); + DataflowAssert.that(output) + .containsInAnyOrder("one", "two", "three", "dup", "dup"); + + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. 
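For context, a minimal sketch of the Keys usage that testKeys above exercises, with the generic type parameters spelled out since they are easy to lose when reading the diff; it assumes the same SDK classes already imported by KeysTest, and the data values are illustrative only.

    // Illustrative sketch: Keys.<K>create() maps each KV<K, V> to its key.
    Pipeline p = TestPipeline.create();

    PCollection<KV<String, Integer>> pairs =
        p.apply(Create.of(Arrays.asList(KV.of("one", 1), KV.of("two", 2))))
         .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));

    // Each KV<String, Integer> is reduced to its String key.
    PCollection<String> keys = pairs.apply(Keys.<String>create());

    DataflowAssert.that(keys).containsInAnyOrder("one", "two");
    p.run();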
+ @Test + public void testKeysOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + input.setOrdered(true); + PCollection output = + input.apply(Keys.create()).setOrdered(true); + DataflowAssert.that(output) + .containsInOrder("one", "two", "three", "dup", "dup"); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testKeysEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(EMPTY_TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection output = input.apply(Keys.create()); + DataflowAssert.that(output) + .containsInAnyOrder(); + + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java new file mode 100644 index 0000000000000..15c2ff2ff7366 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** + * Tests for KvSwap transform. + */ +@RunWith(JUnit4.class) +public class KvSwapTest { + static final KV[] TABLE = new KV[] { + KV.of("one", 1), + KV.of("two", 2), + KV.of("three", 3), + KV.of("four", 4), + KV.of("dup", 4), + KV.of("dup", 5) + }; + + static final KV[] EMPTY_TABLE = new KV[] { + }; + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testKvSwap() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection> output = input.apply( + KvSwap.create()); + + DataflowAssert.that(output).containsInAnyOrder( + KV.of(1, "one"), + KV.of(2, "two"), + KV.of(3, "three"), + KV.of(4, "four"), + KV.of(4, "dup"), + KV.of(5, "dup")); + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. 
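Similarly, a minimal sketch of the KvSwap usage that testKvSwap above exercises; the input values are illustrative and the coder setup mirrors the test.

    // Illustrative sketch: KvSwap.<K, V>create() turns each KV<K, V> into KV<V, K>.
    Pipeline p = TestPipeline.create();

    PCollection<KV<String, Integer>> pairs =
        p.apply(Create.of(Arrays.asList(KV.of("one", 1), KV.of("two", 2))))
         .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));

    PCollection<KV<Integer, String>> swapped =
        pairs.apply(KvSwap.<String, Integer>create());

    DataflowAssert.that(swapped).containsInAnyOrder(KV.of(1, "one"), KV.of(2, "two"));
    p.run();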
+ @Test + public void testKvSwapOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + input.setOrdered(true); + PCollection> output = input.apply( + KvSwap.create()).setOrdered(true); + + DataflowAssert.that(output).containsInOrder( + KV.of(1, "one"), + KV.of(2, "two"), + KV.of(3, "three"), + KV.of(4, "four"), + KV.of(4, "dup"), + KV.of(5, "dup")); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testKvSwapEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(EMPTY_TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection> output = input.apply( + KvSwap.create()); + + DataflowAssert.that(output).containsInAnyOrder(); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java new file mode 100644 index 0000000000000..7e46bb31a7858 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -0,0 +1,986 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.createInts; +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; +import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; +import static org.hamcrest.core.AnyOf.anyOf; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Tests for ParDo. + */ +@RunWith(JUnit4.class) +public class ParDoTest implements Serializable { + // This test is Serializable, just so that it's easy to have + // anonymous inner classes inside the non-static test methods. 
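The Serializable marker mentioned above matters because an anonymous DoFn declared inside a non-static test method captures the enclosing ParDoTest instance; a sketch of that pattern follows (a hypothetical method, not one of the tests in this file), assuming the createInts helper shown in the imports.

    // Hypothetical example of the pattern described above: the anonymous DoFn
    // captures the enclosing (Serializable) test instance, so the runner can
    // serialize it along with the rest of the pipeline graph.
    @Test
    public void anonymousDoFnSketch() {
      Pipeline p = TestPipeline.create();

      PCollection<Integer> output =
          createInts(p, Arrays.asList(1, 2, 3))
              .apply(ParDo.of(new DoFn<Integer, Integer>() {
                @Override
                public void processElement(ProcessContext c) {
                  c.output(c.element() + 1);
                }
              }));

      DataflowAssert.that(output).containsInAnyOrder(2, 3, 4);
      p.run();
    }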
+ + @Rule + public transient ExpectedException thrown = ExpectedException.none(); + + + static class TestDoFn extends DoFn { + enum State { UNSTARTED, STARTED, PROCESSING, FINISHED } + State state = State.UNSTARTED; + + final List> sideInputViews = new ArrayList<>(); + final List> sideOutputTupleTags = new ArrayList<>(); + + public TestDoFn() { + } + + public TestDoFn(List> sideInputViews, + List> sideOutputTupleTags) { + this.sideInputViews.addAll(sideInputViews); + this.sideOutputTupleTags.addAll(sideOutputTupleTags); + } + + @Override + public void startBundle(Context c) { + assertEquals(State.UNSTARTED, state); + state = State.STARTED; + outputToAll(c, "started"); + } + + @Override + public void processElement(ProcessContext c) { + assertThat(state, + anyOf(equalTo(State.STARTED), equalTo(State.PROCESSING))); + state = State.PROCESSING; + outputToAll(c, "processing: " + c.element()); + } + + @Override + public void finishBundle(Context c) { + assertThat(state, + anyOf(equalTo(State.STARTED), equalTo(State.PROCESSING))); + state = State.FINISHED; + outputToAll(c, "finished"); + } + + private void outputToAll(Context c, String value) { + if (!sideInputViews.isEmpty()) { + List sideInputValues = new ArrayList<>(); + for (PCollectionView sideInputView : sideInputViews) { + sideInputValues.add(c.sideInput(sideInputView)); + } + value += ": " + sideInputValues; + } + c.output(value); + for (TupleTag sideOutputTupleTag : sideOutputTupleTags) { + c.sideOutput(sideOutputTupleTag, + sideOutputTupleTag.getId() + ": " + value); + } + } + + /** DataflowAssert "matcher" for expected output. */ + static class HasExpectedOutput + implements SerializableFunction, Void>, Serializable { + private final List inputs; + private final List sideInputs; + private final String sideOutput; + private final boolean ordered; + + public static HasExpectedOutput forInput(List inputs) { + return new HasExpectedOutput( + new ArrayList(inputs), + new ArrayList(), + "", + false); + } + + private HasExpectedOutput(List inputs, + List sideInputs, + String sideOutput, + boolean ordered) { + this.inputs = inputs; + this.sideInputs = sideInputs; + this.sideOutput = sideOutput; + this.ordered = ordered; + } + + public HasExpectedOutput andSideInputs(Integer... 
sideInputValues) { + List sideInputs = new ArrayList<>(); + for (Integer sideInputValue : sideInputValues) { + sideInputs.add(sideInputValue); + } + return new HasExpectedOutput(inputs, sideInputs, sideOutput, ordered); + } + + public HasExpectedOutput fromSideOutput(TupleTag sideOutputTag) { + return fromSideOutput(sideOutputTag.getId()); + } + public HasExpectedOutput fromSideOutput(String sideOutput) { + return new HasExpectedOutput(inputs, sideInputs, sideOutput, ordered); + } + + public HasExpectedOutput inOrder() { + return new HasExpectedOutput(inputs, sideInputs, sideOutput, true); + } + + @Override + public Void apply(Iterable outputs) { + List starteds = new ArrayList<>(); + List processeds = new ArrayList<>(); + List finisheds = new ArrayList<>(); + for (String output : outputs) { + if (output.contains("started")) { + starteds.add(output); + } else if (output.contains("finished")) { + finisheds.add(output); + } else { + processeds.add(output); + } + } + + String sideInputsSuffix; + if (sideInputs.isEmpty()) { + sideInputsSuffix = ""; + } else { + sideInputsSuffix = ": " + sideInputs; + } + + String sideOutputPrefix; + if (sideOutput.isEmpty()) { + sideOutputPrefix = ""; + } else { + sideOutputPrefix = sideOutput + ": "; + } + + List expectedProcesseds = new ArrayList<>(); + for (Integer input : inputs) { + expectedProcesseds.add( + sideOutputPrefix + "processing: " + input + sideInputsSuffix); + } + String[] expectedProcessedsArray = + expectedProcesseds.toArray(new String[expectedProcesseds.size()]); + if (!ordered || expectedProcesseds.isEmpty()) { + assertThat(processeds, containsInAnyOrder(expectedProcessedsArray)); + } else { + assertThat(processeds, contains(expectedProcessedsArray)); + } + + assertEquals(starteds.size(), finisheds.size()); + assertTrue(starteds.size() > 0); + for (String started : starteds) { + assertEquals(sideOutputPrefix + "started" + sideInputsSuffix, + started); + } + for (String finished : finisheds) { + assertEquals(sideOutputPrefix + "finished" + sideInputsSuffix, + finished); + } + + return null; + } + } + } + + static class TestStartBatchErrorDoFn extends DoFn { + @Override + public void startBundle(Context c) { + throw new RuntimeException("test error in initialize"); + } + + @Override + public void processElement(ProcessContext c) { + // This has to be here. + } + } + + static class TestProcessElementErrorDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + throw new RuntimeException("test error in process"); + } + } + + static class TestFinishBatchErrorDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + // This has to be here. + } + + @Override + public void finishBundle(Context c) { + throw new RuntimeException("test error in finalize"); + } + } + + static class TestUnexpectedKeyedStateDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + // Will fail since this DoFn doesn't implement RequiresKeyedState. 
+ c.keyedState(); + } + } + + private static class StrangelyNamedDoer extends DoFn { + @Override + public void processElement(ProcessContext c) { + } + } + + static class TestOutputTimestampDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + Integer value = c.element(); + c.outputWithTimestamp(value, new Instant(value.longValue())); + } + } + + static class TestShiftTimestampDoFn extends DoFn { + private Duration allowedTimestampSkew; + private Duration durationToShift; + + public TestShiftTimestampDoFn(Duration allowedTimestampSkew, + Duration durationToShift) { + this.allowedTimestampSkew = allowedTimestampSkew; + this.durationToShift = durationToShift; + } + + @Override + public Duration getAllowedTimestampSkew() { + return allowedTimestampSkew; + } + @Override + public void processElement(ProcessContext c) { + Instant timestamp = c.timestamp(); + Preconditions.checkNotNull(timestamp); + Integer value = c.element(); + c.outputWithTimestamp(value, timestamp.plus(durationToShift)); + } + } + + static class TestFormatTimestampDoFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + Preconditions.checkNotNull(c.timestamp()); + c.output("processing: " + c.element() + ", timestamp: " + c.timestamp().getMillis()); + } + } + + static class MultiFilter + extends PTransform, PCollectionTuple> { + + private static final TupleTag BY2 = new TupleTag("by2"){}; + private static final TupleTag BY3 = new TupleTag("by3"){}; + + @Override + public PCollectionTuple apply(PCollection input) { + PCollection by2 = input.apply(ParDo.of(new FilterFn(2))); + PCollection by3 = input.apply(ParDo.of(new FilterFn(3))); + return PCollectionTuple.of(BY2, by2).and(BY3, by3); + } + + static class FilterFn extends DoFn { + private final int divisor; + + FilterFn(int divisor) { + this.divisor = divisor; + } + + @Override + public void processElement(ProcessContext c) throws Exception { + if (c.element() % divisor == 0) { + c.output(c.element()); + } + } + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDo() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + PCollection output = + input + .apply(ParDo.of(new TestDoFn())); + + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs)); + + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. 
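For reference, the shape of the pipeline that testParDo above builds, reduced to its essentials; TestDoFn's bundle-lifecycle assertions are replaced here by a plain pass-through DoFn, so this is an illustration of the wiring rather than a copy of the test.

    // Illustrative sketch of the basic ParDo wiring used throughout these tests.
    Pipeline p = TestPipeline.create();

    PCollection<Integer> input = createInts(p, Arrays.asList(3, -42, 666));

    PCollection<String> output =
        input.apply(ParDo.of(new DoFn<Integer, String>() {
          @Override
          public void processElement(ProcessContext c) {
            c.output("processing: " + c.element());
          }
        }));

    DataflowAssert.that(output).containsInAnyOrder(
        "processing: 3", "processing: -42", "processing: 666");
    p.run();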
+ @Test + public void testParDoOrdered() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs).setOrdered(true); + + PCollection output = + input + .apply(ParDo.of(new TestDoFn())).setOrdered(true); + + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs).inOrder()); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDoEmpty() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(); + + PCollection input = createInts(p, inputs); + + PCollection output = + input + .apply(ParDo.of(new TestDoFn())); + + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs)); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDoWithSideOutputs() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + TupleTag mainTag = new TupleTag("main"){}; + TupleTag sideTag1 = new TupleTag("side1"){}; + TupleTag sideTag2 = new TupleTag("side2"){}; + TupleTag sideTag3 = new TupleTag("side3"){}; + TupleTag sideTagUnwritten = new TupleTag("sideUnwritten"){}; + + PCollectionTuple outputs = + input + .apply(ParDo + .of(new TestDoFn( + Arrays.>asList(), + Arrays.asList(sideTag1, sideTag2, sideTag3))) + .withOutputTags( + mainTag, + TupleTagList.of(sideTag3).and(sideTag1) + .and(sideTagUnwritten).and(sideTag2))); + + DataflowAssert.that(outputs.get(mainTag)) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs)); + + DataflowAssert.that(outputs.get(sideTag1)) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs) + .fromSideOutput(sideTag1)); + DataflowAssert.that(outputs.get(sideTag2)) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs) + .fromSideOutput(sideTag2)); + DataflowAssert.that(outputs.get(sideTag3)) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs) + .fromSideOutput(sideTag3)); + DataflowAssert.that(outputs.get(sideTagUnwritten)).containsInAnyOrder(); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDoWithOnlySideOutputs() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + final TupleTag mainTag = new TupleTag("main"){}; + final TupleTag sideTag = new TupleTag("side"){}; + + PCollectionTuple outputs = input.apply( + ParDo + .withOutputTags(mainTag, TupleTagList.of(sideTag)) + .of(new DoFn(){ + @Override + public void processElement(ProcessContext c) { + c.sideOutput(sideTag, c.element()); + }})); + + DataflowAssert.that(outputs.get(mainTag)).containsInAnyOrder(); + DataflowAssert.that(outputs.get(sideTag)).containsInAnyOrder(inputs); + + p.run(); + } + + @Test + public void testParDoWritingToUndeclaredSideOutput() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + TupleTag sideTag = new TupleTag("side"){}; + + PCollection output = + input + .apply(ParDo.of(new TestDoFn( + Arrays.>asList(), + Arrays.asList(sideTag)))); + + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs)); + + p.run(); + } + + @Test + public void testParDoUndeclaredSideOutputLimit() { + Pipeline p = TestPipeline.create(); + PCollection input = createInts(p, Arrays.asList(3)); + + // 
Success for a total of 1000 outputs. + input + .apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + TupleTag specialSideTag = new TupleTag(){}; + c.sideOutput(specialSideTag, "side"); + c.sideOutput(specialSideTag, "side"); + c.sideOutput(specialSideTag, "side"); + + for (int i = 0; i < 998; i++) { + c.sideOutput(new TupleTag(){}, "side"); + } + }})); + p.run(); + + // Failure for a total of 1001 outputs. + input + .apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + for (int i = 0; i < 1000; i++) { + c.sideOutput(new TupleTag(){}, "side"); + } + }})); + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), + containsString("the number of side outputs has exceeded a limit")); + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDoWithSideInputs() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + PCollectionView sideInput1 = TestUtils.createSingletonInt(p, 11); + PCollectionView sideInputUnread = TestUtils.createSingletonInt(p, -3333); + PCollectionView sideInput2 = TestUtils.createSingletonInt(p, 222); + + PCollection output = + input + .apply(ParDo + .withSideInputs(sideInput1, sideInputUnread, sideInput2) + .of(new TestDoFn( + Arrays.asList(sideInput1, sideInput2), + Arrays.>asList()))); + + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput + .forInput(inputs) + .andSideInputs(11, 222)); + + p.run(); + } + + @Test + public void testParDoReadingFromUnknownSideInput() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + PCollectionView sideView = TestUtils.createSingletonInt(p, 3); + + input + .apply(ParDo.of(new TestDoFn( + Arrays.>asList(sideView), + Arrays.>asList()))); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), + containsString("calling sideInput() with unknown view")); + } + } + + @Test + public void testParDoWithErrorInStartBatch() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + input + .apply(ParDo.of(new TestStartBatchErrorDoFn())); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), containsString("test error in initialize")); + } + } + + @Test + public void testParDoWithErrorInProcessElement() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + input + .apply(ParDo.of(new TestProcessElementErrorDoFn())); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), containsString("test error in process")); + } + } + + @Test + public void testParDoWithErrorInFinishBatch() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + input + .apply(ParDo.of(new TestFinishBatchErrorDoFn())); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), containsString("test error in finalize")); + } + } + + @Test + public void testParDoWithUnexpectedKeyedState() { + Pipeline p = TestPipeline.create(); + + List inputs = 
Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + input + .apply(ParDo.of(new TestUnexpectedKeyedStateDoFn())); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), + containsString("Keyed state is only available")); + } + } + + @Test + public void testParDoName() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(3, -42, 666)) + .setName("MyInput"); + + { + PCollection output1 = + input + .apply(ParDo.of(new TestDoFn())); + assertEquals("Test.out", output1.getName()); + } + + { + PCollection output2 = + input + .apply(ParDo.named("MyParDo").of(new TestDoFn())); + assertEquals("MyParDo.out", output2.getName()); + } + + { + PCollection output3 = + input + .apply(ParDo.of(new TestDoFn()).named("HerParDo")); + assertEquals("HerParDo.out", output3.getName()); + } + + { + PCollection output4 = + input + .apply(ParDo.of(new TestDoFn()).named("TestDoFn")); + assertEquals("TestDoFn.out", output4.getName()); + } + + { + PCollection output5 = + input + .apply(ParDo.of(new StrangelyNamedDoer())); + assertEquals("StrangelyNamedDoer.out", + output5.getName()); + } + } + + @Test + public void testParDoWithSideOutputsName() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(3, -42, 666)) + .setName("MyInput"); + + TupleTag mainTag = new TupleTag("main"){}; + TupleTag sideTag1 = new TupleTag("side1"){}; + TupleTag sideTag2 = new TupleTag("side2"){}; + TupleTag sideTag3 = new TupleTag("side3"){}; + TupleTag sideTagUnwritten = new TupleTag("sideUnwritten"){}; + + PCollectionTuple outputs = + input + .apply(ParDo + .named("MyParDo") + .of(new TestDoFn( + Arrays.>asList(), + Arrays.asList(sideTag1, sideTag2, sideTag3))) + .withOutputTags( + mainTag, + TupleTagList.of(sideTag3).and(sideTag1) + .and(sideTagUnwritten).and(sideTag2))); + + assertEquals("MyParDo.main", outputs.get(mainTag).getName()); + assertEquals("MyParDo.side1", outputs.get(sideTag1).getName()); + assertEquals("MyParDo.side2", outputs.get(sideTag2).getName()); + assertEquals("MyParDo.side3", outputs.get(sideTag3).getName()); + assertEquals("MyParDo.sideUnwritten", + outputs.get(sideTagUnwritten).getName()); + } + + @Test + public void testParDoInCustomTransform() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + PCollection output = + input + .apply(new PTransform, PCollection>() { + @Override + public PCollection apply(PCollection input) { + return input.apply(ParDo.of(new TestDoFn())); + } + }); + + // Test that Coder inference of the result works through + // user-defined PTransforms. + DataflowAssert.that(output) + .satisfies(TestDoFn.HasExpectedOutput.forInput(inputs)); + + p.run(); + } + + @Test + public void testMultiOutputChaining() { + Pipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(3, 4, 5, 6)); + + PCollectionTuple filters = input.apply(new MultiFilter()); + PCollection by2 = filters.get(MultiFilter.BY2); + PCollection by3 = filters.get(MultiFilter.BY3); + + // Apply additional filters to each operation. 
+ PCollection by2then3 = by2 + .apply(ParDo.of(new MultiFilter.FilterFn(3))); + PCollection by3then2 = by3 + .apply(ParDo.of(new MultiFilter.FilterFn(2))); + + DataflowAssert.that(by2then3).containsInAnyOrder(6); + DataflowAssert.that(by3then2).containsInAnyOrder(6); + p.run(); + } + + @Test + public void testJsonEscaping() { + // Declare an arbitrary function and make sure we can serialize it + DoFn doFn = new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element() + 1); + } + }; + + byte[] serializedBytes = serializeToByteArray(doFn); + String serializedJson = byteArrayToJsonString(serializedBytes); + assertArrayEquals( + serializedBytes, jsonStringToByteArray(serializedJson)); + } + + private static class TestDummy { } + + private static class TestDummyCoder extends AtomicCoder { + private TestDummyCoder() { } + private static final TestDummyCoder INSTANCE = new TestDummyCoder(); + + @JsonCreator + public static TestDummyCoder of() { + return INSTANCE; + } + + public static List getInstanceComponents(TestDummy exampleValue) { + return Collections.emptyList(); + } + + @Override + public void encode(TestDummy value, OutputStream outStream, Context context) + throws CoderException, IOException { + } + + @Override + public TestDummy decode(InputStream inStream, Context context) + throws CoderException, IOException { + return new TestDummy(); + } + + @Override + public boolean isDeterministic() { return true; } + + @Override + public boolean isRegisterByteSizeObserverCheap(TestDummy value, Context context) { + return true; + } + + @Override + public void registerByteSizeObserver( + TestDummy value, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(0L); + } + } + + private static class SideOutputDummyFn extends DoFn { + private TupleTag sideTag; + public SideOutputDummyFn(TupleTag sideTag) { + this.sideTag = sideTag; + } + @Override + public void processElement(ProcessContext c) { + c.output(1); + c.sideOutput(sideTag, new TestDummy()); + } + } + + private static class MainOutputDummyFn extends DoFn { + private TupleTag sideTag; + public MainOutputDummyFn(TupleTag sideTag) { + this.sideTag = sideTag; + } + @Override + public void processElement(ProcessContext c) { + c.output(new TestDummy()); + c.sideOutput(sideTag, 1); + } + } + + @Test + public void testSideOutputUnknownCoder() { + Pipeline pipeline = TestPipeline.create(); + PCollection input = pipeline + .apply(Create.of(Arrays.asList(1, 2, 3))); + + // Expect a fail, but it should be a NoCoderException + final TupleTag mainTag = new TupleTag(); + final TupleTag sideTag = new TupleTag(); + input.apply(ParDo.of(new SideOutputDummyFn(sideTag)) + .withOutputTags(mainTag, TupleTagList.of(sideTag))); + + thrown.expect(IllegalStateException.class); + thrown.expectMessage("unable to infer a default Coder"); + pipeline.run(); + } + + @Test + public void testSideOutputUnregisteredExplicitCoder() { + Pipeline pipeline = TestPipeline.create(); + PCollection input = pipeline + .apply(Create.of(Arrays.asList(1, 2, 3))); + + final TupleTag mainTag = new TupleTag(); + final TupleTag sideTag = new TupleTag(); + PCollectionTuple outputTuple = input.apply(ParDo.of(new SideOutputDummyFn(sideTag)) + .withOutputTags(mainTag, TupleTagList.of(sideTag))); + + outputTuple.get(sideTag) + .setCoder(new TestDummyCoder()); + + pipeline.run(); + } + + @Test + public void testMainOutputUnregisteredExplicitCoder() { + Pipeline pipeline = TestPipeline.create(); + PCollection input = pipeline + 
.apply(Create.of(Arrays.asList(1, 2, 3))); + + final TupleTag mainTag = new TupleTag(); + final TupleTag sideTag = new TupleTag() {}; + PCollectionTuple outputTuple = input.apply(ParDo.of(new MainOutputDummyFn(sideTag)) + .withOutputTags(mainTag, TupleTagList.of(sideTag))); + + outputTuple.get(mainTag) + .setCoder(new TestDummyCoder()); + + pipeline.run(); + } + + @Test + public void testParDoOutputWithTimestamp() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(3, 42, 6)).setOrdered(true); + + PCollection output = + input + .apply(ParDo.of(new TestOutputTimestampDoFn())) + .apply(ParDo.of(new TestShiftTimestampDoFn(Duration.ZERO, Duration.ZERO))) + .apply(ParDo.of(new TestFormatTimestampDoFn())); + + DataflowAssert.that(output).containsInAnyOrder( + "processing: 3, timestamp: 3", + "processing: 42, timestamp: 42", + "processing: 6, timestamp: 6"); + + p.run(); + } + + @Test + public void testParDoShiftTimestamp() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(3, 42, 6)).setOrdered(true); + + PCollection output = + input + .apply(ParDo.of(new TestOutputTimestampDoFn())) + .apply(ParDo.of(new TestShiftTimestampDoFn(Duration.millis(1000), + Duration.millis(-1000)))) + .apply(ParDo.of(new TestFormatTimestampDoFn())); + + DataflowAssert.that(output).containsInAnyOrder( + "processing: 3, timestamp: -997", + "processing: 42, timestamp: -958", + "processing: 6, timestamp: -994"); + + p.run(); + } + + @Test + public void testParDoShiftTimestampInvalid() { + Pipeline p = TestPipeline.create(); + + createInts(p, Arrays.asList(3, 42, 6)).setOrdered(true) + .apply(ParDo.of(new TestOutputTimestampDoFn())) + .apply(ParDo.of(new TestShiftTimestampDoFn(Duration.millis(1000), + Duration.millis(-1001)))) + .apply(ParDo.of(new TestFormatTimestampDoFn())); + + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + // expected + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java new file mode 100644 index 0000000000000..0d19f082ee07e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.createInts; +import static com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn; +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for Partition + */ +@RunWith(JUnit4.class) +public class PartitionTest implements Serializable { + static class ModFn implements PartitionFn { + public int partitionFor(Integer elem, int numPartitions) { + return elem % numPartitions; + } + } + + static class IdentityFn implements PartitionFn { + public int partitionFor(Integer elem, int numPartitions) { + return elem; + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testEvenOddPartition() { + TestPipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(591, 11789, 1257, 24578, 24799, 307)); + + PCollectionList outputs = input.apply(Partition.of(2, new ModFn())); + assertTrue(outputs.size() == 2); + DataflowAssert.that(outputs.get(0)).containsInAnyOrder(24578); + DataflowAssert.that(outputs.get(1)).containsInAnyOrder(591, 11789, 1257, + 24799, 307); + p.run(); + } + + @Test + public void testModPartition() { + TestPipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(1, 2, 4, 5)); + + PCollectionList outputs = input.apply(Partition.of(3, new ModFn())); + assertTrue(outputs.size() == 3); + DataflowAssert.that(outputs.get(0)).containsInAnyOrder(); + DataflowAssert.that(outputs.get(1)).containsInAnyOrder(1, 4); + DataflowAssert.that(outputs.get(2)).containsInAnyOrder(2, 5); + p.run(); + } + + @Test + public void testOutOfBoundsPartitions() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(-1)); + + PCollectionList outputs = + input.apply(Partition.of(5, new IdentityFn())); + + try { + p.run(); + } catch (RuntimeException e) { + assertThat(e.toString(), containsString( + "Partition function returned out of bounds index: -1 not in [0..5)")); + } + } + + @Test + public void testZeroNumPartitions() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(591)); + + try { + PCollectionList outputs = + input.apply(Partition.of(0, new IdentityFn())); + fail("should have failed"); + } catch (IllegalArgumentException exn) { + assertThat(exn.toString(), containsString("numPartitions must be > 0")); + } + } + + @Test + public void testDroppedPartition() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, + Arrays.asList(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); + + // Compute the set of integers either 1 or 2 mod 3, the hard way. 
+ PCollectionList outputs = + input.apply(Partition.of(3, new ModFn())); + + List> outputsList = new ArrayList<>(outputs.getAll()); + outputsList.remove(0); + outputs = PCollectionList.of(outputsList); + assertTrue(outputs.size() == 2); + + PCollection output = outputs.apply(Flatten.create()); + DataflowAssert.that(output).containsInAnyOrder(2, 4, 5, 7, 8, 10, 11); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java new file mode 100644 index 0000000000000..d6de05af6d3f4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.hamcrest.Matchers.both; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; + +import com.google.cloud.dataflow.sdk.TestUtils; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Tests for RateLimiter. + */ +@RunWith(JUnit4.class) +public class RateLimitingTest { + + /** + * Pass-thru function. + */ + private static class IdentityFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(c.element()); + } + } + + /** + * Introduces a delay in processing, then passes thru elements. + */ + private static class DelayFn extends DoFn { + public static final long DELAY_MS = 250; + + @Override + public void processElement(ProcessContext c) { + try { + Thread.sleep(DELAY_MS); + } catch (InterruptedException e) { + e.printStackTrace(); + throw new RuntimeException("Interrupted"); + } + c.output(c.element()); + } + } + + /** + * Throws an exception after some number of calls. 
+ */ + private static class ExceptionThrowingFn extends DoFn { + private final AtomicInteger numSuccesses; + private final AtomicInteger numProcessed = new AtomicInteger(); + private final AtomicInteger numFailures = new AtomicInteger(); + + private ExceptionThrowingFn(int numSuccesses) { + this.numSuccesses = new AtomicInteger(numSuccesses); + } + + @Override + public void processElement(ProcessContext c) { + numProcessed.incrementAndGet(); + if (numSuccesses.decrementAndGet() > 0) { + c.output(c.element()); + return; + } + + numFailures.incrementAndGet(); + throw new RuntimeException("Expected failure"); + } + } + + /** + * Measures concurrency of the processElement method. + * + *
<p>
Note: this only works when + * {@link DirectPipelineRunner#testSerializability} is disabled, otherwise + * the counters are not available after the run. + */ + private static class ConcurrencyMeasuringFn extends DoFn { + private int concurrentElements = 0; + private int maxConcurrency = 0; + + @Override + public void processElement(ProcessContext c) { + synchronized (this) { + concurrentElements++; + if (concurrentElements > maxConcurrency) { + maxConcurrency = concurrentElements; + } + } + + c.output(c.element()); + + synchronized (this) { + concurrentElements--; + } + } + } + + @Test + public void testRateLimitingMax() { + int n = 10; + double rate = 10.0; + long duration = runWithRate(n, rate, new IdentityFn()); + + long perElementPause = (long) (1000L / rate); + long minDuration = (n - 1) * perElementPause; + Assert.assertThat(duration, greaterThan(minDuration)); + } + + @Test(timeout = 5000L) + public void testExceptionHandling() { + ExceptionThrowingFn fn = new ExceptionThrowingFn<>(10); + try { + runWithRate(100, 0.0, fn); + Assert.fail("Expected exception to propagate"); + } catch (RuntimeException e) { + Assert.assertThat(e.getMessage(), containsString("Expected failure")); + } + + // Should have processed 10 elements, but stopped before processing all + // of them. + Assert.assertThat(fn.numProcessed.get(), + is(both(greaterThanOrEqualTo(10)) + .and(lessThan(100)))); + + // The first failure should prevent the scheduling of any more elements. + Assert.assertThat(fn.numFailures.get(), + is(both(greaterThanOrEqualTo(1)) + .and(lessThan(RateLimiting.DEFAULT_MAX_PARALLELISM)))); + } + + /** + * Test exception handling on the last element to be processed. + */ + @Test(timeout = 5000L) + public void testExceptionHandling2() { + ExceptionThrowingFn fn = new ExceptionThrowingFn<>(10); + try { + runWithRate(10, 0.0, fn); + Assert.fail("Expected exception to propagate"); + } catch (RuntimeException e) { + Assert.assertThat(e.getMessage(), containsString("Expected failure")); + } + + // Should have processed 10 elements, but stopped before processing all + // of them. + Assert.assertEquals(10, fn.numProcessed.get()); + Assert.assertEquals(1, fn.numFailures.get()); + } + + /** + * Provides more elements than can be scheduled at once, testing that the + * backlog limit is applied. + */ + @Test + public void testBacklogLimiter() { + long duration = runWithRate(2 * RateLimiting.DEFAULT_MAX_PARALLELISM, + -1.0 /* unlimited */, new DelayFn()); + + // Should take > 2x the delay interval, since no more than half the elements + // can be scheduled at once. + Assert.assertThat(duration, + greaterThan(2 * DelayFn.DELAY_MS)); + } + + private long runWithRate(int numElements, double rateLimit, + DoFn doFn) { + DirectPipeline p = DirectPipeline.createForTest(); + // Run with serializability testing disabled so that our tests can inspect + // the DoFns after the test. + p.getRunner().withSerializabilityTesting(false); + + ArrayList data = new ArrayList<>(numElements); + for (int i = 0; i < numElements; ++i) { + data.add(i); + } + + PCollection input = TestUtils.createInts(p, data); + + ConcurrencyMeasuringFn downstream = new ConcurrencyMeasuringFn<>(); + + PCollection output = input + .apply(RateLimiting.perWorker(doFn) + .withRateLimit(rateLimit)) + .apply(ParDo + .of(downstream)); + + long startTime = System.currentTimeMillis(); + + DirectPipelineRunner.EvaluationResults results = p.run(); + + // Downstream methods should not see parallel threads. 
+ Assert.assertEquals(1, downstream.maxConcurrency); + + long endTime = System.currentTimeMillis(); + return endTime - startTime; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesTest.java new file mode 100644 index 0000000000000..a44fa2d39103c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesTest.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for RemovedDuplicates. + */ +@RunWith(JUnit4.class) +public class RemoveDuplicatesTest { + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testRemoveDuplicates() { + List strings = Arrays.asList( + "k1", + "k5", + "k5", + "k2", + "k1", + "k2", + "k3"); + + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(strings)) + .setCoder(StringUtf8Coder.of()); + + PCollection output = + input.apply(RemoveDuplicates.create()); + + DataflowAssert.that(output) + .containsInAnyOrder("k1", "k5", "k2", "k3"); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testRemoveDuplicatesEmpty() { + List strings = Arrays.asList(); + + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(strings)) + .setCoder(StringUtf8Coder.of()); + + PCollection output = + input.apply(RemoveDuplicates.create()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java new file mode 100644 index 0000000000000..7c51d096fe4e1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.api.client.util.Joiner; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Tests for Sample transform. + */ +@RunWith(JUnit4.class) +public class SampleTest { + static final Integer[] EMPTY = new Integer[] { }; + static final Integer[] DATA = new Integer[] {1, 2, 3, 4, 5}; + static final Integer[] REPEATED_DATA = new Integer[] {1, 1, 2, 2, 3, 3, 4, 4, 5, 5}; + + /** + * Verifies that the result of a Sample operation contains the expected number of elements, + * and that those elements are a subset of the items in expected. + */ + public static class VerifyCorrectSample + implements SerializableFunction, Void> { + private T[] expectedValues; + private int expectedSize; + + /** + * expectedSize is the number of elements that the Sample should contain. expected is the set + * of elements that the sample may contain. + */ + VerifyCorrectSample(int expectedSize, T... expected) { + this.expectedValues = expected; + this.expectedSize = expectedSize; + } + + @Override + public Void apply(Iterable in) { + List actual = new ArrayList<>(); + for (T elem : in) { + actual.add(elem); + } + + assertEquals(expectedSize, actual.size()); + + Collections.sort(actual); // We assume that @expected is already sorted. + int i = 0; // Index into @expected + for (T s : actual) { + boolean matchFound = false; + for (; i < expectedValues.length; i++) { + if (s.equals(expectedValues[i])) { + matchFound = true; + break; + } + } + assertTrue("Invalid sample: " + Joiner.on(',').join(actual), matchFound); + i++; // Don't match the same element again. 
+ } + return null; + } + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSample() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(DATA)) + .setCoder(BigEndianIntegerCoder.of()); + PCollection> output = input.apply( + Sample.fixedSizeGlobally(3)); + + DataflowAssert.thatSingletonIterable(output) + .satisfies(new VerifyCorrectSample<>(3, DATA)); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSampleEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(EMPTY)) + .setCoder(BigEndianIntegerCoder.of()); + PCollection> output = input.apply( + Sample.fixedSizeGlobally(3)); + + DataflowAssert.thatSingletonIterable(output) + .satisfies(new VerifyCorrectSample<>(0, EMPTY)); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSampleZero() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(DATA)) + .setCoder(BigEndianIntegerCoder.of()); + PCollection> output = input.apply( + Sample.fixedSizeGlobally(0)); + + DataflowAssert.thatSingletonIterable(output) + .satisfies(new VerifyCorrectSample<>(0, DATA)); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSampleInsufficientElements() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(DATA)) + .setCoder(BigEndianIntegerCoder.of()); + PCollection> output = input.apply( + Sample.fixedSizeGlobally(10)); + + DataflowAssert.thatSingletonIterable(output) + .satisfies(new VerifyCorrectSample<>(5, DATA)); + p.run(); + } + + @Test(expected = IllegalArgumentException.class) + public void testSampleNegative() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(DATA)) + .setCoder(BigEndianIntegerCoder.of()); + input.apply(Sample.fixedSizeGlobally(-1)); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSampleMultiplicity() { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(REPEATED_DATA)) + .setCoder(BigEndianIntegerCoder.of()); + // At least one value must be selected with multiplicity. + PCollection> output = input.apply( + Sample.fixedSizeGlobally(6)); + + DataflowAssert.thatSingletonIterable(output) + .satisfies(new VerifyCorrectSample<>(6, REPEATED_DATA)); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java new file mode 100644 index 0000000000000..909dcba9981fd --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.VarLongCoder; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests of Min, Max, Mean, and Sum. + */ +@RunWith(JUnit4.class) +public class SimpleStatsFnsTest { + static final double DOUBLE_COMPARISON_ACCURACY = 1e-7; + + private static class TestCase> { + final List data; + final N min; + final N max; + final N sum; + final Double mean; + + public TestCase(N min, N max, N sum, N... values) { + this.data = Arrays.asList(values); + this.min = min; + this.max = max; + this.sum = sum; + this.mean = + values.length == 0 ? 0.0 : sum.doubleValue() / values.length; + } + } + + static final List> DOUBLE_CASES = Arrays.asList( + new TestCase<>(-312.31, 6312.31, 11629.13, + -312.31, 29.13, 112.158, 6312.31, -312.158, -312.158, 112.158, + -312.31, 6312.31, 0.0), + new TestCase<>(3.14, 3.14, 3.14, 3.14), + new TestCase<>(Double.MAX_VALUE, Double.MIN_NORMAL, 0.0)); + + static final List> LONG_CASES = Arrays.asList( + new TestCase<>(-50000000000000000L, + 70000000000000000L, + 60000033123213121L, + 0L, 1L, 10000000000000000L, -50000000000000000L, + 70000000000000000L, 0L, 10000000000000000L, -1L, + -50000000000000000L, 70000000000000000L, 33123213121L), + new TestCase<>(3L, 3L, 3L, 3L), + new TestCase<>(Long.MAX_VALUE, Long.MIN_VALUE, 0L)); + + static final List> INTEGER_CASES = Arrays.asList( + new TestCase<>(-3, 6, 22, + 1, -3, 2, 6, 3, 4, -3, 5, 6, 1), + new TestCase<>(3, 3, 3, 3), + new TestCase<>(Integer.MAX_VALUE, Integer.MIN_VALUE, 0)); + + @Test + public void testDoubleStats() { + for (TestCase t : DOUBLE_CASES) { + assertEquals(t.sum, new Sum.SumDoubleFn().apply(t.data), + DOUBLE_COMPARISON_ACCURACY); + assertEquals(t.min, new Min.MinDoubleFn().apply(t.data), + DOUBLE_COMPARISON_ACCURACY); + assertEquals(t.max, new Max.MaxDoubleFn().apply(t.data), + DOUBLE_COMPARISON_ACCURACY); + assertEquals(t.mean, new Mean.MeanFn().apply(t.data), + DOUBLE_COMPARISON_ACCURACY); + } + } + + @Test + public void testIntegerStats() { + for (TestCase t : INTEGER_CASES) { + assertEquals(t.sum, new Sum.SumIntegerFn().apply(t.data)); + assertEquals(t.min, new Min.MinIntegerFn().apply(t.data)); + assertEquals(t.max, new Max.MaxIntegerFn().apply(t.data)); + assertEquals(t.mean, new Mean.MeanFn().apply(t.data)); + } + } + + @Test + public void testLongStats() { + for (TestCase t : LONG_CASES) { + assertEquals(t.sum, new Sum.SumLongFn().apply(t.data)); + assertEquals(t.min, new Min.MinLongFn().apply(t.data)); + assertEquals(t.max, new Max.MaxLongFn().apply(t.data)); + assertEquals(t.mean, new Mean.MeanFn().apply(t.data)); + } + } + + @Test + public void testMeanCountSumSerializable() { + Pipeline p = TestPipeline.create(); + + PCollection> input = p + .apply(Create.of(KV.of(1L, 1.5), KV.of(2L, 7.3))) + .setCoder(KvCoder.of(VarLongCoder.of(), DoubleCoder.of())); + + PCollection> meanPerKey = + input.apply(Mean.perKey()); + + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java new file mode 100644 index 0000000000000..63625a7f5f2b1 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.runners.RecordingPipelineVisitor; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.Matchers; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +/** Tests for Top */ +@RunWith(JUnit4.class) +public class TopTest { + + @Rule + public ExpectedException expectedEx = ExpectedException.none(); + + @SuppressWarnings("unchecked") + static final String[] COLLECTION = new String[] { + "a", "bb", "c", "c", "z" + }; + + @SuppressWarnings("unchecked") + static final String[] EMPTY_COLLECTION = new String[] { + }; + + @SuppressWarnings("unchecked") + static final KV[] TABLE = new KV[] { + KV.of("a", 1), + KV.of("a", 2), + KV.of("a", 3), + KV.of("b", 1), + KV.of("b", 10), + KV.of("b", 10), + KV.of("b", 100), + }; + + @SuppressWarnings("unchecked") + static final KV[] EMPTY_TABLE = new KV[] { + }; + + public PCollection> createInputTable(Pipeline p) { + return p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + } + + public PCollection> createEmptyInputTable(Pipeline p) { + return p.apply(Create.of(Arrays.asList(EMPTY_TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + } + + @Test + @SuppressWarnings("unchecked") + public void testTop() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + PCollection> top1 = input.apply(Top.of(1, new OrderByLength())); + PCollection> top2 = input.apply(Top.largest(2)); + PCollection> top3 = input.apply(Top.smallest(3)); + + PCollection>> largestPerKey = createInputTable(p) + .apply(Top.largestPerKey(2)); + PCollection>> smallestPerKey = createInputTable(p) + 
.apply(Top.smallestPerKey(2)); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(top1).get(0), contains("bb")); + assertThat(results.getPCollection(top2).get(0), contains("z", "c")); + assertThat(results.getPCollection(top3).get(0), contains("a", "bb", "c")); + assertThat(results.getPCollection(largestPerKey), containsInAnyOrder( + KV.of("a", Arrays.asList(3, 2)), + KV.of("b", Arrays.asList(100, 10)))); + assertThat(results.getPCollection(smallestPerKey), containsInAnyOrder( + KV.of("a", Arrays.asList(1, 2)), + KV.of("b", Arrays.asList(1, 10)))); + } + + @Test + @SuppressWarnings("unchecked") + public void testTopEmpty() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(EMPTY_COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + PCollection> top1 = input.apply(Top.of(1, new OrderByLength())); + PCollection> top2 = input.apply(Top.largest(2)); + PCollection> top3 = input.apply(Top.smallest(3)); + + PCollection>> largestPerKey = createEmptyInputTable(p) + .apply(Top.largestPerKey(2)); + PCollection>> smallestPerKey = createEmptyInputTable(p) + .apply(Top.smallestPerKey(2)); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(top1).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(top2).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(top3).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(largestPerKey), containsInAnyOrder()); + assertThat(results.getPCollection(smallestPerKey), containsInAnyOrder()); + } + + @Test + @SuppressWarnings("unchecked") + public void testTopZero() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + PCollection> top1 = input.apply(Top.of(0, new OrderByLength())); + PCollection> top2 = input.apply(Top.largest(0)); + PCollection> top3 = input.apply(Top.smallest(0)); + + PCollection>> largestPerKey = createInputTable(p) + .apply(Top.largestPerKey(0)); + + PCollection>> smallestPerKey = createInputTable(p) + .apply(Top.smallestPerKey(0)); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(top1).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(top2).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(top3).get(0), containsInAnyOrder()); + assertThat(results.getPCollection(largestPerKey), containsInAnyOrder( + KV.of("a", Arrays.asList()), + KV.of("b", Arrays.asList()))); + assertThat(results.getPCollection(smallestPerKey), containsInAnyOrder( + KV.of("a", Arrays.asList()), + KV.of("b", Arrays.asList()))); + } + + // This is a purely compile-time test. If the code compiles, then it worked. 
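+  // Top.perKey requires a Comparator that is also Serializable, since the
+  // comparator travels with the transform. Both forms below satisfy that
+  // bound: a class implementing Comparator and Serializable, and one
+  // implementing SerializableComparator.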
+ @Test + public void testPerKeySerializabilityRequirement() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + PCollection>> top1 = createInputTable(p) + .apply(Top.perKey(1, + new IntegerComparator())); + + PCollection>> top2 = createInputTable(p) + .apply(Top.perKey(1, + new IntegerComparator2())); + } + + @Test + public void testCountConstraint() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage(Matchers.containsString(">= 0")); + + input.apply(Top.of(-1, new OrderByLength())); + } + + @Test + public void testTransformName() { + DirectPipeline p = DirectPipeline.createForTest(); + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))) + .setCoder(StringUtf8Coder.of()); + + PTransform, PCollection>> top = Top + .of(10, new OrderByLength()); + input.apply(top); + + p.traverseTopologically(new RecordingPipelineVisitor()); + // Check that the transform is named "Top" rather than "Combine". + assertThat(p.getFullName(top), Matchers.startsWith("Top")); + } + + static class OrderByLength implements Comparator, Serializable { + @Override + public int compare(String a, String b) { + if (a.length() != b.length()) { + return a.length() - b.length(); + } else { + return a.compareTo(b); + } + } + } + + static class IntegerComparator implements Comparator, Serializable { + @Override + public int compare(Integer o1, Integer o2) { + return o1.compareTo(o2); + } + } + + static class IntegerComparator2 implements SerializableComparator { + @Override + public int compare(Integer o1, Integer o2) { + return o1.compareTo(o2); + } + } + +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java new file mode 100644 index 0000000000000..497d8fc8406e0 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** + * Tests for Values transform. 
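+ * Values drops the keys from a PCollection of KVs and keeps every value,
+ * including duplicates (note the duplicate 4 in the expected output below).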
+ */ +@RunWith(JUnit4.class) +public class ValuesTest { + static final KV[] TABLE = new KV[] { + KV.of("one", 1), + KV.of("two", 2), + KV.of("three", 3), + KV.of("four", 4), + KV.of("dup", 4) + }; + + static final KV[] EMPTY_TABLE = new KV[] { + }; + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testValues() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection output = input.apply(Values.create()); + + DataflowAssert.that(output) + .containsInAnyOrder(1, 2, 3, 4, 4); + + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. + @Test + public void testValuesOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + input.setOrdered(true); + PCollection output = + input.apply(Values.create()).setOrdered(true); + + DataflowAssert.that(output) + .containsInOrder(1, 2, 3, 4, 4); + + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testValuesEmpty() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.of(Arrays.asList(EMPTY_TABLE))).setCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())); + + PCollection output = input.apply(Values.create()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java new file mode 100644 index 0000000000000..3a7c8187d9237 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static org.hamcrest.CoreMatchers.isA; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.VarIntCoder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; + + +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.NoSuchElementException; + +/** + * Tests for {@link View}. See also {@link ParDoTest} which + * provides additional coverage since views can only be + * observed via {@link ParDo}. 
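+ *
+ * View.asSingleton expects exactly one element; the tests below expect
+ * runtime failures for empty and multi-element inputs, while View.asIterable
+ * accepts any number of elements.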
+ */ +@RunWith(JUnit4.class) +public class ViewTest implements Serializable { + // This test is Serializable, just so that it's easy to have + // anonymous inner classes inside the non-static test methods. + + @Rule + public transient ExpectedException thrown = ExpectedException.none(); + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSingletonSideInput() { + Pipeline pipeline = TestPipeline.create(); + + final PCollectionView view = pipeline + .apply(Create.of(47)) + .apply(View.asSingleton()); + + PCollection output = pipeline + .apply(Create.of(1, 2, 3)) + .apply(ParDo.withSideInputs(view).of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.sideInput(view)); + } + })); + + DataflowAssert.that(output) + .containsInAnyOrder(47, 47, 47); + + pipeline.run(); + } + + @Test + public void testEmptySingletonSideInput() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + final PCollectionView view = pipeline + .apply(Create.of()) + .setCoder(VarIntCoder.of()) + .apply(View.asSingleton()); + + PCollection output = pipeline + .apply(Create.of(1, 2, 3)) + .apply(ParDo.withSideInputs(view).of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.sideInput(view)); + } + })); + + thrown.expect(RuntimeException.class); + thrown.expectCause(isA(NoSuchElementException.class)); + thrown.expectMessage("Empty"); + thrown.expectMessage("PCollection"); + thrown.expectMessage("singleton"); + + pipeline.run(); + } + + @Test + public void testNonSingletonSideInput() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + final PCollectionView view = pipeline + .apply(Create.of(1, 2, 3)) + .apply(View.asSingleton()); + + PCollection output = pipeline + .apply(Create.of(1, 2, 3)) + .apply(ParDo.withSideInputs(view).of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.sideInput(view)); + } + })); + + thrown.expect(RuntimeException.class); + thrown.expectCause(isA(IllegalArgumentException.class)); + thrown.expectMessage("PCollection"); + thrown.expectMessage("more than one"); + thrown.expectMessage("singleton"); + + pipeline.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testIterableSideInput() { + Pipeline pipeline = TestPipeline.create(); + + final PCollectionView, ?> view = pipeline + .apply(Create.of(11, 13, 17, 23)) + .apply(View.asIterable()); + + PCollection output = pipeline + .apply(Create.of(29, 31)) + .apply(ParDo.withSideInputs(view).of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + for (Integer i : c.sideInput(view)) { + c.output(i); + } + } + })); + + DataflowAssert.that(output).containsInAnyOrder( + 11, 13, 17, 23, + 11, 13, 17, 23); + + pipeline.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java new file mode 100644 index 0000000000000..3e4e359022c96 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Tests for ExtractKeys transform. + */ +@RunWith(JUnit4.class) +public class WithKeysTest { + static final String[] COLLECTION = new String[] { + "a", + "aa", + "b", + "bb", + "bbb" + }; + + static final List> WITH_KEYS = Arrays.asList( + KV.of(1, "a"), + KV.of(2, "aa"), + KV.of(1, "b"), + KV.of(2, "bb"), + KV.of(3, "bbb") + ); + + static final List> WITH_CONST_KEYS = Arrays.asList( + KV.of(100, "a"), + KV.of(100, "aa"), + KV.of(100, "b"), + KV.of(100, "bb"), + KV.of(100, "bbb") + ); + + @Test + public void testExtractKeys() { + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))).setCoder( + StringUtf8Coder.of()); + + PCollection> output = input.apply(WithKeys.of( + new LengthAsKey())); + DataflowAssert.that(output) + .containsInAnyOrder(WITH_KEYS); + + p.run(); + } + + // TODO: setOrdered(true) isn't supported yet by the Dataflow service. + @Test + public void testExtractKeysOrdered() { + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))).setCoder( + StringUtf8Coder.of()); + + input.setOrdered(true); + PCollection> output = input.apply(WithKeys.of( + new LengthAsKey())).setOrdered(true); + DataflowAssert.that(output) + .containsInAnyOrder(WITH_KEYS); + + p.run(); + } + + @Test + public void testConstantKeys() { + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION))).setCoder( + StringUtf8Coder.of()); + + PCollection> output = + input.apply(WithKeys.of(100)); + DataflowAssert.that(output) + .containsInAnyOrder(WITH_CONST_KEYS); + + p.run(); + } + + /** + * Key a value by its length. + */ + public static class LengthAsKey + implements SerializableFunction { + @Override + public Integer apply(String value) { + return value.length(); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultCoderTest.java new file mode 100644 index 0000000000000..afb8a998798ec --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultCoderTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult.CoGbkResultCoder; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.Serializer; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** + * Tests the CoGbkResult.CoGbkResultCoder. + */ +@RunWith(JUnit4.class) +public class CoGbkResultCoderTest { + + @Test + public void testSerializationDeserialization() { + CoGbkResultSchema schema = + new CoGbkResultSchema(TupleTagList.of(new TupleTag()).and( + new TupleTag())); + UnionCoder unionCoder = + UnionCoder.of(Arrays.>asList(StringUtf8Coder.of(), + DoubleCoder.of())); + CoGbkResultCoder newCoder = CoGbkResultCoder.of(schema, unionCoder); + CloudObject encoding = newCoder.asCloudObject(); + Coder decodedCoder = Serializer.deserialize(encoding, Coder.class); + assertEquals(newCoder, decodedCoder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java new file mode 100644 index 0000000000000..016ba15d5ae44 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java @@ -0,0 +1,348 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.hamcrest.Matcher; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +/** + * Tests for CoGroupByKeyTest. Implements Serializable for anonymous DoFns. + */ +@RunWith(JUnit4.class) +public class CoGroupByKeyTest implements Serializable { + + /** + * Converts the given list into a PCollection belonging to the provided + * Pipeline in such a way that coder inference needs to be performed. + */ + private PCollection> createInput( + Pipeline p, List> list) { + return p + .apply(Create.of(list)) + // Create doesn't infer coders for parameterized types. + .setCoder( + KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of())) + // Do a dummy transform so consumers must deal with coder inference. + .apply(ParDo.of(new DoFn, + KV>() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element()); + } + })); + } + + /** + * Returns a PCollection> containing the + * results of the CoGbk over 3 PCollection>, each of + * which correlates a customer id to purchases, addresses, or names, + * respectively. + */ + private PCollection> buildPurchasesCoGbk( + Pipeline p, + TupleTag purchasesTag, + TupleTag addressesTag, + TupleTag namesTag) { + List> idToPurchases = + Arrays.asList( + KV.of(2, "Boat"), + KV.of(1, "Shoes"), + KV.of(3, "Car"), + KV.of(1, "Book"), + KV.of(10, "Pens"), + KV.of(8, "House"), + KV.of(4, "Suit"), + KV.of(11, "House"), + KV.of(14, "Shoes"), + KV.of(2, "Suit"), + KV.of(8, "Suit Case"), + KV.of(3, "House")); + + List> idToAddress = + Arrays.asList( + KV.of(2, "53 S. 3rd"), + KV.of(10, "383 Jackson Street"), + KV.of(20, "3 W. 
Arizona"), + KV.of(3, "29 School Rd"), + KV.of(8, "6 Watling Rd")); + + List> idToName = + Arrays.asList( + KV.of(1, "John Smith"), + KV.of(2, "Sally James"), + KV.of(8, "Jeffery Spalding"), + KV.of(20, "Joan Lichtfield")); + + PCollection> purchasesTable = + createInput(p, idToPurchases); + + PCollection> addressTable = + createInput(p, idToAddress); + + PCollection> nameTable = + createInput(p, idToName); + + PCollection> coGbkResults = + KeyedPCollectionTuple.of(namesTag, nameTable) + .and(addressesTag, addressTable) + .and(purchasesTag, purchasesTable) + .apply(CoGroupByKey.create()); + return coGbkResults; + } + + @Test + public void testCoGroupByKey() { + TupleTag namesTag = new TupleTag<>(); + TupleTag addressesTag = new TupleTag<>(); + TupleTag purchasesTag = new TupleTag<>(); + + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection> coGbkResults = + buildPurchasesCoGbk(p, purchasesTag, addressesTag, namesTag); + + EvaluationResults results = p.run(); + + List> finalResult = + results.getPCollection(coGbkResults); + + HashMap>> namesMatchers = + new HashMap>>() { + { + put(1, containsInAnyOrder("John Smith")); + put(2, containsInAnyOrder("Sally James")); + put(8, containsInAnyOrder("Jeffery Spalding")); + put(20, containsInAnyOrder("Joan Lichtfield")); + } + }; + + HashMap>> addressesMatchers = + new HashMap>>() { + { + put(2, containsInAnyOrder("53 S. 3rd")); + put(3, containsInAnyOrder("29 School Rd")); + put(8, containsInAnyOrder("6 Watling Rd")); + put(10, containsInAnyOrder("383 Jackson Street")); + put(20, containsInAnyOrder("3 W. Arizona")); + } + }; + + HashMap>> purchasesMatchers = + new HashMap>>() { + { + put(1, containsInAnyOrder("Shoes", "Book")); + put(2, containsInAnyOrder("Suit", "Boat")); + put(3, containsInAnyOrder("Car", "House")); + put(4, containsInAnyOrder("Suit")); + put(8, containsInAnyOrder("House", "Suit Case")); + put(10, containsInAnyOrder("Pens")); + put(11, containsInAnyOrder("House")); + put(14, containsInAnyOrder("Shoes")); + } + }; + + // TODO: Figure out a way to do a hamcrest matcher for CoGbkResults. + for (KV result : finalResult) { + int key = result.getKey(); + CoGbkResult row = result.getValue(); + checkValuesMatch(key, namesMatchers, row, namesTag); + checkValuesMatch(key, addressesMatchers, row, addressesTag); + checkValuesMatch(key, purchasesMatchers, row, purchasesTag); + + } + + } + + /** + * Checks that the values for the given tag in the given row matches the + * expected values for the given key in the given matchers map. + */ + private void checkValuesMatch( + K key, + HashMap>> matchers, + CoGbkResult row, + TupleTag tag) { + Iterable taggedValues = row.getAll(tag); + if (taggedValues.iterator().hasNext()) { + assertThat(taggedValues, matchers.get(key)); + } else { + assertNull(matchers.get(key)); + } + } + + /** + * A DoFn used in testCoGroupByKeyHandleResults(), to test processing the + * results of a CoGroupByKey. + */ + private static class CorrelatePurchaseCountForAddressesWithoutNamesFn extends + DoFn, KV> { + private final TupleTag purchasesTag; + + private final TupleTag addressesTag; + + private final TupleTag namesTag; + + private CorrelatePurchaseCountForAddressesWithoutNamesFn( + TupleTag purchasesTag, + TupleTag addressesTag, + TupleTag namesTag) { + this.purchasesTag = purchasesTag; + this.addressesTag = addressesTag; + this.namesTag = namesTag; + } + + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + CoGbkResult row = e.getValue(); + // Don't actually care about the id. 
+ Iterable names = row.getAll(namesTag); + if (names.iterator().hasNext()) { + // Nothing to do. There was a name. + return; + } + Iterable addresses = row.getAll(addressesTag); + if (!addresses.iterator().hasNext()) { + // Nothing to do, there was no address. + return; + } + // Buffer the addresses so we can accredit all of them with + // corresponding purchases. All addresses are for the same id, so + // if there are multiple, we apply the same purchase count to all. + ArrayList addressList = new ArrayList(); + for (String address : addresses) { + addressList.add(address); + } + + Iterable purchases = row.getAll(purchasesTag); + + int purchaseCount = 0; + for (String purchase : purchases) { + purchaseCount++; + } + + for (String address : addressList) { + c.output(KV.of(address, purchaseCount)); + } + } + } + + /** + * Tests that the consuming DoFn + * (CorrelatePurchaseCountForAddressesWithoutNamesFn) performs as expected. + */ + @SuppressWarnings("unchecked") + @Test + public void testConsumingDoFn() { + TupleTag purchasesTag = new TupleTag<>(); + TupleTag addressesTag = new TupleTag<>(); + TupleTag namesTag = new TupleTag<>(); + + // result1 should get filtered out because it has a name. + CoGbkResult result1 = CoGbkResult + .of(purchasesTag, Arrays.asList("3a", "3b")) + .and(addressesTag, Arrays.asList("2a", "2b")) + .and(namesTag, Arrays.asList("1a")); + // result 2 should be counted because it has an address and purchases. + CoGbkResult result2 = CoGbkResult + .of(purchasesTag, Arrays.asList("5a", "5b")) + .and(addressesTag, Arrays.asList("4a")) + .and(namesTag, new ArrayList()); + // result 3 should not be counted because it has no addresses. + CoGbkResult result3 = CoGbkResult + .of(purchasesTag, Arrays.asList("7a", "7b")) + .and(addressesTag, new ArrayList()) + .and(namesTag, new ArrayList()); + // result 4 should be counted as 0, because it has no purchases. + CoGbkResult result4 = CoGbkResult + .of(purchasesTag, new ArrayList()) + .and(addressesTag, Arrays.asList("8a")) + .and(namesTag, new ArrayList()); + + List> results = + DoFnTester.of( + new CorrelatePurchaseCountForAddressesWithoutNamesFn( + purchasesTag, + addressesTag, + namesTag)) + .processBatch( + KV.of(1, result1), + KV.of(2, result2), + KV.of(3, result3), + KV.of(4, result4)); + assertThat(results, containsInAnyOrder(KV.of("4a", 2), KV.of("8a", 0))); + } + + /** + * Tests the pipeline end-to-end. Builds the purchases CoGroupByKey, and + * applies CorrelatePurchaseCountForAddressesWithoutNamesFn to the results. + */ + @SuppressWarnings("unchecked") + @Test + public void testCoGroupByKeyHandleResults() { + TupleTag namesTag = new TupleTag<>(); + TupleTag addressesTag = new TupleTag<>(); + TupleTag purchasesTag = new TupleTag<>(); + + Pipeline p = TestPipeline.create(); + + PCollection> coGbkResults = + buildPurchasesCoGbk(p, purchasesTag, addressesTag, namesTag); + + // Do some simple processing on the result of the CoGroupByKey. Count the + // purchases for each address on record that has no associated name. 
+ PCollection> + purchaseCountByKnownAddressesWithoutKnownNames = + coGbkResults.apply(ParDo.of( + new CorrelatePurchaseCountForAddressesWithoutNamesFn( + purchasesTag, addressesTag, namesTag))); + + DataflowAssert.that(purchaseCountByKnownAddressesWithoutKnownNames) + .containsInAnyOrder( + KV.of("29 School Rd", 2), + KV.of("383 Jackson Street", 1)); + p.run(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java new file mode 100644 index 0000000000000..24e6dde65c4fe --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.Serializer; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** + * Tests the UnionCoder. + */ +@RunWith(JUnit4.class) +public class UnionCoderTest { + + @Test + public void testSerializationDeserialization() { + UnionCoder newCoder = + UnionCoder.of(Arrays.>asList(StringUtf8Coder.of(), + DoubleCoder.of())); + CloudObject encoding = newCoder.asCloudObject(); + Coder decodedCoder = Serializer.deserialize(encoding, Coder.class); + assertEquals(newCoder, decodedCoder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java new file mode 100644 index 0000000000000..36028e493a755 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java @@ -0,0 +1,260 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static org.junit.Assert.assertEquals; + +import org.joda.time.DateTime; +import org.joda.time.DateTimeConstants; +import org.joda.time.DateTimeZone; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Tests for CalendarWindows WindowingFn. + */ +@RunWith(JUnit4.class) +public class CalendarWindowsTest { + + private static Instant makeTimestamp(int year, int month, int day, int hours, int minutes) { + return new DateTime(year, month, day, hours, minutes, DateTimeZone.UTC).toInstant(); + } + + @Test + public void testDays() throws Exception { + Map> expected = new HashMap<>(); + + final List timestamps = Arrays.asList( + makeTimestamp(2014, 1, 1, 0, 0).getMillis(), + makeTimestamp(2014, 1, 1, 23, 59).getMillis(), + + makeTimestamp(2014, 1, 2, 0, 0).getMillis(), + makeTimestamp(2014, 1, 2, 5, 5).getMillis(), + + makeTimestamp(2015, 1, 1, 0, 0).getMillis(), + makeTimestamp(2015, 1, 1, 5, 5).getMillis()); + + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 1, 1, 0, 0), + makeTimestamp(2014, 1, 2, 0, 0)), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 1, 2, 0, 0), + makeTimestamp(2014, 1, 3, 0, 0)), + set(timestamps.get(2), timestamps.get(3))); + + expected.put( + new IntervalWindow( + makeTimestamp(2015, 1, 1, 0, 0), + makeTimestamp(2015, 1, 2, 0, 0)), + set(timestamps.get(4), timestamps.get(5))); + + assertEquals(expected, runWindowingFn(CalendarWindows.days(1), timestamps)); + } + + @Test + public void testWeeks() throws Exception { + Map> expected = new HashMap<>(); + + final List timestamps = Arrays.asList( + makeTimestamp(2014, 1, 1, 0, 0).getMillis(), + makeTimestamp(2014, 1, 5, 5, 5).getMillis(), + + makeTimestamp(2014, 1, 8, 0, 0).getMillis(), + makeTimestamp(2014, 1, 12, 5, 5).getMillis(), + + makeTimestamp(2015, 1, 1, 0, 0).getMillis(), + makeTimestamp(2015, 1, 6, 5, 5).getMillis()); + + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 1, 1, 0, 0), + makeTimestamp(2014, 1, 8, 0, 0)), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 1, 8, 0, 0), + makeTimestamp(2014, 1, 15, 0, 0)), + set(timestamps.get(2), timestamps.get(3))); + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 12, 31, 0, 0), + makeTimestamp(2015, 1, 7, 0, 0)), + set(timestamps.get(4), timestamps.get(5))); + + assertEquals(expected, + runWindowingFn(CalendarWindows.weeks(1, DateTimeConstants.WEDNESDAY), timestamps)); + } + + @Test + public void testMonths() throws Exception { + Map> expected = new HashMap<>(); + + final List timestamps = Arrays.asList( + makeTimestamp(2014, 1, 1, 0, 0).getMillis(), + makeTimestamp(2014, 1, 31, 5, 5).getMillis(), + + makeTimestamp(2014, 2, 1, 0, 0).getMillis(), + makeTimestamp(2014, 2, 15, 5, 5).getMillis(), + + makeTimestamp(2015, 1, 1, 0, 0).getMillis(), + makeTimestamp(2015, 1, 31, 5, 5).getMillis()); + + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 1, 1, 0, 0), + makeTimestamp(2014, 2, 1, 0, 0)), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new 
IntervalWindow( + makeTimestamp(2014, 2, 1, 0, 0), + makeTimestamp(2014, 3, 1, 0, 0)), + set(timestamps.get(2), timestamps.get(3))); + + expected.put( + new IntervalWindow( + makeTimestamp(2015, 1, 1, 0, 0), + makeTimestamp(2015, 2, 1, 0, 0)), + set(timestamps.get(4), timestamps.get(5))); + + assertEquals(expected, + runWindowingFn(CalendarWindows.months(1), timestamps)); + } + + @Test + public void testMultiMonths() throws Exception { + Map> expected = new HashMap<>(); + + final List timestamps = Arrays.asList( + makeTimestamp(2014, 3, 5, 0, 0).getMillis(), + makeTimestamp(2014, 10, 4, 23, 59).getMillis(), + + makeTimestamp(2014, 10, 5, 0, 0).getMillis(), + makeTimestamp(2015, 3, 1, 0, 0).getMillis(), + + makeTimestamp(2016, 1, 5, 0, 0).getMillis(), + makeTimestamp(2016, 1, 31, 5, 5).getMillis()); + + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 3, 5, 0, 0), + makeTimestamp(2014, 10, 5, 0, 0)), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new IntervalWindow( + makeTimestamp(2014, 10, 5, 0, 0), + makeTimestamp(2015, 5, 5, 0, 0)), + set(timestamps.get(2), timestamps.get(3))); + + expected.put( + new IntervalWindow( + makeTimestamp(2015, 12, 5, 0, 0), + makeTimestamp(2016, 7, 5, 0, 0)), + set(timestamps.get(4), timestamps.get(5))); + + assertEquals(expected, runWindowingFn( + CalendarWindows.months(7).withStartingMonth(2014, 3).beginningOnDay(5), timestamps)); + } + + @Test + public void testYears() throws Exception { + Map> expected = new HashMap<>(); + + final List timestamps = Arrays.asList( + makeTimestamp(2000, 5, 5, 0, 0).getMillis(), + makeTimestamp(2010, 5, 4, 23, 59).getMillis(), + + makeTimestamp(2010, 5, 5, 0, 0).getMillis(), + makeTimestamp(2015, 3, 1, 0, 0).getMillis(), + + makeTimestamp(2052, 1, 5, 0, 0).getMillis(), + makeTimestamp(2060, 5, 4, 5, 5).getMillis()); + + + expected.put( + new IntervalWindow( + makeTimestamp(2000, 5, 5, 0, 0), + makeTimestamp(2010, 5, 5, 0, 0)), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new IntervalWindow( + makeTimestamp(2010, 5, 5, 0, 0), + makeTimestamp(2020, 5, 5, 0, 0)), + set(timestamps.get(2), timestamps.get(3))); + + expected.put( + new IntervalWindow( + makeTimestamp(2050, 5, 5, 0, 0), + makeTimestamp(2060, 5, 5, 0, 0)), + set(timestamps.get(4), timestamps.get(5))); + + assertEquals(expected, runWindowingFn( + CalendarWindows.years(10).withStartingYear(2000).beginningOnDay(5, 5), timestamps)); + } + + @Test + public void testTimeZone() throws Exception { + Map> expected = new HashMap<>(); + + DateTimeZone timeZone = DateTimeZone.forID("America/Los_Angeles"); + + final List timestamps = Arrays.asList( + new DateTime(2014, 1, 1, 0, 0, timeZone).getMillis(), + new DateTime(2014, 1, 1, 23, 59, timeZone).getMillis(), + + new DateTime(2014, 1, 2, 8, 0, DateTimeZone.UTC).getMillis(), + new DateTime(2014, 1, 3, 7, 59, DateTimeZone.UTC).getMillis()); + + expected.put( + new IntervalWindow( + new DateTime(2014, 1, 1, 0, 0, timeZone).toInstant(), + new DateTime(2014, 1, 2, 0, 0, timeZone).toInstant()), + set(timestamps.get(0), timestamps.get(1))); + + expected.put( + new IntervalWindow( + new DateTime(2014, 1, 2, 0, 0, timeZone).toInstant(), + new DateTime(2014, 1, 3, 0, 0, timeZone).toInstant()), + set(timestamps.get(2), timestamps.get(3))); + + assertEquals(expected, runWindowingFn( + CalendarWindows.days(1).withTimeZone(timeZone), + timestamps)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java new file mode 100644 index 0000000000000..0a68e72348f73 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Tests for FixedWindows WindowingFn. + */ +@RunWith(JUnit4.class) +public class FixedWindowsTest { + + @Test + public void testSimpleFixedWindow() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(0), new Instant(10)), set(1, 2, 5, 9)); + expected.put(new IntervalWindow(new Instant(10), new Instant(20)), set(10, 11)); + expected.put(new IntervalWindow(new Instant(100), new Instant(110)), set(100)); + assertEquals( + expected, + runWindowingFn( + FixedWindows.of(new Duration(10)), + Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L, 100L))); + } + + @Test + public void testFixedOffsetWindow() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-5), new Instant(5)), set(1, 2)); + expected.put(new IntervalWindow(new Instant(5), new Instant(15)), set(5, 9, 10, 11)); + expected.put(new IntervalWindow(new Instant(95), new Instant(105)), set(100)); + assertEquals( + expected, + runWindowingFn( + FixedWindows.of(new Duration(10)).withOffset(new Duration(5)), + Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L, 100L))); + } + + @Test + public void testTimeUnit() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-5000), new Instant(5000)), set(1, 2, 1000)); + expected.put(new IntervalWindow(new Instant(5000), new Instant(15000)), set(5000, 5001, 10000)); + assertEquals( + expected, + runWindowingFn( + FixedWindows.of(Duration.standardSeconds(10)).withOffset(Duration.standardSeconds(5)), + Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); + } + + void checkConstructionFailure(int size, int offset) { + try { + FixedWindows.of(Duration.standardSeconds(size)).withOffset(Duration.standardSeconds(offset)); + fail("should have failed"); + } catch (IllegalArgumentException e) { + assertThat(e.toString(), + containsString("FixedWindows 
WindowingStrategies must have 0 <= offset < size")); + } + } + + @Test + public void testInvalidInput() throws Exception { + checkConstructionFailure(-1, 0); + checkConstructionFailure(1, 2); + checkConstructionFailure(1, -1); + } + + @Test + public void testEquality() { + assertTrue(FixedWindows.of(new Duration(10)).isCompatible(FixedWindows.of(new Duration(10)))); + assertTrue( + FixedWindows.of(new Duration(10)).isCompatible( + FixedWindows.of(new Duration(10)))); + assertTrue( + FixedWindows.of(new Duration(10)).isCompatible( + FixedWindows.of(new Duration(10)))); + + assertFalse(FixedWindows.of(new Duration(10)).isCompatible(FixedWindows.of(new Duration(20)))); + assertFalse(FixedWindows.of(new Duration(10)).isCompatible( + FixedWindows.of(new Duration(20)))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java new file mode 100644 index 0000000000000..ccb1ddecc4968 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Tests for Sessions WindowingFn. 
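+ *
+ * Sessions places each element in a window reaching from its timestamp to
+ * the gap duration later, then merges overlapping windows per key, as the
+ * merging test below illustrates.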
+ */ +@RunWith(JUnit4.class) +public class SessionsTest { + + @Test + public void testSimple() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(0), new Instant(10)), set(0)); + expected.put(new IntervalWindow(new Instant(10), new Instant(20)), set(10)); + expected.put(new IntervalWindow(new Instant(101), new Instant(111)), set(101)); + assertEquals( + expected, + runWindowingFn( + Sessions.withGapDuration(new Duration(10)), + Arrays.asList(0L, 10L, 101L))); + } + + @Test + public void testConsecutive() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(1), new Instant(19)), set(1, 2, 5, 9)); + expected.put(new IntervalWindow(new Instant(100), new Instant(111)), set(100, 101)); + assertEquals( + expected, + runWindowingFn( + Sessions.withGapDuration(new Duration(10)), + Arrays.asList(1L, 2L, 5L, 9L, 100L, 101L))); + } + + @Test + public void testMerging() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(1), new Instant(40)), set(1, 10, 15, 22, 30)); + expected.put(new IntervalWindow(new Instant(95), new Instant(111)), set(95, 100, 101)); + assertEquals( + expected, + runWindowingFn( + Sessions.withGapDuration(new Duration(10)), + Arrays.asList(1L, 15L, 30L, 100L, 101L, 95L, 22L, 10L))); + } + + @Test + public void testTimeUnit() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(1), new Instant(2000)), set(1, 2, 1000)); + expected.put(new IntervalWindow(new Instant(5000), new Instant(6001)), set(5000, 5001)); + expected.put(new IntervalWindow(new Instant(10000), new Instant(11000)), set(10000)); + assertEquals( + expected, + runWindowingFn( + Sessions.withGapDuration(Duration.standardSeconds(1)), + Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); + } + + @Test + public void testEquality() { + assertTrue( + Sessions.withGapDuration(new Duration(10)).isCompatible( + Sessions.withGapDuration(new Duration(10)))); + assertTrue( + Sessions.withGapDuration(new Duration(10)).isCompatible( + Sessions.withGapDuration(new Duration(20)))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java new file mode 100644 index 0000000000000..f187cb429940e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; +import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Tests for the SlidingWindows WindowingFn. + */ +@RunWith(JUnit4.class) +public class SlidingWindowsTest { + + @Test + public void testSimple() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-5), new Instant(5)), set(1, 2)); + expected.put(new IntervalWindow(new Instant(0), new Instant(10)), set(1, 2, 5, 9)); + expected.put(new IntervalWindow(new Instant(5), new Instant(15)), set(5, 9, 10, 11)); + expected.put(new IntervalWindow(new Instant(10), new Instant(20)), set(10, 11)); + assertEquals( + expected, + runWindowingFn( + SlidingWindows.of(new Duration(10)).every(new Duration(5)), + Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); + } + + @Test + public void testSlightlyOverlapping() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-5), new Instant(2)), set(1)); + expected.put(new IntervalWindow(new Instant(0), new Instant(7)), set(1, 2, 5)); + expected.put(new IntervalWindow(new Instant(5), new Instant(12)), set(5, 9, 10, 11)); + expected.put(new IntervalWindow(new Instant(10), new Instant(17)), set(10, 11)); + assertEquals( + expected, + runWindowingFn( + SlidingWindows.of(new Duration(7)).every(new Duration(5)), + Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); + } + + @Test + public void testElidings() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(0), new Instant(3)), set(1, 2)); + expected.put(new IntervalWindow(new Instant(10), new Instant(13)), set(10, 11)); + expected.put(new IntervalWindow(new Instant(100), new Instant(103)), set(100)); + assertEquals( + expected, + runWindowingFn( + // Only look at the first 3 millisecs of every 10-millisec interval. 
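+            // Because the period is longer than the size, elements outside
+            // those 3 millisecs (here 3, 5, and 9) fall in no window at all.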
+ SlidingWindows.of(new Duration(3)).every(new Duration(10)), + Arrays.asList(1L, 2L, 3L, 5L, 9L, 10L, 11L, 100L))); + } + + @Test + public void testOffset() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-8), new Instant(2)), set(1)); + expected.put(new IntervalWindow(new Instant(-3), new Instant(7)), set(1, 2, 5)); + expected.put(new IntervalWindow(new Instant(2), new Instant(12)), set(2, 5, 9, 10, 11)); + expected.put(new IntervalWindow(new Instant(7), new Instant(17)), set(9, 10, 11)); + assertEquals( + expected, + runWindowingFn( + SlidingWindows.of(new Duration(10)).every(new Duration(5)).withOffset(new Duration(2)), + Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); + } + + @Test + public void testTimeUnit() throws Exception { + Map> expected = new HashMap<>(); + expected.put(new IntervalWindow(new Instant(-5000), new Instant(5000)), set(1, 2, 1000)); + expected.put(new IntervalWindow(new Instant(0), new Instant(10000)), + set(1, 2, 1000, 5000, 5001)); + expected.put(new IntervalWindow(new Instant(5000), new Instant(15000)), set(5000, 5001, 10000)); + expected.put(new IntervalWindow(new Instant(10000), new Instant(20000)), set(10000)); + assertEquals( + expected, + runWindowingFn( + SlidingWindows.of(Duration.standardSeconds(10)).every(Duration.standardSeconds(5)), + Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); + } + + @Test + public void testEquality() { + assertTrue( + SlidingWindows.of(new Duration(10)).isCompatible( + SlidingWindows.of(new Duration(10)))); + assertTrue( + SlidingWindows.of(new Duration(10)).isCompatible( + SlidingWindows.of(new Duration(10)))); + + assertFalse(SlidingWindows.of(new Duration(10)).isCompatible( + SlidingWindows.of(new Duration(20)))); + assertFalse(SlidingWindows.of(new Duration(10)).isCompatible( + SlidingWindows.of(new Duration(20)))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java new file mode 100644 index 0000000000000..85c0bf6b8b6d2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */
+
+package com.google.cloud.dataflow.sdk.transforms.windowing;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Flatten;
+import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionList;
+import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.io.Serializable;
+import java.util.Arrays;
+
+/** Unit tests for bucketing. */
+@RunWith(JUnit4.class)
+public class WindowingTest implements Serializable {
+  @Rule
+  public TemporaryFolder tmpFolder = new TemporaryFolder();
+
+  private static class WindowedCount extends PTransform<PCollection<String>, PCollection<String>> {
+    private WindowingFn<? super String, ?> windowingFn;
+    public WindowedCount(WindowingFn<?, ?> windowingFn) {
+      this.windowingFn = (WindowingFn<? super String, ?>) windowingFn;
+    }
+    @Override
+    public PCollection<String> apply(PCollection<String> in) {
+      return in
+          .apply(Window.named("Window").into(windowingFn))
+          .apply(Count.<String>perElement())
+          .apply(ParDo
+              .named("FormatCounts")
+              .of(new DoFn<KV<String, Long>, String>() {
+                    @Override
+                    public void processElement(ProcessContext c) {
+                      c.output(c.element().getKey() + ":" + c.element().getValue()
+                          + ":" + c.timestamp().getMillis() + ":" + c.windows());
+                    }
+                  }))
+          .setCoder(StringUtf8Coder.of());
+    }
+  }
+
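+  // Formats an expected result as "value:count:timestamp:[[start..end)]"; in
+  // these tests the timestamp of each windowed count is the end of its window
+  // minus 1 ms.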
+  private String output(String value, int count, int timestamp, int windowStart, int windowEnd) {
+    return value + ":" + count + ":" + timestamp
+        + ":[[" + new Instant(windowStart) + ".."
+        + new Instant(windowEnd) + ")]";
+  }
+
+  @Test
+  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
+  public void testPartitioningWindowing() {
+    Pipeline p = TestPipeline.create();
+    PCollection<String> input =
+        p.apply(
+            Create.timestamped(
+                TimestampedValue.of("a", new Instant(1)),
+                TimestampedValue.of("b", new Instant(2)),
+                TimestampedValue.of("b", new Instant(3)),
+                TimestampedValue.of("c", new Instant(11)),
+                TimestampedValue.of("d", new Instant(11))));
+
+    PCollection<String> output =
+        input
+        .apply(new WindowedCount(FixedWindows.of(new Duration(10))));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        output("a", 1, 9, 0, 10),
+        output("b", 2, 9, 0, 10),
+        output("c", 1, 19, 10, 20),
+        output("d", 1, 19, 10, 20));
+
+    p.run();
+  }
+
+  @Test
+  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
+  public void testNonPartitioningWindowing() {
+    Pipeline p = TestPipeline.create();
+    PCollection<String> input =
+        p.apply(
+            Create.timestamped(
+                TimestampedValue.of("a", new Instant(1)),
+                TimestampedValue.of("a", new Instant(7)),
+                TimestampedValue.of("b", new Instant(8))));
+
+    PCollection<String> output =
+        input
+        .apply(new WindowedCount(
+            SlidingWindows.of(new Duration(10)).every(new Duration(5))));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        output("a", 1, 4, -5, 5),
+        output("a", 2, 9, 0, 10),
+        output("a", 1, 14, 5, 15),
+        output("b", 1, 9, 0, 10),
+        output("b", 1, 14, 5, 15));
+
+    p.run();
+  }
+
+  @Test
+  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
+  public void testMergingWindowing() {
+    Pipeline p = TestPipeline.create();
+    PCollection<String> input =
+        p.apply(
+            Create.timestamped(
+                TimestampedValue.of("a", new Instant(1)),
+                TimestampedValue.of("a", new Instant(5)),
+                TimestampedValue.of("a", new Instant(20))));
+
+    PCollection<String> output =
+        input
+        .apply(new WindowedCount(Sessions.withGapDuration(new Duration(10))));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        output("a", 2, 14, 1, 15),
+        output("a", 1, 29, 20, 30));
+
+    p.run();
+  }
+
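+  // Element timestamps assigned before a Flatten should survive it: all four
+  // elements below land in the same fixed window [0, 5), so each key counts to 2.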
+  @Test
+  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
+  public void testWindowPreservation() {
+    Pipeline p = TestPipeline.create();
+    PCollection<String> input1 = p.apply(
+        Create.timestamped(
+            TimestampedValue.of("a", new Instant(1)),
+            TimestampedValue.of("b", new Instant(2))));
+
+    PCollection<String> input2 = p.apply(
+        Create.timestamped(
+            TimestampedValue.of("a", new Instant(3)),
+            TimestampedValue.of("b", new Instant(4))));
+
+    PCollectionList<String> input = PCollectionList.of(input1).and(input2);
+
+    PCollection<String> output =
+        input
+        .apply(Flatten.<String>create())
+        .apply(new WindowedCount(FixedWindows.of(new Duration(5))));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        output("a", 2, 4, 0, 5),
+        output("b", 2, 4, 0, 5));
+
+    p.run();
+  }
+
+  @Test
+  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
+  public void testElementsSortedByTimestamp() {
+    // The Windowing API does not guarantee that elements will be sorted by
+    // timestamp, but the implementation currently relies on this, so it
+    // needs to be tested.
+
+    Pipeline p = TestPipeline.create();
+
+    PCollection<KV<String, String>> a = p
+        .apply(Create.timestamped(
+            TimestampedValue.of(KV.of("k", "a"), new Instant(1)),
+            TimestampedValue.of(KV.of("k", "b"), new Instant(4)),
+            TimestampedValue.of(KV.of("k", "c"), new Instant(3)),
+            TimestampedValue.of(KV.of("k", "d"), new Instant(5)),
+            TimestampedValue.of(KV.of("k", "e"), new Instant(2)),
+            TimestampedValue.of(KV.of("k", "f"), new Instant(-5)),
+            TimestampedValue.of(KV.of("k", "g"), new Instant(-6)),
+            TimestampedValue.of(KV.of("k", "h"), new Instant(-255)),
+            TimestampedValue.of(KV.of("k", "i"), new Instant(-256)),
+            TimestampedValue.of(KV.of("k", "j"), new Instant(255))));
+
+    PCollection<KV<String, String>> b = a
+        .apply(Window.<KV<String, String>>into(
+            FixedWindows.of(new Duration(1000)).withOffset(new Duration(500))));
+
+    PCollection<KV<String, Iterable<String>>> output = b
+        .apply(GroupByKey.<String, String>create());
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        KV.of("k",
+            (Iterable<String>) Arrays.asList("i", "h", "g", "f", "a", "e", "c", "b", "d", "j")));
+
+    p.run();
+  }
+
+  @Test
+  public void testEmptyInput() {
+    Pipeline p = TestPipeline.create();
+    PCollection<String> input =
+        p.apply(Create.<String>timestamped())
+        .setCoder(StringUtf8Coder.of());
+
+    PCollection<String> output =
+        input
+        .apply(new WindowedCount(FixedWindows.of(new Duration(10))));
+
+    DataflowAssert.that(output).containsInAnyOrder();
+
+    p.run();
+  }
+
+  @Test
+  public void testTextIoInput() throws Exception {
+    File tmpFile = tmpFolder.newFile("file.txt");
+    String filename = tmpFile.getPath();
+
+    try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) {
+      writer.println("a 1");
+      writer.println("b 2");
+      writer.println("b 3");
+      writer.println("c 11");
+      writer.println("d 11");
+    }
+
+    Pipeline p = TestPipeline.create();
+    PCollection<String> output = p.begin()
+        .apply(TextIO.Read.named("ReadLines").from(filename))
+        .apply(ParDo.of(new ExtractWordsWithTimestampsFn()))
+        .apply(new WindowedCount(FixedWindows.of(Duration.millis(10))));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        output("a", 1, 9, 0, 10),
+        output("b", 2, 9, 0, 10),
+        output("c", 1, 19, 10, 20),
+        output("d", 1, 19, 10, 20));
+
+    p.run();
+  }
+
+  /** A DoFn that tokenizes lines of text into individual words. */
+  static class ExtractWordsWithTimestampsFn extends DoFn<String, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      String[] words = c.element().split("[^a-zA-Z0-9']+");
+      if (words.length == 2) {
+        c.outputWithTimestamp(words[0], new Instant(Long.parseLong(words[1])));
+      }
+    }
+  }
+}
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java
new file mode 100644
index 0000000000000..45cc267d2d740
--- /dev/null
+++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java
@@ -0,0 +1,194 @@
+/*******************************************************************************
+ * Copyright (C) 2014 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MAX; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MIN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.Max; +import com.google.cloud.dataflow.sdk.transforms.Min; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Unit tests for the {@link Aggregator} API. + */ +@RunWith(JUnit4.class) +public class AggregatorImplTest { + @Rule + public final ExpectedException expectedEx = ExpectedException.none(); + + private static final String AGGREGATOR_NAME = "aggregator_name"; + + private void testAggregator(List items, + SerializableFunction, V> combiner, + Counter expectedCounter) { + CounterSet counters = new CounterSet(); + Aggregator aggregator = new AggregatorImpl, V>( + AGGREGATOR_NAME, combiner, counters.getAddCounterMutator()); + for (V item : items) { + aggregator.addValue(item); + } + + List cloudCounterSet = CounterTestUtils.extractCounterUpdates(counters, false); + Assert.assertEquals(cloudCounterSet.size(), 1); + Assert.assertEquals(cloudCounterSet.get(0), + CounterTestUtils.extractCounterUpdate(expectedCounter, false)); + } + + @Test + public void testSumInteger() throws Exception { + testAggregator(Arrays.asList(2, 4, 1, 3), new Sum.SumIntegerFn(), + Counter.ints(AGGREGATOR_NAME, SUM).resetToValue(10)); + } + + @Test + public void testSumLong() throws Exception { + testAggregator(Arrays.asList(2L, 4L, 1L, 3L), new Sum.SumLongFn(), + Counter.longs(AGGREGATOR_NAME, SUM).resetToValue(10L)); + } + + @Test + public void testSumDouble() throws Exception { + testAggregator(Arrays.asList(2.0, 4.1, 1.0, 3.1), new Sum.SumDoubleFn(), + Counter.doubles(AGGREGATOR_NAME, SUM).resetToValue(10.2)); + } + + @Test + public void testMinInteger() throws Exception { + testAggregator(Arrays.asList(2, 4, 1, 3), new Min.MinIntegerFn(), + Counter.ints(AGGREGATOR_NAME, MIN).resetToValue(1)); + } + + @Test + public void testMinLong() throws Exception { + testAggregator(Arrays.asList(2L, 4L, 1L, 3L), new Min.MinLongFn(), + Counter.longs(AGGREGATOR_NAME, MIN).resetToValue(1L)); + } + + @Test + public void testMinDouble() throws Exception { + testAggregator(Arrays.asList(2.0, 4.1, 1.0, 3.1), new Min.MinDoubleFn(), + Counter.doubles(AGGREGATOR_NAME, MIN).resetToValue(1.0)); + } + + @Test + public void testMaxInteger() throws Exception { + testAggregator(Arrays.asList(2, 4, 1, 3), new Max.MaxIntegerFn(), + Counter.ints(AGGREGATOR_NAME, MAX).resetToValue(4)); + } + + @Test + public void testMaxLong() throws Exception { + testAggregator(Arrays.asList(2L, 4L, 1L, 3L), new 
Max.MaxLongFn(), + Counter.longs(AGGREGATOR_NAME, MAX).resetToValue(4L)); + } + + @Test + public void testMaxDouble() throws Exception { + testAggregator(Arrays.asList(2.0, 4.1, 1.0, 3.1), new Max.MaxDoubleFn(), + Counter.doubles(AGGREGATOR_NAME, MAX).resetToValue(4.1)); + } + + @Test + public void testCompatibleDuplicateNames() throws Exception { + CounterSet counters = new CounterSet(); + Aggregator aggregator1 = + new AggregatorImpl, Integer>( + AGGREGATOR_NAME, new Sum.SumIntegerFn(), + counters.getAddCounterMutator()); + + Aggregator aggregator2 = + new AggregatorImpl, Integer>( + AGGREGATOR_NAME, new Sum.SumIntegerFn(), + counters.getAddCounterMutator()); + + // The duplicate aggregators should update the same counter. + aggregator1.addValue(3); + aggregator2.addValue(4); + Assert.assertEquals( + new CounterSet(Counter.ints(AGGREGATOR_NAME, SUM).resetToValue(7)), + counters); + } + + @Test + public void testIncompatibleDuplicateNames() throws Exception { + CounterSet counters = new CounterSet(); + new AggregatorImpl, Integer>( + AGGREGATOR_NAME, new Sum.SumIntegerFn(), + counters.getAddCounterMutator()); + + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage(Matchers.containsString( + "aggregator's name collides with an existing aggregator or " + + "system-provided counter of an incompatible type")); + new AggregatorImpl, Long>( + AGGREGATOR_NAME, new Sum.SumLongFn(), + counters.getAddCounterMutator()); + } + + @Test + public void testUnsupportedCombineFn() throws Exception { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage(Matchers.containsString("unsupported combiner")); + new AggregatorImpl<>( + AGGREGATOR_NAME, + new Combine.CombineFn, Integer>() { + @Override + public List createAccumulator() { return null; } + @Override + public void addInput(List accumulator, Integer input) { } + @Override + public List mergeAccumulators(Iterable> accumulators) { + return null; } + @Override + public Integer extractOutput(List accumulator) { return null; } + }, + (new CounterSet()).getAddCounterMutator()); + } + + @Test + public void testUnsupportedSerializableFunction() throws Exception { + expectedEx.expect(IllegalArgumentException.class); + expectedEx.expectMessage(Matchers.containsString("unsupported combiner")); + new AggregatorImpl, Integer>( + AGGREGATOR_NAME, + new SerializableFunction, Integer>() { + @Override + public Integer apply(Iterable input) { return null; } + }, + (new CounterSet()).getAddCounterMutator()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java new file mode 100644 index 0000000000000..0c262e2f1cb04 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.Assert.assertEquals; + +import com.google.api.client.util.BackOff; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link AttemptBoundedExponentialBackOff}. */ +@RunWith(JUnit4.class) +public class AttemptBoundedExponentialBackOffTest { + @Rule public ExpectedException exception = ExpectedException.none(); + + @Test + public void testUsingInvalidInitialInterval() throws Exception { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Initial interval must be greater than zero."); + new AttemptBoundedExponentialBackOff(10, 0L); + } + + @Test + public void testUsingInvalidMaximumNumberOfRetries() throws Exception { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Maximum number of attempts must be greater than zero."); + new AttemptBoundedExponentialBackOff(-1, 10L); + } + + @Test + public void testThatFixedNumberOfAttemptsExits() throws Exception { + BackOff backOff = new AttemptBoundedExponentialBackOff(3, 500); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(249L), lessThan(751L))); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(374L), lessThan(1126L))); + assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); + } + + @Test + public void testThatResettingAllowsReuse() throws Exception { + BackOff backOff = new AttemptBoundedExponentialBackOff(3, 500); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(249L), lessThan(751L))); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(374L), lessThan(1126L))); + assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); + backOff.reset(); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(249L), lessThan(751L))); + assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(374L), lessThan(1126L))); + assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/Base64UtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/Base64UtilsTest.java new file mode 100644 index 0000000000000..d557284ce0806 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/Base64UtilsTest.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.api.client.util.Base64.encodeBase64URLSafeString; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.Assert.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link Base64Utils}. */ +@RunWith(JUnit4.class) +public class Base64UtilsTest { + void testLength(int length) { + byte[] b = new byte[length]; + // Make sure that the estimated length is an upper bound. + assertThat( + Base64Utils.getBase64Length(length), + greaterThanOrEqualTo(encodeBase64URLSafeString(b).length())); + // Make sure that it's a tight upper bound (no more than 4 characters off). + assertThat( + Base64Utils.getBase64Length(length), + lessThan(4 + encodeBase64URLSafeString(b).length())); + } + + @Test + public void getBase64Length() { + for (int i = 0; i < 100; ++i) { + testLength(i); + } + for (int i = 1000; i < 1100; ++i) { + testLength(i); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java new file mode 100644 index 0000000000000..ca75e6f94ca7b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.Table; +import com.google.api.services.bigquery.model.TableCell; +import com.google.api.services.bigquery.model.TableDataList; +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; + +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +/** + * Tests for util classes related to BigQuery. 
+ */ +@RunWith(JUnit4.class) +public class BigQueryUtilTest { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Mock private Bigquery mockClient; + @Mock private Bigquery.Tables mockTables; + @Mock private Bigquery.Tables.Get mockTablesGet; + @Mock private Bigquery.Tabledata mockTabledata; + @Mock private Bigquery.Tabledata.List mockTabledataList; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + } + + @After + public void tearDown() { + verifyNoMoreInteractions(mockClient); + verifyNoMoreInteractions(mockTables); + verifyNoMoreInteractions(mockTablesGet); + verifyNoMoreInteractions(mockTabledata); + verifyNoMoreInteractions(mockTabledataList); + } + + private void onTableGet(Table table) throws IOException { + when(mockClient.tables()) + .thenReturn(mockTables); + when(mockTables.get(anyString(), anyString(), anyString())) + .thenReturn(mockTablesGet); + when(mockTablesGet.execute()) + .thenReturn(table); + } + + private void verifyTableGet() throws IOException { + verify(mockClient).tables(); + verify(mockTables).get("project", "dataset", "table"); + verify(mockTablesGet).execute(); + } + + private void onTableList(TableDataList result) throws IOException { + when(mockClient.tabledata()) + .thenReturn(mockTabledata); + when(mockTabledata.list(anyString(), anyString(), anyString())) + .thenReturn(mockTabledataList); + when(mockTabledataList.execute()) + .thenReturn(result); + } + + private void verifyTabledataList() throws IOException { + verify(mockClient, atLeastOnce()).tabledata(); + verify(mockTabledata, atLeastOnce()).list("project", "dataset", "table"); + verify(mockTabledataList, atLeastOnce()).execute(); + // Max results may be set when testing for an empty table. + verify(mockTabledataList, atLeast(0)).setMaxResults(anyLong()); + } + + private Table basicTableSchema() { + return new Table() + .setSchema(new TableSchema() + .setFields(Arrays.asList( + new TableFieldSchema() + .setName("name") + .setType("STRING"), + new TableFieldSchema() + .setName("answer") + .setType("INTEGER") + ))); + } + + private TableRow rawRow(Object...args) { + List cells = new LinkedList<>(); + for (Object a : args) { + cells.add(new TableCell().setV(a)); + } + return new TableRow().setF(cells); + } + + private TableDataList rawDataList(TableRow...rows) { + return new TableDataList() + .setRows(Arrays.asList(rows)); + } + + @Test + public void testRead() throws IOException { + onTableGet(basicTableSchema()); + + TableDataList dataList = rawDataList(rawRow("Arthur", 42)); + onTableList(dataList); + + BigQueryTableRowIterator iterator = new BigQueryTableRowIterator( + mockClient, + BigQueryIO.parseTableSpec("project:dataset.table")); + + Assert.assertTrue(iterator.hasNext()); + TableRow row = iterator.next(); + + Assert.assertTrue(row.containsKey("name")); + Assert.assertTrue(row.containsKey("answer")); + Assert.assertEquals("Arthur", row.get("name")); + Assert.assertEquals(42, row.get("answer")); + + Assert.assertFalse(iterator.hasNext()); + + verifyTableGet(); + verifyTabledataList(); + } + + @Test + public void testReadEmpty() throws IOException { + onTableGet(basicTableSchema()); + + // BigQuery may respond with a page token for an empty table, ensure we + // handle it. 
+ TableDataList dataList = new TableDataList() + .setPageToken("FEED==") + .setTotalRows(0L); + onTableList(dataList); + + BigQueryTableRowIterator iterator = new BigQueryTableRowIterator( + mockClient, + BigQueryIO.parseTableSpec("project:dataset.table")); + + Assert.assertFalse(iterator.hasNext()); + + verifyTableGet(); + verifyTabledataList(); + } + + @Test + public void testReadMultiPage() throws IOException { + onTableGet(basicTableSchema()); + + TableDataList page1 = rawDataList(rawRow("Row1", 1)) + .setPageToken("page2"); + TableDataList page2 = rawDataList(rawRow("Row2", 2)) + .setTotalRows(2L); + + when(mockClient.tabledata()) + .thenReturn(mockTabledata); + when(mockTabledata.list(anyString(), anyString(), anyString())) + .thenReturn(mockTabledataList); + when(mockTabledataList.execute()) + .thenReturn(page1) + .thenReturn(page2); + + BigQueryTableRowIterator iterator = new BigQueryTableRowIterator( + mockClient, + BigQueryIO.parseTableSpec("project:dataset.table")); + List names = new LinkedList<>(); + Iterators.addAll(names, + Iterators.transform(iterator, new Function(){ + @Override + public String apply(TableRow input) { + return (String) input.get("name"); + } + })); + + Assert.assertThat(names, Matchers.hasItems("Row1", "Row2")); + + verifyTableGet(); + verifyTabledataList(); + // The second call should have used a page token. + verify(mockTabledataList).setPageToken("page2"); + } + + @Test + public void testReadOpenFailure() throws IOException { + thrown.expect(RuntimeException.class); + + when(mockClient.tables()) + .thenReturn(mockTables); + when(mockTables.get(anyString(), anyString(), anyString())) + .thenReturn(mockTablesGet); + when(mockTablesGet.execute()) + .thenThrow(new IOException("No such table")); + + BigQueryTableRowIterator iterator = new BigQueryTableRowIterator( + mockClient, + BigQueryIO.parseTableSpec("project:dataset.table")); + try { + Assert.assertFalse(iterator.hasNext()); // throws. 
+ } finally { + verifyTableGet(); + } + } + + @Test + public void testWriteAppend() throws IOException { + onTableGet(basicTableSchema()); + + TableReference ref = BigQueryIO + .parseTableSpec("project:dataset.table"); + + BigQueryTableInserter inserter = + new BigQueryTableInserter(mockClient, ref); + + inserter.getOrCreateTable(BigQueryIO.Write.WriteDisposition.WRITE_APPEND, + BigQueryIO.Write.CreateDisposition.CREATE_NEVER, null); + + verifyTableGet(); + } + + @Test + public void testWriteEmpty() throws IOException { + onTableGet(basicTableSchema()); + + TableDataList dataList = new TableDataList().setTotalRows(0L); + onTableList(dataList); + + TableReference ref = BigQueryIO + .parseTableSpec("project:dataset.table"); + + BigQueryTableInserter inserter = + new BigQueryTableInserter(mockClient, ref); + + inserter.getOrCreateTable(BigQueryIO.Write.WriteDisposition.WRITE_EMPTY, + BigQueryIO.Write.CreateDisposition.CREATE_NEVER, null); + + verifyTableGet(); + verifyTabledataList(); + } + + @Test + public void testWriteEmptyFail() throws IOException { + thrown.expect(IOException.class); + + onTableGet(basicTableSchema()); + + TableDataList dataList = rawDataList(rawRow("Arthur", 42)); + onTableList(dataList); + + TableReference ref = BigQueryIO + .parseTableSpec("project:dataset.table"); + + BigQueryTableInserter inserter = + new BigQueryTableInserter(mockClient, ref); + + try { + inserter.getOrCreateTable(BigQueryIO.Write.WriteDisposition.WRITE_EMPTY, + BigQueryIO.Write.CreateDisposition.CREATE_NEVER, null); + } finally { + verifyTableGet(); + verifyTabledataList(); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtilsTest.java new file mode 100644 index 0000000000000..31bc2f9241eaf --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudMetricUtilsTest.java @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; + +import com.google.api.services.dataflow.model.MetricStructuredName; +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.util.common.Metric; +import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Unit tests for {@link CloudMetricUtils}. 
*/ +@RunWith(JUnit4.class) +public class CloudMetricUtilsTest { + private void addDoubleMetric(String name, double value, String workerId, + List> metrics, + List cloudMetrics) { + metrics.add(new DoubleMetric(name, value)); + MetricStructuredName structuredName = new MetricStructuredName(); + structuredName.setName(name); + Map context = new HashMap<>(); + context.put("workerId", workerId); + structuredName.setContext(context); + cloudMetrics.add(new MetricUpdate() + .setName(structuredName) + .setScalar(CloudObject.forFloat(value))); + } + + @Test + public void testExtractCloudMetrics() { + List> metrics = new ArrayList<>(); + List expected = new ArrayList<>(); + String workerId = "worker-id"; + + addDoubleMetric("m1", 3.14, workerId, metrics, expected); + addDoubleMetric("m2", 2.17, workerId, metrics, expected); + addDoubleMetric("m3", -66.666, workerId, metrics, expected); + + List actual = CloudMetricUtils.extractCloudMetrics(metrics, workerId); + + assertEquals(expected, actual); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtilsTest.java new file mode 100644 index 0000000000000..d813a103fabb2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtilsTest.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Map; + +/** + * Tests for {@code CloudSourceUtils}. + */ +@RunWith(JUnit4.class) +public class CloudSourceUtilsTest { + @Test + public void testFlattenBaseSpecs() throws Exception { + // G = grandparent, P = parent, C = child. 
+ CloudObject grandparent = CloudObject.forClassName("text"); + addString(grandparent, "G", "g_g"); + addString(grandparent, "GP", "gp_g"); + addString(grandparent, "GC", "gc_g"); + addString(grandparent, "GPC", "gpc_g"); + + CloudObject parent = CloudObject.forClassName("text"); + addString(parent, "P", "p_p"); + addString(parent, "PC", "pc_p"); + addString(parent, "GP", "gp_p"); + addString(parent, "GPC", "gpc_p"); + + CloudObject child = CloudObject.forClassName("text"); + addString(child, "C", "c_c"); + addString(child, "PC", "pc_c"); + addString(child, "GC", "gc_c"); + addString(child, "GPC", "gpc_c"); + + Source source = new Source(); + source.setBaseSpecs(new ArrayList>()); + source.getBaseSpecs().add(grandparent); + source.getBaseSpecs().add(parent); + source.setSpec(child); + source.setCodec(makeCloudEncoding(StringUtf8Coder.class.getName())); + + Source flat = CloudSourceUtils.flattenBaseSpecs(source); + assertNull(flat.getBaseSpecs()); + assertEquals( + StringUtf8Coder.class.getName(), + getString(flat.getCodec(), PropertyNames.OBJECT_TYPE_NAME)); + + CloudObject flatSpec = CloudObject.fromSpec(flat.getSpec()); + assertEquals("g_g", getString(flatSpec, "G")); + assertEquals("p_p", getString(flatSpec, "P")); + assertEquals("c_c", getString(flatSpec, "C")); + assertEquals("gp_p", getString(flatSpec, "GP")); + assertEquals("gc_c", getString(flatSpec, "GC")); + assertEquals("pc_c", getString(flatSpec, "PC")); + assertEquals("gpc_c", getString(flatSpec, "GPC")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java new file mode 100644 index 0000000000000..92f9e7481558f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Tests for CoderUtils. 
+ */ +@RunWith(JUnit4.class) +public class CoderUtilsTest { + static class TestCoder extends AtomicCoder { + public static TestCoder of() { return new TestCoder(); } + + @Override + public void encode(Integer value, OutputStream outStream, Context context) { + throw new RuntimeException("not expecting to be called"); + } + + @Override + public Integer decode(InputStream inStream, Context context) { + throw new RuntimeException("not expecting to be called"); + } + + @Override + public boolean isDeterministic() { + return false; + } + } + + @Test + public void testCreateAtomicCoders() throws Exception { + Assert.assertEquals( + BigEndianIntegerCoder.of(), + Serializer.deserialize(makeCloudEncoding("BigEndianIntegerCoder"), Coder.class)); + Assert.assertEquals( + StringUtf8Coder.of(), + Serializer.deserialize( + makeCloudEncoding(StringUtf8Coder.class.getName()), Coder.class)); + Assert.assertEquals( + VoidCoder.of(), + Serializer.deserialize(makeCloudEncoding("VoidCoder"), Coder.class)); + Assert.assertEquals( + TestCoder.of(), + Serializer.deserialize(makeCloudEncoding(TestCoder.class.getName()), Coder.class)); + } + + @Test + public void testCreateCompositeCoders() throws Exception { + Assert.assertEquals( + IterableCoder.of(StringUtf8Coder.of()), + Serializer.deserialize( + makeCloudEncoding("IterableCoder", + makeCloudEncoding("StringUtf8Coder")), Coder.class)); + Assert.assertEquals( + KvCoder.of(BigEndianIntegerCoder.of(), VoidCoder.of()), + Serializer.deserialize( + makeCloudEncoding( + "KvCoder", + makeCloudEncoding(BigEndianIntegerCoder.class.getName()), + makeCloudEncoding("VoidCoder")), Coder.class)); + Assert.assertEquals( + IterableCoder.of( + KvCoder.of(IterableCoder.of(BigEndianIntegerCoder.of()), + KvCoder.of(VoidCoder.of(), + TestCoder.of()))), + Serializer.deserialize( + makeCloudEncoding( + IterableCoder.class.getName(), + makeCloudEncoding( + KvCoder.class.getName(), + makeCloudEncoding( + "IterableCoder", + makeCloudEncoding("BigEndianIntegerCoder")), + makeCloudEncoding( + "KvCoder", + makeCloudEncoding("VoidCoder"), + makeCloudEncoding(TestCoder.class.getName())))), Coder.class)); + } + + @Test + public void testCreateUntypedCoders() throws Exception { + Assert.assertEquals( + IterableCoder.of(StringUtf8Coder.of()), + Serializer.deserialize( + makeCloudEncoding( + "kind:stream", + makeCloudEncoding("StringUtf8Coder")), Coder.class)); + Assert.assertEquals( + KvCoder.of(BigEndianIntegerCoder.of(), VoidCoder.of()), + Serializer.deserialize( + makeCloudEncoding( + "kind:pair", + makeCloudEncoding(BigEndianIntegerCoder.class.getName()), + makeCloudEncoding("VoidCoder")), Coder.class)); + Assert.assertEquals( + IterableCoder.of( + KvCoder.of(IterableCoder.of(BigEndianIntegerCoder.of()), + KvCoder.of(VoidCoder.of(), + TestCoder.of()))), + Serializer.deserialize( + makeCloudEncoding( + "kind:stream", + makeCloudEncoding( + "kind:pair", + makeCloudEncoding( + "kind:stream", + makeCloudEncoding("BigEndianIntegerCoder")), + makeCloudEncoding( + "kind:pair", + makeCloudEncoding("VoidCoder"), + makeCloudEncoding(TestCoder.class.getName())))), Coder.class)); + } + + @Test + public void testCreateUnknownCoder() throws Exception { + try { + Serializer.deserialize(makeCloudEncoding("UnknownCoder"), Coder.class); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + CoreMatchers.containsString( + "Unable to convert coder ID UnknownCoder to class")); + } + } +} diff --git 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java new file mode 100644 index 0000000000000..cae705cea5a46 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.util.Throwables; +import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mockito; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** Test case for {@link GcsUtil}. */ +@RunWith(JUnit4.class) +public class GcsUtilTest { + @Test + public void testGlobTranslation() { + assertEquals("foo", GcsUtil.globToRegexp("foo")); + assertEquals("fo[^/]*o", GcsUtil.globToRegexp("fo*o")); + assertEquals("f[^/]*o\\.[^/]", GcsUtil.globToRegexp("f*o.?")); + assertEquals("foo-[0-9][^/]*", GcsUtil.globToRegexp("foo-[0-9]*")); + } + + @Test + public void testCreationWithDefaultOptions() { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + pipelineOptions.setGcpCredential(Mockito.mock(Credential.class)); + assertNotNull(pipelineOptions.getGcpCredential()); + } + + @Test + public void testCreationWithExecutorServiceProvided() { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + pipelineOptions.setGcpCredential(Mockito.mock(Credential.class)); + pipelineOptions.setExecutorService(Executors.newCachedThreadPool()); + assertSame(pipelineOptions.getExecutorService(), pipelineOptions.getGcsUtil().executorService); + } + + @Test + public void testCreationWithGcsUtilProvided() { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + GcsUtil gcsUtil = Mockito.mock(GcsUtil.class); + pipelineOptions.setGcsUtil(gcsUtil); + assertSame(gcsUtil, pipelineOptions.getGcsUtil()); + } + + @Test + public void testMultipleThreadsCanCompleteOutOfOrderWithDefaultThreadPool() throws Exception { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + ExecutorService executorService = pipelineOptions.getExecutorService(); + + int numThreads = 1000; + final CountDownLatch[] countDownLatches = new CountDownLatch[numThreads]; + for (int i = 0; i < numThreads; i++) { + final int currentLatch = i; + countDownLatches[i] = new CountDownLatch(1); + executorService.execute(new Runnable() { + @Override + public void run() { + // Wait 
for latch N and then release latch N - 1 + try { + countDownLatches[currentLatch].await(); + if (currentLatch > 0) { + countDownLatches[currentLatch - 1].countDown(); + } + } catch (InterruptedException e) { + throw Throwables.propagate(e); + } + } + }); + } + + // Release the last latch starting the chain reaction. + countDownLatches[countDownLatches.length - 1].countDown(); + executorService.shutdown(); + assertTrue("Expected tasks to complete", + executorService.awaitTermination(10, TimeUnit.SECONDS)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java new file mode 100644 index 0000000000000..d482d2c4d345a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.hamcrest.Matchers; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link GroupAlsoByWindowsDoFn}. 
*/ +@RunWith(JUnit4.class) +public class GroupAlsoByWindowsDoFnTest { + ExecutionContext execContext; + CounterSet counters; + TupleTag>> outputTag; + + @Before public void setUp() { + execContext = new DirectModeExecutionContext(); + counters = new CounterSet(); + outputTag = new TupleTag<>(); + } + + @Test public void testEmpty() throws Exception { + DoFnRunner>>, + KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); + + runner.startBundle(); + + runner.finishBundle(); + + List>> result = runner.getReceiver(outputTag); + + assertEquals(0, result.size()); + } + + @Test public void testFixedWindows() throws Exception { + DoFnRunner>>, + KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); + + runner.startBundle(); + + runner.processElement(WindowedValue.valueInEmptyWindows( + KV.of("k", (Iterable>) Arrays.asList( + WindowedValue.of( + "v1", + new Instant(1), + Arrays.asList(window(0, 10))), + WindowedValue.of( + "v2", + new Instant(2), + Arrays.asList(window(0, 10))), + WindowedValue.of( + "v3", + new Instant(13), + Arrays.asList(window(10, 20))))))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.contains("v1", "v2")); + assertEquals(new Instant(9), item0.getTimestamp()); + assertThat(item0.getWindows(), + Matchers.contains(window(0, 10))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.contains("v3")); + assertEquals(new Instant(19), item1.getTimestamp()); + assertThat(item1.getWindows(), + Matchers.contains(window(10, 20))); + } + + @Test public void testSlidingWindows() throws Exception { + DoFnRunner>>, + KV>, List> runner = + makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); + + runner.startBundle(); + + runner.processElement(WindowedValue.valueInEmptyWindows( + KV.of("k", (Iterable>) Arrays.asList( + WindowedValue.of( + "v1", + new Instant(5), + Arrays.asList(window(-10, 10), window(0, 20))), + WindowedValue.of( + "v2", + new Instant(15), + Arrays.asList(window(0, 20), window(10, 30))))))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(3, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.contains("v1")); + assertEquals(new Instant(9), item0.getTimestamp()); + assertThat(item0.getWindows(), + Matchers.contains(window(-10, 10))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.contains("v1", "v2")); + assertEquals(new Instant(19), item1.getTimestamp()); + assertThat(item1.getWindows(), + Matchers.contains(window(0, 20))); + + WindowedValue>> item2 = result.get(2); + assertEquals("k", item2.getValue().getKey()); + assertThat(item2.getValue().getValue(), Matchers.contains("v2")); + assertEquals(new Instant(29), item2.getTimestamp()); + assertThat(item2.getWindows(), + Matchers.contains(window(10, 30))); + } + + @Test public void testSessions() throws Exception { + DoFnRunner>>, + KV>, List> runner = + makeRunner(Sessions.withGapDuration(Duration.millis(10))); + + runner.startBundle(); + + runner.processElement(WindowedValue.valueInEmptyWindows( + KV.of("k", (Iterable>) 
Arrays.asList( + WindowedValue.of( + "v1", + new Instant(0), + Arrays.asList(window(0, 10))), + WindowedValue.of( + "v2", + new Instant(5), + Arrays.asList(window(5, 15))), + WindowedValue.of( + "v3", + new Instant(15), + Arrays.asList(window(15, 25))))))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.contains("v1", "v2")); + assertEquals(new Instant(14), item0.getTimestamp()); + assertThat(item0.getWindows(), + Matchers.contains(window(0, 15))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.contains("v3")); + assertEquals(new Instant(24), item1.getTimestamp()); + assertThat(item1.getWindows(), + Matchers.contains(window(15, 25))); + } + + + private DoFnRunner>>, + KV>, List> makeRunner( + WindowingFn windowingFn) { + + GroupAlsoByWindowsDoFn fn = + new GroupAlsoByWindowsDoFn( + windowingFn, StringUtf8Coder.of()); + + DoFnRunner>>, + KV>, List> runner = + DoFnRunner.createWithListOutputs( + PipelineOptionsFactory.create(), + fn, + PTuple.empty(), + outputTag, + new ArrayList>(), + execContext.createStepContext("merge"), + counters.getAddCounterMutator()); + + return runner; + } + + private BoundedWindow window(long start, long end) { + return new IntervalWindow(new Instant(start), new Instant(end)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOChannelUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOChannelUtilsTest.java new file mode 100644 index 0000000000000..fe82972044d03 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOChannelUtilsTest.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.nio.channels.WritableByteChannel; + +/** + * Tests for IOChannelUtils. 
+ */ +@RunWith(JUnit4.class) +public class IOChannelUtilsTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Test + public void testShardFormatExpansion() { + Assert.assertEquals("output-001-of-123.txt", + IOChannelUtils.constructName("output", "-SSS-of-NNN", + ".txt", + 1, 123)); + + Assert.assertEquals("out.txt/part-00042", + IOChannelUtils.constructName("out.txt", "/part-SSSSS", "", + 42, 100)); + + Assert.assertEquals("out.txt", + IOChannelUtils.constructName("ou", "t.t", "xt", 1, 1)); + + Assert.assertEquals("out0102shard.txt", + IOChannelUtils.constructName("out", "SSNNshard", ".txt", 1, 2)); + + Assert.assertEquals("out-2/1.part-1-of-2.txt", + IOChannelUtils.constructName("out", "-N/S.part-S-of-N", + ".txt", 1, 2)); + } + + @Test(expected = IllegalArgumentException.class) + public void testShardNameCollision() throws Exception { + File outFolder = tmpFolder.newFolder(); + String filename = outFolder.toPath().resolve("output").toString(); + + WritableByteChannel output = IOChannelUtils + .create(filename, "", "", 2, "text"); + Assert.fail("IOChannelUtils.create expected to fail due " + + "to filename collision"); + } + + @Test + public void testLargeShardCount() { + Assert.assertEquals("out-100-of-5000.txt", + IOChannelUtils.constructName("out", "-SS-of-NN", ".txt", + 100, 5000)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java new file mode 100644 index 0000000000000..fbf2f70b22355 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.worker.TextSource; +import com.google.cloud.dataflow.sdk.util.common.worker.Source; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.FileOutputStream; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; + +/** + * Tests for IOFactory. + */ +@RunWith(JUnit4.class) +public class IOFactoryTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Test + public void testLocalFileIO() throws Exception { + // Create some files to match against. 
+ File foo1 = tmpFolder.newFile("foo1"); + foo1.createNewFile(); + File foo2 = tmpFolder.newFile("foo2"); + foo2.createNewFile(); + tmpFolder.newFile("barf").createNewFile(); + + FileIOChannelFactory factory = new FileIOChannelFactory(); + Collection paths = factory.match(tmpFolder.getRoot() + "/f*"); + + Assert.assertEquals(2, paths.size()); + Assert.assertTrue(paths.contains(foo1.getCanonicalPath())); + Assert.assertTrue(paths.contains(foo2.getCanonicalPath())); + } + + @Test + public void testMultiFileRead() throws Exception { + File file1 = tmpFolder.newFile("file1"); + FileOutputStream output = new FileOutputStream(file1); + output.write("1\n2".getBytes()); + output.close(); + + File file2 = tmpFolder.newFile("file2"); + output = new FileOutputStream(file2); + output.write("3\n4\n".getBytes()); + output.close(); + + File file3 = tmpFolder.newFile("file3"); + output = new FileOutputStream(file3); + output.write("5".getBytes()); + output.close(); + + + TextSource source = new TextSource<>( + tmpFolder.getRoot() + "/file*", + true /* strip newlines */, + null, null, StringUtf8Coder.of()); + + Set records = new TreeSet<>(); + try (Source.SourceIterator iterator = source.iterator()) { + while (iterator.hasNext()) { + records.add(iterator.next()); + } + } + + Assert.assertEquals(records.toString(), 5, records.size()); + Assert.assertTrue(records.contains("1")); + Assert.assertTrue(records.contains("2")); + Assert.assertTrue(records.contains("3")); + Assert.assertTrue(records.contains("4")); + Assert.assertTrue(records.contains("5")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java new file mode 100644 index 0000000000000..18777b2aa3942 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests of InstanceBuilder. 
+ */ +@RunWith(JUnit4.class) +public class InstanceBuilderTest { + + @Rule + public ExpectedException expectedEx = ExpectedException.none(); + + @SuppressWarnings("unused") + private static TupleTag createTag(String id) { + return new TupleTag(id); + } + + @Test + public void testFullNameLookup() throws Exception { + TupleTag tag = InstanceBuilder.ofType(TupleTag.class) + .fromClassName(InstanceBuilderTest.class.getName()) + .fromFactoryMethod("createTag") + .withArg(String.class, "hello world!") + .build(); + + Assert.assertEquals("hello world!", tag.getId()); + } + + @Test + public void testConstructor() throws Exception { + TupleTag tag = InstanceBuilder.ofType(TupleTag.class) + .withArg(String.class, "hello world!") + .build(); + + Assert.assertEquals("hello world!", tag.getId()); + } + + @Test + public void testBadMethod() throws Exception { + expectedEx.expect(RuntimeException.class); + expectedEx.expectMessage( + Matchers.containsString("Unable to find factory method")); + + InstanceBuilder.ofType(String.class) + .fromClassName(InstanceBuilderTest.class.getName()) + .fromFactoryMethod("nonexistantFactoryMethod") + .withArg(String.class, "hello") + .withArg(String.class, " world!") + .build(); + } + + @Test + public void testBadArgs() throws Exception { + expectedEx.expect(RuntimeException.class); + expectedEx.expectMessage( + Matchers.containsString("Unable to find factory method")); + + InstanceBuilder.ofType(TupleTag.class) + .fromClassName(InstanceBuilderTest.class.getName()) + .fromFactoryMethod("createTag") + .withArg(String.class, "hello") + .withArg(Integer.class, 42) + .build(); + } + + @Test + public void testBadReturnType() throws Exception { + expectedEx.expect(RuntimeException.class); + expectedEx.expectMessage( + Matchers.containsString("must be assignable to String")); + + InstanceBuilder.ofType(String.class) + .fromClassName(InstanceBuilderTest.class.getName()) + .fromFactoryMethod("createTag") + .withArg(String.class, "hello") + .build(); + } + + @Test + public void testWrongType() throws Exception { + expectedEx.expect(RuntimeException.class); + expectedEx.expectMessage( + Matchers.containsString("must be assignable to TupleTag")); + + InstanceBuilder.ofType(TupleTag.class) + .fromClassName(InstanceBuilderTest.class.getName()) + .build(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MonitoringUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MonitoringUtilTest.java new file mode 100644 index 0000000000000..8ec3012da4482 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MonitoringUtilTest.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.Dataflow; +import com.google.api.services.dataflow.model.JobMessage; +import com.google.api.services.dataflow.model.ListJobMessagesResponse; + +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Tests for MonitoringUtil. + */ +@RunWith(JUnit4.class) +public class MonitoringUtilTest { + private static final String PROJECT_ID = "someProject"; + private static final String JOB_ID = "1234"; + + @Test + public void testGetJobMessages() throws IOException { + Dataflow.V1b3.Projects.Jobs.Messages mockMessages = + mock(Dataflow.V1b3.Projects.Jobs.Messages.class); + + // Two requests are needed to get all the messages. + Dataflow.V1b3.Projects.Jobs.Messages.List firstRequest = + mock(Dataflow.V1b3.Projects.Jobs.Messages.List.class); + Dataflow.V1b3.Projects.Jobs.Messages.List secondRequest = + mock(Dataflow.V1b3.Projects.Jobs.Messages.List.class); + + when(mockMessages.list(PROJECT_ID, JOB_ID)) + .thenReturn(firstRequest) + .thenReturn(secondRequest); + + ListJobMessagesResponse firstResponse = new ListJobMessagesResponse(); + firstResponse.setJobMessages(new ArrayList()); + for (int i = 0; i < 100; ++i) { + JobMessage message = new JobMessage(); + message.setId("message_" + i); + message.setTime(TimeUtil.toCloudTime(new Instant(i))); + firstResponse.getJobMessages().add(message); + } + String pageToken = "page_token"; + firstResponse.setNextPageToken(pageToken); + + ListJobMessagesResponse secondResponse = new ListJobMessagesResponse(); + secondResponse.setJobMessages(new ArrayList()); + for (int i = 100; i < 150; ++i) { + JobMessage message = new JobMessage(); + message.setId("message_" + i); + message.setTime(TimeUtil.toCloudTime(new Instant(i))); + secondResponse.getJobMessages().add(message); + } + + when(firstRequest.execute()).thenReturn(firstResponse); + when(secondRequest.execute()).thenReturn(secondResponse); + + MonitoringUtil util = new MonitoringUtil(PROJECT_ID, mockMessages); + + List messages = util.getJobMessages(JOB_ID, -1); + + verify(secondRequest).setPageToken(pageToken); + + assertEquals(150, messages.size()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java new file mode 100644 index 0000000000000..3692411a4a751 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link PTuple}. */ +@RunWith(JUnit4.class) +public final class PTupleTest { + @Test + public void accessingNullVoidValuesShouldNotCauseExceptions() { + TupleTag tag = new TupleTag() {}; + PTuple tuple = PTuple.of(tag, null); + assertTrue(tuple.has(tag)); + assertThat(tuple.get(tag), is(nullValue())); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java new file mode 100644 index 0000000000000..7d923c2fcdb7f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.cloud.dataflow.sdk.testing.FastNanoClockAndSleeper; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.io.Files; +import com.google.common.io.LineReader; + +import org.hamcrest.CoreMatchers; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.File; +import java.io.IOException; +import java.nio.channels.Channels; +import java.nio.channels.Pipe; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +/** Tests for PackageUtil. 
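+ * Verifies content-based package naming and staging to GCS, including retries on
+ * upload errors and skipping files that are already staged.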
*/ +@RunWith(JUnit4.class) +public class PackageUtilTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule + public FastNanoClockAndSleeper fastNanoClockAndSleeper = new FastNanoClockAndSleeper(); + + @Mock + GcsUtil mockGcsUtil; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testPackageNamingWithFileHavingExtension() throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + + DataflowPackage target = PackageUtil.createPackage(tmpFile.getAbsolutePath(), gcsStaging, null); + + assertEquals("file-SAzzqSB2zmoIgNHC9A2G0A.txt", target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/file-SAzzqSB2zmoIgNHC9A2G0A.txt", + target.getLocation()); + } + + @Test + public void testPackageNamingWithFileMissingExtension() throws Exception { + File tmpFile = tmpFolder.newFile("file"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + + DataflowPackage target = PackageUtil.createPackage(tmpFile.getAbsolutePath(), gcsStaging, null); + + assertEquals("file-SAzzqSB2zmoIgNHC9A2G0A", target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/file-SAzzqSB2zmoIgNHC9A2G0A", + target.getLocation()); + } + + @Test + public void testPackageNamingWithDirectory() throws Exception { + File tmpDirectory = tmpFolder.newFolder("folder"); + File tmpFile = tmpFolder.newFile("folder/file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + + DataflowPackage target = + PackageUtil.createPackage(tmpDirectory.getAbsolutePath(), gcsStaging, null); + + assertEquals("folder-9MHI5fxducQ06t3IG9MC-g.zip", target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/folder-9MHI5fxducQ06t3IG9MC-g.zip", + target.getLocation()); + } + + @Test + public void testPackageNamingWithFilesHavingSameContentsButDifferentNames() throws Exception { + tmpFolder.newFolder("folder1"); + File tmpDirectory1 = tmpFolder.newFolder("folder1/folderA"); + File tmpFile1 = tmpFolder.newFile("folder1/folderA/uniqueName1"); + Files.write("This is a test!", tmpFile1, StandardCharsets.UTF_8); + + tmpFolder.newFolder("folder2"); + File tmpDirectory2 = tmpFolder.newFolder("folder2/folderA"); + File tmpFile2 = tmpFolder.newFile("folder2/folderA/uniqueName2"); + Files.write("This is a test!", tmpFile2, StandardCharsets.UTF_8); + + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + + DataflowPackage target1 = + PackageUtil.createPackage(tmpDirectory1.getAbsolutePath(), gcsStaging, null); + DataflowPackage target2 = + PackageUtil.createPackage(tmpDirectory2.getAbsolutePath(), gcsStaging, null); + + assertFalse(target1.getName().equals(target2.getName())); + assertFalse(target1.getLocation().equals(target2.getLocation())); + } + + @Test + public void testPackageNamingWithDirectoriesHavingSameContentsButDifferentNames() + throws Exception { + tmpFolder.newFolder("folder1"); + File tmpDirectory1 = tmpFolder.newFolder("folder1/folderA"); + tmpFolder.newFolder("folder1/folderA/uniqueName1"); + + tmpFolder.newFolder("folder2"); + File tmpDirectory2 = tmpFolder.newFolder("folder2/folderA"); + tmpFolder.newFolder("folder2/folderA/uniqueName2"); + + GcsPath gcsStaging = 
GcsPath.fromComponents("somebucket", "base/path"); + + DataflowPackage target1 = + PackageUtil.createPackage(tmpDirectory1.getAbsolutePath(), gcsStaging, null); + DataflowPackage target2 = + PackageUtil.createPackage(tmpDirectory2.getAbsolutePath(), gcsStaging, null); + + assertFalse(target1.getName().equals(target2.getName())); + assertFalse(target1.getLocation().equals(target2.getLocation())); + } + + @Test + public void testPackageUploadWithFileSucceeds() throws Exception { + Pipe pipe = Pipe.open(); + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink()); + + List targets = PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpFile.getAbsolutePath()), gcsStaging); + DataflowPackage target = Iterables.getOnlyElement(targets); + + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + + assertEquals("file-SAzzqSB2zmoIgNHC9A2G0A.txt", target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/file-SAzzqSB2zmoIgNHC9A2G0A.txt", + target.getLocation()); + assertEquals("This is a test!", + new LineReader(Channels.newReader(pipe.source(), "UTF-8")).readLine()); + } + + @Test + public void testPackageUploadWithDirectorySucceeds() throws Exception { + Pipe pipe = Pipe.open(); + File tmpDirectory = tmpFolder.newFolder("folder"); + tmpFolder.newFolder("folder/empty_directory"); + tmpFolder.newFolder("folder/directory"); + File tmpFile1 = tmpFolder.newFile("folder/file.txt"); + File tmpFile2 = tmpFolder.newFile("folder/directory/file.txt"); + Files.write("This is a test!", tmpFile1, StandardCharsets.UTF_8); + Files.write("This is also a test!", tmpFile2, StandardCharsets.UTF_8); + + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink()); + + PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpDirectory.getAbsolutePath()), gcsStaging); + + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + + ZipInputStream inputStream = new ZipInputStream(Channels.newInputStream(pipe.source())); + List zipEntryNames = new ArrayList<>(); + for (ZipEntry entry = inputStream.getNextEntry(); entry != null; + entry = inputStream.getNextEntry()) { + zipEntryNames.add(entry.getName()); + } + assertTrue(CoreMatchers.hasItems("directory/file.txt", "empty_directory/", "file.txt").matches( + zipEntryNames)); + } + + @Test + public void testPackageUploadWithEmptyDirectorySucceeds() throws Exception { + Pipe pipe = Pipe.open(); + File tmpDirectory = tmpFolder.newFolder("folder"); + + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink()); + + List targets = PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpDirectory.getAbsolutePath()), gcsStaging); + DataflowPackage target = Iterables.getOnlyElement(targets); + + 
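+    // Even an empty directory should be staged as a single zip, and that zip should
+    // contain no entries.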
verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + + assertEquals("folder-wstW9MW_ZW-soJhufroDCA.zip", target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/folder-wstW9MW_ZW-soJhufroDCA.zip", + target.getLocation()); + assertNull(new ZipInputStream(Channels.newInputStream(pipe.source())).getNextEntry()); + } + + @Test(expected = RuntimeException.class) + public void testPackageUploadFailsWhenIOExceptionThrown() throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())) + .thenThrow(new IOException("Upload error")); + + try { + PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpFile.getAbsolutePath()), gcsStaging, fastNanoClockAndSleeper); + } finally { + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + } + } + + @Test + public void testPackageUploadEventuallySucceeds() throws Exception { + Pipe pipe = Pipe.open(); + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())) + .thenThrow(new IOException("410 Gone")) // First attempt fails + .thenReturn(pipe.sink()); // second attempt succeeds + + try { + PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpFile.getAbsolutePath()), + gcsStaging, + fastNanoClockAndSleeper); + } finally { + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + } + } + + @Test + public void testPackageUploadIsSkippedWhenFileAlreadyExists() throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(tmpFile.length()); + + PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(tmpFile.getAbsolutePath()), gcsStaging); + + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verifyNoMoreInteractions(mockGcsUtil); + } + + @Test + public void testPackageUploadIsNotSkippedWhenSizesAreDifferent() throws Exception { + Pipe pipe = Pipe.open(); + File tmpDirectory = tmpFolder.newFolder("folder"); + tmpFolder.newFolder("folder/empty_directory"); + tmpFolder.newFolder("folder/directory"); + File tmpFile1 = tmpFolder.newFile("folder/file.txt"); + File tmpFile2 = tmpFolder.newFile("folder/directory/file.txt"); + Files.write("This is a test!", tmpFile1, StandardCharsets.UTF_8); + Files.write("This is also a test!", tmpFile2, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(Long.MAX_VALUE); + when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink()); + + PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + 
ImmutableList.of(tmpDirectory.getAbsolutePath()), gcsStaging); + + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + } + + @Test + public void testPackageUploadWithExplicitPackageName() throws Exception { + Pipe pipe = Pipe.open(); + File tmpFile = tmpFolder.newFile("file.txt"); + Files.write("This is a test!", tmpFile, StandardCharsets.UTF_8); + GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); + final String overriddenName = "alias.txt"; + + when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); + when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(pipe.sink()); + + List targets = PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, + ImmutableList.of(overriddenName + "=" + tmpFile.getAbsolutePath()), gcsStaging); + DataflowPackage target = Iterables.getOnlyElement(targets); + + verify(mockGcsUtil).fileSize(any(GcsPath.class)); + verify(mockGcsUtil).create(any(GcsPath.class), anyString()); + verifyNoMoreInteractions(mockGcsUtil); + + assertEquals(overriddenName, target.getName()); + assertEquals("storage.googleapis.com/somebucket/base/path/file-SAzzqSB2zmoIgNHC9A2G0A.txt", + target.getLocation()); + } + +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java new file mode 100644 index 0000000000000..45924560630ba --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertNotNull; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.http.HttpRequest; +import com.google.api.client.http.HttpResponse; +import com.google.api.client.http.HttpResponseException; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.http.LowLevelHttpRequest; +import com.google.api.client.http.LowLevelHttpResponse; +import com.google.api.client.json.JsonFactory; +import com.google.api.client.json.jackson2.JacksonFactory; +import com.google.api.client.util.NanoClock; +import com.google.api.client.util.Sleeper; +import com.google.api.services.storage.Storage; + +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.security.PrivateKey; + +/** + * Tests for RetryHttpRequestInitializer. + */ +@RunWith(JUnit4.class) +public class RetryHttpRequestInitializerTest { + + @Mock private Credential mockCredential; + @Mock private PrivateKey mockPrivateKey; + @Mock private LowLevelHttpRequest mockLowLevelRequest; + @Mock private LowLevelHttpResponse mockLowLevelResponse; + + private final JsonFactory jsonFactory = JacksonFactory.getDefaultInstance(); + private Storage storage; + + // Used to test retrying a request more than the default 10 times. + static class MockNanoClock implements NanoClock { + private int timesMs[] = {500, 750, 1125, 1688, 2531, 3797, 5695, 8543, + 12814, 19222, 28833, 43249, 64873, 97310, 145965, 218945, 328420}; + private int i = 0; + + @Override + public long nanoTime() { + return timesMs[i++ / 2] * 1000000; + } + } + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + + HttpTransport lowLevelTransport = new HttpTransport() { + @Override + protected LowLevelHttpRequest buildRequest(String method, String url) + throws IOException { + return mockLowLevelRequest; + } + }; + + // Retry initializer will pass through to credential, since we can have + // only a single HttpRequestInitializer, and we use multiple Credential + // types in the SDK, not all of which allow for retry configuration. 
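+    // The MockNanoClock and no-op Sleeper below keep the retry backoff from actually
+    // sleeping while the tests run.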
+ RetryHttpRequestInitializer initializer = new RetryHttpRequestInitializer( + mockCredential, new MockNanoClock(), new Sleeper() { + @Override + public void sleep(long millis) throws InterruptedException {} + }); + storage = new Storage.Builder(lowLevelTransport, jsonFactory, initializer) + .setApplicationName("test").build(); + } + + @After + public void tearDown() { + verifyNoMoreInteractions(mockPrivateKey); + verifyNoMoreInteractions(mockLowLevelRequest); + verifyNoMoreInteractions(mockCredential); + } + + @Test + public void testBasicOperation() throws IOException { + when(mockLowLevelRequest.execute()) + .thenReturn(mockLowLevelResponse); + when(mockLowLevelResponse.getStatusCode()) + .thenReturn(200); + + Storage.Buckets.Get result = storage.buckets().get("test"); + HttpResponse response = result.executeUnparsed(); + assertNotNull(response); + + verify(mockCredential).initialize(any(HttpRequest.class)); + verify(mockLowLevelRequest, atLeastOnce()) + .addHeader(anyString(), anyString()); + verify(mockLowLevelRequest).setTimeout(anyInt(), anyInt()); + verify(mockLowLevelRequest).execute(); + verify(mockLowLevelResponse).getStatusCode(); + } + + /** + * Tests that a non-retriable error is not retried. + */ + @Test + public void testErrorCodeForbidden() throws IOException { + when(mockLowLevelRequest.execute()) + .thenReturn(mockLowLevelResponse); + when(mockLowLevelResponse.getStatusCode()) + .thenReturn(403) // Non-retryable error. + .thenReturn(200); // Shouldn't happen. + + try { + Storage.Buckets.Get result = storage.buckets().get("test"); + HttpResponse response = result.executeUnparsed(); + assertNotNull(response); + } catch (HttpResponseException e) { + Assert.assertThat(e.getMessage(), Matchers.containsString("403")); + } + + verify(mockCredential).initialize(any(HttpRequest.class)); + verify(mockLowLevelRequest, atLeastOnce()) + .addHeader(anyString(), anyString()); + verify(mockLowLevelRequest).setTimeout(anyInt(), anyInt()); + verify(mockLowLevelRequest).execute(); + verify(mockLowLevelResponse).getStatusCode(); + } + + /** + * Tests that a retriable error is retried. + */ + @Test + public void testRetryableError() throws IOException { + when(mockLowLevelRequest.execute()) + .thenReturn(mockLowLevelResponse) + .thenReturn(mockLowLevelResponse) + .thenReturn(mockLowLevelResponse); + when(mockLowLevelResponse.getStatusCode()) + .thenReturn(503) // Retryable + .thenReturn(429) // We also retry on 429 Too Many Requests. + .thenReturn(200); + + Storage.Buckets.Get result = storage.buckets().get("test"); + HttpResponse response = result.executeUnparsed(); + assertNotNull(response); + + verify(mockCredential).initialize(any(HttpRequest.class)); + verify(mockLowLevelRequest, atLeastOnce()) + .addHeader(anyString(), anyString()); + verify(mockLowLevelRequest, times(3)).setTimeout(anyInt(), anyInt()); + verify(mockLowLevelRequest, times(3)).execute(); + verify(mockLowLevelResponse, times(3)).getStatusCode(); + } + + /** + * Tests that an IOException is retried. 
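+ * The request is expected to execute twice: the attempt that throws, then the
+ * successful retry.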
+ */ + @Test + public void testThrowIOException() throws IOException { + when(mockLowLevelRequest.execute()) + .thenThrow(new IOException("Fake Error")) + .thenReturn(mockLowLevelResponse); + when(mockLowLevelResponse.getStatusCode()) + .thenReturn(200); + + Storage.Buckets.Get result = storage.buckets().get("test"); + HttpResponse response = result.executeUnparsed(); + assertNotNull(response); + + verify(mockCredential).initialize(any(HttpRequest.class)); + verify(mockLowLevelRequest, atLeastOnce()) + .addHeader(anyString(), anyString()); + verify(mockLowLevelRequest, times(2)).setTimeout(anyInt(), anyInt()); + verify(mockLowLevelRequest, times(2)).execute(); + verify(mockLowLevelResponse).getStatusCode(); + } + + /** + * Tests that a retryable error is retried enough times. + */ + @Test + public void testRetryableErrorRetryEnoughTimes() throws IOException { + when(mockLowLevelRequest.execute()).thenReturn(mockLowLevelResponse); + final int retries = 10; + when(mockLowLevelResponse.getStatusCode()).thenAnswer(new Answer(){ + int n = 0; + @Override + public Integer answer(InvocationOnMock invocation) { + return (n++ < retries - 1) ? 503 : 200; + }}); + + Storage.Buckets.Get result = storage.buckets().get("test"); + HttpResponse response = result.executeUnparsed(); + assertNotNull(response); + + verify(mockCredential).initialize(any(HttpRequest.class)); + verify(mockLowLevelRequest, atLeastOnce()).addHeader(anyString(), + anyString()); + verify(mockLowLevelRequest, times(retries)).setTimeout(anyInt(), anyInt()); + verify(mockLowLevelRequest, times(retries)).execute(); + verify(mockLowLevelResponse, times(retries)).getStatusCode(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java new file mode 100644 index 0000000000000..90f10cdc97134 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.hamcrest.CoreMatchers; +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; + +/** + * Tests for SerializableUtils. 
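+ *
+ * <p>Round-trips a Serializable POJO through a byte array and checks the error
+ * reported for bytes that cannot be deserialized.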
+ */ +@RunWith(JUnit4.class) +public class SerializableUtilsTest { + static class TestClass implements Serializable { + final String stringValue; + final int intValue; + + public TestClass(String stringValue, int intValue) { + this.stringValue = stringValue; + this.intValue = intValue; + } + } + + @Test + public void testTranscode() { + String stringValue = "hi bob"; + int intValue = 42; + + TestClass testObject = new TestClass(stringValue, intValue); + + Object copy = + SerializableUtils.deserializeFromByteArray( + SerializableUtils.serializeToByteArray(testObject), + "a TestObject"); + + Assert.assertThat(copy, new IsInstanceOf(TestClass.class)); + TestClass testCopy = (TestClass) copy; + + Assert.assertEquals(stringValue, testCopy.stringValue); + Assert.assertEquals(intValue, testCopy.intValue); + } + + @Test + public void testDeserializationError() { + try { + SerializableUtils.deserializeFromByteArray( + "this isn't legal".getBytes(), + "a bogus string"); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + CoreMatchers.containsString( + "unable to deserialize a bogus string")); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializerTest.java new file mode 100644 index 0000000000000..40e2cc00f650d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializerTest.java @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addDouble; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests Serializer implementation. + */ +@RunWith(JUnit4.class) +@Ignore +public class SerializerTest { + /** + * A POJO to use for testing serialization. + */ + @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, + property = PropertyNames.OBJECT_TYPE_NAME) + public static class TestRecord { + // TODO: When we apply property name typing to all non-final classes, the + // annotation on this class should be removed. 
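+    // Public fields that the deserializer populates directly in the tests below.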
+ public String name; + public boolean ok; + public int value; + public double dValue; + } + + @Test + public void testStatefulDeserialization() { + CloudObject object = CloudObject.forClass(TestRecord.class); + + addString(object, "name", "foobar"); + addBoolean(object, "ok", true); + addLong(object, "value", 42L); + addDouble(object, "dValue", .25); + + TestRecord record = Serializer.deserialize(object, TestRecord.class); + Assert.assertEquals("foobar", record.name); + Assert.assertEquals(true, record.ok); + Assert.assertEquals(42L, record.value); + Assert.assertEquals(0.25, record.dValue, 0.0001); + } + + private static class InjectedTestRecord { + private final String n; + private final int v; + + public InjectedTestRecord( + @JsonProperty("name") String name, + @JsonProperty("value") int value) { + this.n = name; + this.v = value; + } + + public String getName() { + return n; + } + public int getValue() { + return v; + } + } + + @Test + public void testDeserializationInjection() { + CloudObject object = CloudObject.forClass(InjectedTestRecord.class); + addString(object, "name", "foobar"); + addLong(object, "value", 42L); + + InjectedTestRecord record = + Serializer.deserialize(object, InjectedTestRecord.class); + + Assert.assertEquals("foobar", record.getName()); + Assert.assertEquals(42L, record.getValue()); + } + + private static class FactoryInjectedTestRecord { + @JsonCreator + public static FactoryInjectedTestRecord of( + @JsonProperty("name") String name, + @JsonProperty("value") int value) { + return new FactoryInjectedTestRecord(name, value); + } + + private final String n; + private final int v; + + private FactoryInjectedTestRecord(String name, int value) { + this.n = name; + this.v = value; + } + + public String getName() { + return n; + } + public int getValue() { + return v; + } + } + + @Test + public void testDeserializationFactoryInjection() { + CloudObject object = CloudObject.forClass(FactoryInjectedTestRecord.class); + addString(object, "name", "foobar"); + addLong(object, "value", 42L); + + FactoryInjectedTestRecord record = + Serializer.deserialize(object, FactoryInjectedTestRecord.class); + Assert.assertEquals("foobar", record.getName()); + Assert.assertEquals(42L, record.getValue()); + } + + private static class DerivedTestRecord extends TestRecord { + public String derived; + } + + @Test + public void testSubclassDeserialization() { + CloudObject object = CloudObject.forClass(DerivedTestRecord.class); + + addString(object, "name", "foobar"); + addBoolean(object, "ok", true); + addLong(object, "value", 42L); + addDouble(object, "dValue", .25); + addString(object, "derived", "baz"); + + TestRecord result = Serializer.deserialize(object, TestRecord.class); + Assert.assertThat(result, Matchers.instanceOf(DerivedTestRecord.class)); + + DerivedTestRecord record = (DerivedTestRecord) result; + Assert.assertEquals("foobar", record.name); + Assert.assertEquals(true, record.ok); + Assert.assertEquals(42L, record.value); + Assert.assertEquals(0.25, record.dValue, 0.0001); + Assert.assertEquals("baz", record.derived); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java new file mode 100644 index 0000000000000..94c44c707d0f3 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.WindowUtils.windowToString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +import org.hamcrest.Matchers; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link StreamingGroupAlsoByWindowsDoFn}. 
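+ * Exercises fixed, sliding, and session windows; a timer fired at the end of each
+ * window causes the grouped values for that window to be emitted.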
*/ +@RunWith(JUnit4.class) +public class StreamingGroupAlsoByWindowsDoFnTest { + ExecutionContext execContext; + CounterSet counters; + TupleTag>> outputTag; + + @Before public void setUp() { + execContext = new DirectModeExecutionContext(); + counters = new CounterSet(); + outputTag = new TupleTag<>(); + } + + @Test public void testEmpty() throws Exception { + DoFnRunner>, + KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); + + runner.startBundle(); + + runner.finishBundle(); + + List>> result = runner.getReceiver(outputTag); + + assertEquals(0, result.size()); + } + + @Test public void testFixedWindows() throws Exception { + DoFnRunner>, + KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); + + Coder windowCoder = FixedWindows.of(Duration.millis(10)).windowCoder(); + + runner.startBundle(); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v1")), + new Instant(1), + Arrays.asList(window(0, 10)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v2")), + new Instant(2), + Arrays.asList(window(0, 10)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v0")), + new Instant(0), + Arrays.asList(window(0, 10)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v3")), + new Instant(13), + Arrays.asList(window(10, 20)))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(0, 10), windowCoder), + new Instant(9), "k"))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(10, 20), windowCoder), + new Instant(19), "k"))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v0", "v1", "v2")); + assertEquals(new Instant(9), item0.getTimestamp()); + assertThat(item0.getWindows(), Matchers.contains(window(0, 10))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder("v3")); + assertEquals(new Instant(19), item1.getTimestamp()); + assertThat(item1.getWindows(), Matchers.contains(window(10, 20))); + } + + @Test public void testSlidingWindows() throws Exception { + DoFnRunner>, + KV>, List> runner = + makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); + + Coder windowCoder = + SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)).windowCoder(); + + runner.startBundle(); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v1")), + new Instant(5), + Arrays.asList(window(-10, 10), window(0, 20)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v0")), + new Instant(2), + Arrays.asList(window(-10, 10), window(0, 20)))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(-10, 10), windowCoder), + new Instant(9), "k"))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v2")), + new Instant(5), + Arrays.asList(window(0, 20), window(10, 30)))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) 
window(0, 20), windowCoder), + new Instant(19), "k"))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(10, 30), windowCoder), + new Instant(29), "k"))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(3, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v0", "v1")); + assertEquals(new Instant(9), item0.getTimestamp()); + assertThat(item0.getWindows(), Matchers.contains(window(-10, 10))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder("v0", "v1", "v2")); + assertEquals(new Instant(19), item1.getTimestamp()); + assertThat(item1.getWindows(), Matchers.contains(window(0, 20))); + + WindowedValue>> item2 = result.get(2); + assertEquals("k", item2.getValue().getKey()); + assertThat(item2.getValue().getValue(), Matchers.containsInAnyOrder("v2")); + assertEquals(new Instant(29), item2.getTimestamp()); + assertThat(item2.getWindows(), Matchers.contains(window(10, 30))); + } + + @Test public void testSessions() throws Exception { + DoFnRunner>, + KV>, List> runner = + makeRunner(Sessions.withGapDuration(Duration.millis(10))); + + Coder windowCoder = + Sessions.withGapDuration(Duration.millis(10)).windowCoder(); + + runner.startBundle(); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v1")), + new Instant(0), + Arrays.asList(window(0, 10)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v2")), + new Instant(5), + Arrays.asList(window(5, 15)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v3")), + new Instant(15), + Arrays.asList(window(15, 25)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", "v0")), + new Instant(3), + Arrays.asList(window(3, 13)))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(0, 15), windowCoder), + new Instant(14), "k"))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>timer( + windowToString((IntervalWindow) window(15, 25), windowCoder), + new Instant(24), "k"))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v0", "v1", "v2")); + assertEquals(new Instant(14), item0.getTimestamp()); + assertThat(item0.getWindows(), Matchers.contains(window(0, 15))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder("v3")); + assertEquals(new Instant(24), item1.getTimestamp()); + assertThat(item1.getWindows(), Matchers.contains(window(15, 25))); + } + + + private DoFnRunner>, + KV>, List> makeRunner( + WindowingFn windowingStrategy) { + + StreamingGroupAlsoByWindowsDoFn, IntervalWindow> fn = + StreamingGroupAlsoByWindowsDoFn.create(windowingStrategy, StringUtf8Coder.of()); + + DoFnRunner>, + KV>, List> runner = + DoFnRunner.createWithListOutputs( + PipelineOptionsFactory.create(), + fn, + PTuple.empty(), + outputTag, + new ArrayList>(), + 
execContext.createStepContext("merge"), + counters.getAddCounterMutator()); + + return runner; + } + + private BoundedWindow window(long start, long end) { + return new IntervalWindow(new Instant(start), new Instant(end)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StringUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StringUtilsTest.java new file mode 100644 index 0000000000000..bf1a3193b7e3c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StringUtilsTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Tests for StringUtils. + */ +@RunWith(JUnit4.class) +public class StringUtilsTest { + @Test + public void testTranscodeEmptyByteArray() { + byte[] bytes = { }; + String string = ""; + assertEquals(string, StringUtils.byteArrayToJsonString(bytes)); + assertArrayEquals(bytes, StringUtils.jsonStringToByteArray(string)); + } + + @Test + public void testTranscodeMixedByteArray() { + byte[] bytes = { + 0, 5, 12, 16, 31, 32, 65, 66, 126, 127, (byte) 128, (byte) 255, 67, 0 }; + String string = "%00%05%0c%10%1f AB~%7f%80%ffC%00"; + assertEquals(string, StringUtils.byteArrayToJsonString(bytes)); + assertArrayEquals(bytes, StringUtils.jsonStringToByteArray(string)); + } + + /** + * Inner class for simple name test. + */ + private class EmbeddedDoFn { + // Returns an anonymous inner class. + private EmbeddedDoFn getEmbedded() { + return new EmbeddedDoFn(){}; + } + } + + @Test + public void testSimpleName() { + assertEquals("Embedded", + StringUtils.approximateSimpleName(EmbeddedDoFn.class)); + } + + @Test + public void testAnonSimpleName() { + EmbeddedDoFn anon = new EmbeddedDoFn(){}; + + Pattern p = Pattern.compile("StringUtilsTest\\$[0-9]+"); + Matcher m = p.matcher(StringUtils.approximateSimpleName(anon.getClass())); + assertThat(m.matches(), is(true)); + } + + @Test + public void testNestedSimpleName() { + EmbeddedDoFn fn = new EmbeddedDoFn(); + EmbeddedDoFn anon = fn.getEmbedded(); + + // Expect to find "Embedded$1" + Pattern p = Pattern.compile("Embedded\\$[0-9]+"); + Matcher m = p.matcher(StringUtils.approximateSimpleName(anon.getClass())); + assertThat(m.matches(), is(true)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StructsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StructsTest.java new file mode 100644 index 0000000000000..9b8cc208fca9b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StructsTest.java @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.addDouble; +import static com.google.cloud.dataflow.sdk.util.Structs.addList; +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addLongs; +import static com.google.cloud.dataflow.sdk.util.Structs.addNull; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.Structs.addStringList; +import static com.google.cloud.dataflow.sdk.util.Structs.getBoolean; +import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; +import static com.google.cloud.dataflow.sdk.util.Structs.getLong; +import static com.google.cloud.dataflow.sdk.util.Structs.getObject; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; +import static com.google.cloud.dataflow.sdk.util.Structs.getStrings; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Tests for Structs. 
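+ *
+ * <p>Builds a representative dictionary with the add* helpers and verifies the typed
+ * getters, including the errors raised for missing or mistyped entries.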
+ */ +@RunWith(JUnit4.class) +public class StructsTest { + private List> makeCloudObjects() { + List> objects = new ArrayList<>(); + { + CloudObject o = CloudObject.forClassName("string"); + addString(o, "singletonStringKey", "stringValue"); + objects.add(o); + } + { + CloudObject o = CloudObject.forClassName("long"); + addLong(o, "singletonLongKey", 42L); + objects.add(o); + } + return objects; + } + + private Map makeCloudDictionary() { + Map o = new HashMap<>(); + addList(o, "emptyKey", Collections.>emptyList()); + addNull(o, "noStringsKey"); + addString(o, "singletonStringKey", "stringValue"); + addStringList(o, "multipleStringsKey", Arrays.asList("hi", "there", "bob")); + addLongs(o, "multipleLongsKey", 47L, 1L << 42, -5L); + addLong(o, "singletonLongKey", 42L); + addDouble(o, "singletonDoubleKey", 3.14); + addBoolean(o, "singletonBooleanKey", true); + addNull(o, "noObjectsKey"); + addList(o, "multipleObjectsKey", makeCloudObjects()); + return o; + } + + @Test + public void testGetStringParameter() throws Exception { + Map o = makeCloudDictionary(); + + Assert.assertEquals( + "stringValue", + getString(o, "singletonStringKey")); + Assert.assertEquals( + "stringValue", + getString(o, "singletonStringKey", "defaultValue")); + Assert.assertEquals( + "defaultValue", + getString(o, "missingKey", "defaultValue")); + + try { + getString(o, "missingKey"); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString( + "didn't find required parameter missingKey")); + } + + try { + getString(o, "noStringsKey"); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString("not a string")); + } + + Assert.assertThat(getStrings(o, "noStringsKey", null), Matchers.emptyIterable()); + Assert.assertThat(getObject(o, "noStringsKey").keySet(), Matchers.emptyIterable()); + Assert.assertThat(getDictionary(o, "noStringsKey").keySet(), Matchers.emptyIterable()); + Assert.assertThat(getDictionary(o, "noStringsKey", null).keySet(), + Matchers.emptyIterable()); + + try { + getString(o, "multipleStringsKey"); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString("not a string")); + } + + try { + getString(o, "emptyKey"); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString("not a string")); + } + } + + @Test + public void testGetBooleanParameter() throws Exception { + Map o = makeCloudDictionary(); + + Assert.assertEquals( + true, + getBoolean(o, "singletonBooleanKey", false)); + Assert.assertEquals( + false, + getBoolean(o, "missingKey", false)); + + try { + getBoolean(o, "emptyKey", false); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString("not a boolean")); + } + } + + @Test + public void testGetLongParameter() throws Exception { + Map o = makeCloudDictionary(); + + Assert.assertEquals( + (Long) 42L, + getLong(o, "singletonLongKey", 666L)); + Assert.assertEquals( + (Long) 666L, + getLong(o, "missingKey", 666L)); + + try { + getLong(o, "emptyKey", 666L); + Assert.fail("should have thrown an exception"); + } catch (Exception exn) { + Assert.assertThat(exn.toString(), + Matchers.containsString("not an int")); + } + } + + // TODO: Test builder operations. 
+} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TimeUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TimeUtilTest.java new file mode 100644 index 0000000000000..1faebeba7c0e4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TimeUtilTest.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; +import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link TimeUtil}. */ +@RunWith(JUnit4.class) +public final class TimeUtilTest { + @Test + public void toCloudTimeShouldPrintTimeStrings() { + assertEquals("1970-01-01T00:00:00Z", toCloudTime(new Instant(0))); + assertEquals("1970-01-01T00:00:00.001Z", toCloudTime(new Instant(1))); + } + + @Test + public void fromCloudTimeShouldParseTimeStrings() { + assertEquals(new Instant(0), fromCloudTime("1970-01-01T00:00:00Z")); + assertEquals(new Instant(1), fromCloudTime("1970-01-01T00:00:00.001Z")); + assertEquals(new Instant(1), fromCloudTime("1970-01-01T00:00:00.001000Z")); + assertEquals(new Instant(1), fromCloudTime("1970-01-01T00:00:00.001001Z")); + assertEquals(new Instant(1), fromCloudTime("1970-01-01T00:00:00.001000000Z")); + assertEquals(new Instant(1), fromCloudTime("1970-01-01T00:00:00.001000001Z")); + assertNull(fromCloudTime("")); + assertNull(fromCloudTime("1970-01-01T00:00:00")); + } + + @Test + public void toCloudDurationShouldPrintDurationStrings() { + assertEquals("0s", toCloudDuration(Duration.ZERO)); + assertEquals("4s", toCloudDuration(Duration.millis(4000))); + assertEquals("4.001s", toCloudDuration(Duration.millis(4001))); + } + + @Test + public void fromCloudDurationShouldParseDurationStrings() { + assertEquals(Duration.millis(4000), fromCloudDuration("4s")); + assertEquals(Duration.millis(4001), fromCloudDuration("4.001s")); + assertEquals(Duration.millis(4001), fromCloudDuration("4.001000s")); + assertEquals(Duration.millis(4001), fromCloudDuration("4.001001s")); + assertEquals(Duration.millis(4001), fromCloudDuration("4.001000000s")); + assertEquals(Duration.millis(4001), fromCloudDuration("4.001000001s")); + assertNull(fromCloudDuration("")); + assertNull(fromCloudDuration("4")); + assertNull(fromCloudDuration("4.1")); + assertNull(fromCloudDuration("4.1s")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/VarIntTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/VarIntTest.java new file mode 100644 index 
0000000000000..d6b771bd0512b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/VarIntTest.java @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; + +/** Unit tests for {@link VarInt}. */ +@RunWith(JUnit4.class) +public class VarIntTest { + @Rule public final ExpectedException thrown = ExpectedException.none(); + + // Long values to check for boundary cases. + private static final long[] LONG_VALUES = { + 0, + 1, + 127, + 128, + 16383, + 16384, + 2097151, + 2097152, + 268435455, + 268435456, + 34359738367L, + 34359738368L, + 9223372036854775807L, + -9223372036854775808L, + -1, + }; + + // VarInt encoding of the above VALUES. + private static final byte[][] LONG_ENCODED = { + // 0 + { 0x00 }, + // 1 + { 0x01 }, + // 127 + { 0x7f }, + // 128 + { (byte) 0x80, 0x01 }, + // 16383 + { (byte) 0xff, 0x7f }, + // 16834 + { (byte) 0x80, (byte) 0x80, 0x01 }, + // 2097151 + { (byte) 0xff, (byte) 0xff, 0x7f }, + // 2097152 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, 0x01 }, + // 268435455 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x7f }, + // 268435456 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, 0x01 }, + // 34359738367 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x7f }, + // 34359738368 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, + 0x01 }, + // 9223372036854775807 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0x7f }, + // -9223372036854775808L + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, + (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, 0x01 }, + // -1 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x01 } + }; + + // Integer values to check for boundary cases. + private static final int[] INT_VALUES = { + 0, + 1, + 127, + 128, + 16383, + 16384, + 2097151, + 2097152, + 268435455, + 268435456, + 2147483647, + -2147483648, + -1, + }; + + // VarInt encoding of the above VALUES. 
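+ // (Wire format, as exercised by the tables above and below: each value is written little-endian in base-128, seven bits per byte with the high bit set on every byte except the last; a negative int therefore always occupies the full five bytes and a negative long the full ten.)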
+ private static final byte[][] INT_ENCODED = { + // 0 + { (byte) 0x00 }, + // 1 + { (byte) 0x01 }, + // 127 + { (byte) 0x7f }, + // 128 + { (byte) 0x80, (byte) 0x01 }, + // 16383 + { (byte) 0xff, (byte) 0x7f }, + // 16834 + { (byte) 0x80, (byte) 0x80, (byte) 0x01 }, + // 2097151 + { (byte) 0xff, (byte) 0xff, (byte) 0x7f }, + // 2097152 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x01 }, + // 268435455 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0x7f }, + // 268435456 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x01 }, + // 2147483647 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0x07 }, + // -2147483648 + { (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x80, (byte) 0x08 }, + // -1 + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0x0f } + }; + + private static byte[] encodeInt(int v) throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + VarInt.encode(v, stream); + return stream.toByteArray(); + } + + private static byte[] encodeLong(long v) throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + VarInt.encode(v, stream); + return stream.toByteArray(); + } + + private static int decodeInt(byte[] encoded) throws IOException { + ByteArrayInputStream stream = new ByteArrayInputStream(encoded); + return VarInt.decodeInt(stream); + } + + private static long decodeLong(byte[] encoded) throws IOException { + ByteArrayInputStream stream = new ByteArrayInputStream(encoded); + return VarInt.decodeLong(stream); + } + + @Test + public void decodeValues() throws IOException { + assertEquals(LONG_VALUES.length, LONG_ENCODED.length); + for (int i = 0; i < LONG_ENCODED.length; ++i) { + ByteArrayInputStream stream = new ByteArrayInputStream(LONG_ENCODED[i]); + long parsed = VarInt.decodeLong(stream); + assertEquals(LONG_VALUES[i], parsed); + assertEquals(-1, stream.read()); + } + + assertEquals(INT_VALUES.length, INT_ENCODED.length); + for (int i = 0; i < INT_ENCODED.length; ++i) { + ByteArrayInputStream stream = new ByteArrayInputStream(INT_ENCODED[i]); + int parsed = VarInt.decodeInt(stream); + assertEquals(INT_VALUES[i], parsed); + assertEquals(-1, stream.read()); + } + } + + @Test + public void encodeValuesAndGetLength() throws IOException { + assertEquals(LONG_VALUES.length, LONG_ENCODED.length); + for (int i = 0; i < LONG_VALUES.length; ++i) { + byte[] encoded = encodeLong(LONG_VALUES[i]); + assertThat(encoded, equalTo(LONG_ENCODED[i])); + assertEquals(LONG_ENCODED[i].length, VarInt.getLength(LONG_VALUES[i])); + } + + assertEquals(INT_VALUES.length, INT_ENCODED.length); + for (int i = 0; i < INT_VALUES.length; ++i) { + byte[] encoded = encodeInt(INT_VALUES[i]); + assertThat(encoded, equalTo(INT_ENCODED[i])); + assertEquals(INT_ENCODED[i].length, VarInt.getLength(INT_VALUES[i])); + } + } + + @Test + public void decodeThrowsExceptionForOverflow() throws IOException { + final byte[] tooLargeNumber = + { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, 0x02 }; + + thrown.expect(IOException.class); + + long parsed = decodeLong(tooLargeNumber); + } + + @Test + public void decodeThrowsExceptionForIntOverflow() throws IOException { + byte[] encoded = encodeLong(1L << 32); + + thrown.expect(IOException.class); + + int parsed = decodeInt(encoded); + } + + @Test + public void decodeThrowsExceptionForIntUnderflow() throws IOException { + byte[] encoded = encodeLong(-1); + + thrown.expect(IOException.class); + + int parsed = 
decodeInt(encoded); + } + + @Test + public void decodeThrowsExceptionForNonterminated() throws IOException { + final byte[] nonTerminatedNumber = + { (byte) 0xff, (byte) 0xff }; + + thrown.expect(IOException.class); + + long parsed = decodeLong(nonTerminatedNumber); + } + + @Test + public void decodeParsesEncodedValues() throws IOException { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + for (int i = 10; i < Integer.MAX_VALUE; i = (int) (i * 1.1)) { + VarInt.encode(i, outStream); + VarInt.encode(-i, outStream); + } + for (long i = 10; i < Long.MAX_VALUE; i = (long) (i * 1.1)) { + VarInt.encode(i, outStream); + VarInt.encode(-i, outStream); + } + + ByteArrayInputStream inStream = + new ByteArrayInputStream(outStream.toByteArray()); + for (int i = 10; i < Integer.MAX_VALUE; i = (int) (i * 1.1)) { + assertEquals(i, VarInt.decodeInt(inStream)); + assertEquals(-i, VarInt.decodeInt(inStream)); + } + for (long i = 10; i < Long.MAX_VALUE; i = (long) (i * 1.1)) { + assertEquals(i, VarInt.decodeLong(inStream)); + assertEquals(-i, VarInt.decodeLong(inStream)); + } + } + + @Test + public void endOfFileThrowsException() throws Exception { + ByteArrayInputStream inStream = + new ByteArrayInputStream(new byte[0]); + thrown.expect(EOFException.class); + VarInt.decodeInt(inStream); + } + + @Test + public void unterminatedThrowsException() throws Exception { + byte[] e = encodeLong(Long.MAX_VALUE); + byte[] s = new byte[1]; + s[0] = e[0]; + ByteArrayInputStream inStream = new ByteArrayInputStream(s); + thrown.expect(IOException.class); + VarInt.decodeInt(inStream); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/WindowedValueTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/WindowedValueTest.java new file mode 100644 index 0000000000000..67f21f5490928 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/WindowedValueTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; + +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; + +/** Test case for {@link WindowedValue}. 
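+ * (Round-trips a WindowedValue through the coder returned by getFullCoder() and verifies that the value, timestamp, and windows all survive encoding and decoding.)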
*/ +@RunWith(JUnit4.class) +public class WindowedValueTest { + @Test + public void testWindowedValueCoder() throws CoderException { + Instant timestamp = new Instant(1234); + WindowedValue<String> value = WindowedValue.of( + "abc", + new Instant(1234), + Arrays.asList(new IntervalWindow(timestamp, timestamp.plus(1000)), + new IntervalWindow(timestamp.plus(1000), timestamp.plus(2000)))); + + Coder<WindowedValue<String>> windowedValueCoder = + WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()); + + byte[] encodedValue = CoderUtils.encodeToByteArray(windowedValueCoder, value); + WindowedValue<String> decodedValue = + CoderUtils.decodeFromByteArray(windowedValueCoder, encodedValue); + + Assert.assertEquals(value.getValue(), decodedValue.getValue()); + Assert.assertEquals(value.getTimestamp(), decodedValue.getTimestamp()); + Assert.assertArrayEquals(value.getWindows().toArray(), decodedValue.getWindows().toArray()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterSetTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterSetTest.java new file mode 100644 index 0000000000000..c8fa6c2fab5a0 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterSetTest.java @@ -0,0 +1,75 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MAX; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SET; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Unit tests for {@link CounterSet}.
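+ * (Covers set-like addCounter semantics: adding an equal counter again returns the instance already registered, while adding a same-named but incompatible counter fails.)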
+ */ +@RunWith(JUnit4.class) +public class CounterSetTest { + @Test + public void testSet() { + CounterSet set = new CounterSet(); + assertTrue(set.add(Counter.longs("c1", SUM))); + assertFalse(set.add(Counter.longs("c1", SUM))); + assertTrue(set.add(Counter.longs("c2", MAX))); + assertEquals(2, set.size()); + } + + @Test + public void testAddCounterMutator() { + CounterSet set = new CounterSet(); + Counter<Long> c1 = Counter.longs("c1", SUM); + Counter<Long> c1SecondInstance = Counter.longs("c1", SUM); + Counter<Long> c1IncompatibleInstance = Counter.longs("c1", SET); + Counter<Long> c2 = Counter.longs("c2", MAX); + Counter<Double> c2IncompatibleInstance = Counter.doubles("c2", MAX); + + assertEquals(c1, set.getAddCounterMutator().addCounter(c1)); + assertEquals(c2, set.getAddCounterMutator().addCounter(c2)); + + assertEquals(c1, set.getAddCounterMutator().addCounter(c1SecondInstance)); + + try { + set.getAddCounterMutator().addCounter(c1IncompatibleInstance); + fail("should have failed"); + } catch (IllegalArgumentException exn) { + // Expected. + } + + try { + set.getAddCounterMutator().addCounter(c2IncompatibleInstance); + fail("should have failed"); + } catch (IllegalArgumentException exn) { + // Expected. + } + + assertEquals(2, set.size()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java new file mode 100644 index 0000000000000..ff40e0d06f182 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java @@ -0,0 +1,743 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License.
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.cloud.dataflow.sdk.util.Values.asBoolean; +import static com.google.cloud.dataflow.sdk.util.Values.asDouble; +import static com.google.cloud.dataflow.sdk.util.Values.asLong; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.AND; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MAX; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MIN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.OR; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SET; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.util.CloudCounterUtils; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.common.collect.Sets; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Unit tests for the {@link Counter} API. + */ +@RunWith(JUnit4.class) +public class CounterTest { + + private static MetricUpdate flush(Counter c) { + // TODO: Move this out into a separate Counter test. + return CounterTestUtils.extractCounterUpdate(c, true); + } + + private static final double EPSILON = 0.00000000001; + + @Test + public void testNameKindAndCloudCounterRepresentation() { + Counter c1 = Counter.longs("c1", SUM); + Counter c2 = Counter.doubles("c2", MAX); + Counter c3 = Counter.strings("c3", SET); + Counter c4 = Counter.doubles("c4", MEAN); + Counter c5 = Counter.ints("c5", MIN); + Counter c6 = Counter.booleans("c6", AND); + Counter c7 = Counter.booleans("c7", OR); + + assertEquals("c1", c1.getName()); + assertEquals(SUM, c1.getKind()); + MetricUpdate cc = flush(c1); + assertEquals("c1", cc.getName().getName()); + assertEquals("SUM", cc.getKind()); + assertEquals(0L, asLong(cc.getScalar()).longValue()); + c1.addValue(123L).addValue(-13L); + cc = flush(c1); + assertEquals(110L, asLong(cc.getScalar()).longValue()); + + assertEquals("c2", c2.getName()); + assertEquals(MAX, c2.getKind()); + cc = flush(c2); + assertEquals("c2", cc.getName().getName()); + assertEquals("MAX", cc.getKind()); + assertEquals(Double.MIN_VALUE, asDouble(cc.getScalar()), EPSILON); + c2.resetToValue(0.0).addValue(Math.PI).addValue(Math.E); + cc = flush(c2); + assertEquals(Math.PI, asDouble(cc.getScalar()), EPSILON); + + assertEquals("c3", c3.getName()); + assertEquals(SET, c3.getKind()); + cc = flush(c3); // empty sets are not sent to the service + assertEquals(null, cc); + c3.addValue("abc").addValue("e").addValue("abc"); + cc = flush(c3); + assertEquals("c3", cc.getName().getName()); + assertEquals("SET", cc.getKind()); + Set s = (Set) cc.getSet(); + assertEquals(2, s.size()); + assertTrue(s.containsAll(Arrays.asList( + CloudObject.forString("e"), + CloudObject.forString("abc")))); + + assertEquals("c4", c4.getName()); + assertEquals(MEAN, c4.getKind()); + cc = flush(c4); // zero-count means are not sent to the service + 
assertEquals(null, cc); + c4.addValue(Math.PI).addValue(Math.E).addValue(Math.sqrt(2)); + cc = flush(c4); + assertEquals("c4", cc.getName().getName()); + assertEquals("MEAN", cc.getKind()); + Object ms = cc.getMeanSum(); + Object mc = cc.getMeanCount(); + assertEquals(Math.PI + Math.E + Math.sqrt(2), asDouble(ms), EPSILON); + assertEquals(3, asLong(mc).longValue()); + c4.addValue(2.0).addValue(5.0); + cc = flush(c4); + ms = cc.getMeanSum(); + mc = cc.getMeanCount(); + assertEquals(7.0, asDouble(ms), EPSILON); + assertEquals(2L, asLong(mc).longValue()); + + assertEquals("c5", c5.getName()); + assertEquals(MIN, c5.getKind()); + cc = flush(c5); + assertEquals("c5", cc.getName().getName()); + assertEquals("MIN", cc.getKind()); + assertEquals(Integer.MAX_VALUE, asLong(cc.getScalar()).longValue()); + c5.addValue(123).addValue(-13); + cc = flush(c5); + assertEquals(-13, asLong(cc.getScalar()).longValue()); + + assertEquals("c6", c6.getName()); + assertEquals(AND, c6.getKind()); + cc = flush(c6); + assertEquals("c6", cc.getName().getName()); + assertEquals("AND", cc.getKind()); + assertEquals(true, asBoolean(cc.getScalar())); + c6.addValue(false); + cc = flush(c6); + assertEquals(false, asBoolean(cc.getScalar())); + + assertEquals("c7", c7.getName()); + assertEquals(OR, c7.getKind()); + cc = flush(c7); + assertEquals("c7", cc.getName().getName()); + assertEquals("OR", cc.getKind()); + assertEquals(false, asBoolean(cc.getScalar())); + c7.addValue(true); + cc = flush(c7); + assertEquals(true, asBoolean(cc.getScalar())); + } + + @Test + public void testCompatibility() { + // Equal counters are compatible, of all kinds. + assertTrue( + Counter.longs("c", SUM).isCompatibleWith(Counter.longs("c", SUM))); + assertTrue( + Counter.ints("c", SUM).isCompatibleWith(Counter.ints("c", SUM))); + assertTrue( + Counter.doubles("c", SUM).isCompatibleWith(Counter.doubles("c", SUM))); + assertTrue( + Counter.strings("c", SET).isCompatibleWith(Counter.strings("c", SET))); + assertTrue( + Counter.booleans("c", OR).isCompatibleWith( + Counter.booleans("c", OR))); + + // The name, kind, and type of the counter must match. + assertFalse( + Counter.longs("c", SUM).isCompatibleWith(Counter.longs("c2", SUM))); + assertFalse( + Counter.longs("c", SUM).isCompatibleWith(Counter.longs("c", MAX))); + assertFalse( + Counter.longs("c", SUM).isCompatibleWith(Counter.ints("c", SUM))); + + // The value of the counters are ignored. + assertTrue( + Counter.longs("c", SUM).resetToValue(666L).isCompatibleWith( + Counter.longs("c", SUM).resetToValue(42L))); + } + + + private void assertOK(long total, long delta, Counter c) { + assertEquals(total, c.getTotalAggregate().longValue()); + assertEquals(delta, c.getDeltaAggregate().longValue()); + } + + private void assertOK(double total, double delta, Counter c) { + assertEquals(total, asDouble(c.getTotalAggregate()), EPSILON); + assertEquals(delta, asDouble(c.getDeltaAggregate()), EPSILON); + } + + + // Tests for SUM. 
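+ // The SUM tests below (and the MAX, MIN, MEAN, SET, AND, and OR tests that follow) each track two expectations: the running total since the counter was created and the delta since the last flush. Flushing via the flush() helper above resets only the delta to the kind's identity (0 for SUM), while resetToValue() replaces both the total and the delta.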
+ + @Test + public void testSumLong() { + Counter c = Counter.longs("sum-long", SUM); + long expectedTotal = 0; + long expectedDelta = 0; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(13L).addValue(42L).addValue(0L); + expectedTotal += 55; + expectedDelta += 55; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(120L).addValue(17L).addValue(37L); + expectedTotal = expectedDelta = 174; + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = 0; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(15L).addValue(42L); + expectedTotal += 57; + expectedDelta += 57; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(100L).addValue(17L).addValue(49L); + expectedTotal = expectedDelta = 166; + assertOK(expectedTotal, expectedDelta, c); + } + + @Test + public void testSumDouble() { + Counter c = Counter.doubles("sum-double", SUM); + double expectedTotal = 0.0; + double expectedDelta = 0.0; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(Math.E).addValue(Math.PI).addValue(0.0); + expectedTotal += Math.E + Math.PI; + expectedDelta += Math.E + Math.PI; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(2)).addValue(2 * Math.PI).addValue(3 * Math.E); + expectedTotal = expectedDelta = Math.sqrt(2) + 2 * Math.PI + 3 * Math.E; + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = 0.0; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(7 * Math.PI).addValue(5 * Math.E); + expectedTotal += 7 * Math.PI + 5 * Math.E; + expectedDelta += 7 * Math.PI + 5 * Math.E; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(17)).addValue(17.0).addValue(49.0); + expectedTotal = expectedDelta = Math.sqrt(17.0) + 17.0 + 49.0; + assertOK(expectedTotal, expectedDelta, c); + } + + + // Tests for MAX. 
+ + @Test + public void testMaxLong() { + Counter c = Counter.longs("max-long", MAX); + long expectedTotal = Long.MIN_VALUE; + long expectedDelta = Long.MIN_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(13L).addValue(42L).addValue(Long.MIN_VALUE); + expectedTotal = expectedDelta = 42; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(120L).addValue(17L).addValue(37L); + expectedTotal = expectedDelta = 120; + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = Long.MIN_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(42L).addValue(15L); + expectedDelta = 42; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(100L).addValue(171L).addValue(49L); + expectedTotal = expectedDelta = 171; + assertOK(expectedTotal, expectedDelta, c); + } + + @Test + public void testMaxDouble() { + Counter c = Counter.doubles("max-double", MAX); + double expectedTotal = Double.MIN_VALUE; + double expectedDelta = Double.MIN_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(Math.E).addValue(Math.PI).addValue(Double.MIN_VALUE); + expectedTotal = expectedDelta = Math.PI; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(12345)).addValue(2 * Math.PI).addValue(3 * Math.E); + expectedTotal = expectedDelta = Math.sqrt(12345); + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = Double.MIN_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(7 * Math.PI).addValue(5 * Math.E); + expectedDelta = 7 * Math.PI; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(17)).addValue(171.0).addValue(49.0); + expectedTotal = expectedDelta = 171.0; + assertOK(expectedTotal, expectedDelta, c); + } + + + // Tests for MIN. 
+ + @Test + public void testMinLong() { + Counter c = Counter.longs("min-long", MIN); + long expectedTotal = Long.MAX_VALUE; + long expectedDelta = Long.MAX_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(13L).addValue(42L).addValue(Long.MAX_VALUE); + expectedTotal = expectedDelta = 13; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(120L).addValue(17L).addValue(37L); + expectedTotal = expectedDelta = 17; + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = Long.MAX_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(42L).addValue(18L); + expectedDelta = 18; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(100L).addValue(171L).addValue(49L); + expectedTotal = expectedDelta = 49; + assertOK(expectedTotal, expectedDelta, c); + } + + @Test + public void testMinDouble() { + Counter c = Counter.doubles("min-double", MIN); + double expectedTotal = Double.MAX_VALUE; + double expectedDelta = Double.MAX_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(Math.E).addValue(Math.PI).addValue(Double.MAX_VALUE); + expectedTotal = expectedDelta = Math.E; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(12345)).addValue(2 * Math.PI).addValue(3 * Math.E); + expectedTotal = expectedDelta = 2 * Math.PI; + assertOK(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = Double.MAX_VALUE; + assertOK(expectedTotal, expectedDelta, c); + + c.addValue(7 * Math.PI).addValue(5 * Math.E); + expectedDelta = 5 * Math.E; + assertOK(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(17)).addValue(171.0).addValue(0.0); + expectedTotal = expectedDelta = 0.0; + assertOK(expectedTotal, expectedDelta, c); + } + + + // Tests for MEAN. + + private void assertMean(long s, long sd, long c, long cd, Counter cn) { + assertEquals(s, cn.getTotalAggregate().longValue()); + assertEquals(sd, cn.getDeltaAggregate().longValue()); + assertEquals(c, cn.getTotalCount()); + assertEquals(cd, cn.getDeltaCount()); + } + + private void assertMean(double s, double sd, long c, long cd, + Counter cn) { + assertEquals(s, cn.getTotalAggregate().doubleValue(), EPSILON); + assertEquals(sd, cn.getDeltaAggregate().doubleValue(), EPSILON); + assertEquals(c, cn.getTotalCount()); + assertEquals(cd, cn.getDeltaCount()); + } + + @Test + public void testMeanLong() { + Counter c = Counter.longs("mean-long", MEAN); + long expTotal = 0; + long expDelta = 0; + long expCountTotal = 0; + long expCountDelta = 0; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.addValue(13L).addValue(42L).addValue(0L); + expTotal += 55; + expDelta += 55; + expCountTotal += 3; + expCountDelta += 3; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.resetToValue(1L, 120L).addValue(17L).addValue(37L); + expTotal = expDelta = 174; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + flush(c); + expDelta = 0; + expCountDelta = 0; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.addValue(15L).addValue(42L); + expTotal += 57; + expDelta += 57; + expCountTotal += 2; + expCountDelta += 2; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.resetToValue(3L, 100L).addValue(17L).addValue(49L); + expTotal = expDelta = 166; + expCountTotal = expCountDelta = 5; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + } + + @Test + public void testMeanDouble() { + Counter c = Counter.doubles("mean-double", MEAN); + double expTotal = 
0.0; + double expDelta = 0.0; + long expCountTotal = 0; + long expCountDelta = 0; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.addValue(Math.E).addValue(Math.PI).addValue(0.0); + expTotal += Math.E + Math.PI; + expDelta += Math.E + Math.PI; + expCountTotal += 3; + expCountDelta += 3; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.resetToValue(1L, Math.sqrt(2)).addValue(2 * Math.PI).addValue(3 * Math.E); + expTotal = expDelta = Math.sqrt(2) + 2 * Math.PI + 3 * Math.E; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + flush(c); + expDelta = 0.0; + expCountDelta = 0; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.addValue(7 * Math.PI).addValue(5 * Math.E); + expTotal += 7 * Math.PI + 5 * Math.E; + expDelta += 7 * Math.PI + 5 * Math.E; + expCountTotal += 2; + expCountDelta += 2; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + + c.resetToValue(3L, Math.sqrt(17)).addValue(17.0).addValue(49.0); + expTotal = expDelta = Math.sqrt(17.0) + 17.0 + 49.0; + expCountTotal = expCountDelta = 5; + assertMean(expTotal, expDelta, expCountTotal, expCountDelta, c); + } + + + // Tests for SET. + + private void assertSet(Set total, Set delta, Counter c) { + assertTrue(total.containsAll(c.getTotalSet())); + assertTrue(c.getTotalSet().containsAll(total)); + assertTrue(delta.containsAll(c.getDeltaSet())); + assertTrue(c.getDeltaSet().containsAll(delta)); + } + + @Test + public void testSetLong() { + Counter c = Counter.longs("set-long", SET); + HashSet expectedTotal = new HashSet<>(); + HashSet expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue(13L).addValue(42L).addValue(13L); + expectedTotal = expectedDelta = Sets.newHashSet(13L, 42L); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue(120L).addValue(17L).addValue(37L); + expectedTotal = expectedDelta = Sets.newHashSet(120L, 17L, 37L); + assertSet(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue(42L).addValue(18L); + expectedTotal.addAll(Arrays.asList(42L, 18L)); + expectedDelta = Sets.newHashSet(42L, 18L); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue(100L).addValue(171L).addValue(49L); + expectedTotal = expectedDelta = Sets.newHashSet(100L, 171L, 49L); + assertSet(expectedTotal, expectedDelta, c); + } + + @Test + public void testSetDouble() { + Counter c = Counter.doubles("set-double", SET); + HashSet expectedTotal = new HashSet<>(); + HashSet expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue(Math.E).addValue(Math.PI); + expectedTotal = expectedDelta = Sets.newHashSet(Math.E, Math.PI); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(12345)).addValue(2 * Math.PI).addValue(3 * Math.E); + expectedTotal = + expectedDelta = Sets.newHashSet(Math.sqrt(12345), 2 * Math.PI, 3 * Math.E); + assertSet(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue(7 * Math.PI).addValue(5 * Math.E); + expectedTotal.addAll(Arrays.asList(7 * Math.PI, 5 * Math.E)); + expectedDelta = Sets.newHashSet(7 * Math.PI, 5 * Math.E); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue(Math.sqrt(17)).addValue(171.0).addValue(0.0); + expectedTotal = expectedDelta = Sets.newHashSet(Math.sqrt(17), 171.0, 0.0); + assertSet(expectedTotal, 
expectedDelta, c); + } + + @Test + public void testSetString() { + Counter c = Counter.strings("set-string", SET); + HashSet expectedTotal = new HashSet<>(); + HashSet expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue("a").addValue("b").addValue("a"); + expectedTotal = expectedDelta = Sets.newHashSet("a", "b"); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue("c").addValue("d").addValue("e"); + expectedTotal = expectedDelta = Sets.newHashSet("c", "d", "e"); + assertSet(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = new HashSet<>(); + assertSet(expectedTotal, expectedDelta, c); + + c.addValue("b").addValue("f"); + expectedTotal.addAll(Arrays.asList("b", "f")); + expectedDelta = Sets.newHashSet("b", "f"); + assertSet(expectedTotal, expectedDelta, c); + + c.resetToValue("g").addValue("h").addValue("i"); + expectedTotal = expectedDelta = Sets.newHashSet("g", "h", "i"); + assertSet(expectedTotal, expectedDelta, c); + } + + + // Test for AND and OR. + + private void assertBool(boolean total, boolean delta, Counter c) { + assertEquals(total, c.getTotalAggregate().booleanValue()); + assertEquals(delta, c.getDeltaAggregate().booleanValue()); + } + + @Test + public void testBoolAnd() { + Counter c = Counter.booleans("bool-and", AND); + boolean expectedTotal = true; + boolean expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(true); + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(false); + expectedTotal = expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + c.resetToValue(true).addValue(true); + expectedTotal = expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(false); + expectedTotal = expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(false); + expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(true); + assertBool(expectedTotal, expectedDelta, c); + } + + @Test + public void testBoolOr() { + Counter c = Counter.booleans("bool-or", OR); + boolean expectedTotal = false; + boolean expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(false); + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(true); + expectedTotal = expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + c.resetToValue(false).addValue(false); + expectedTotal = expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(true); + expectedTotal = expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + flush(c); + expectedDelta = false; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(true); + expectedDelta = true; + assertBool(expectedTotal, expectedDelta, c); + + c.addValue(false); + assertBool(expectedTotal, expectedDelta, c); + } + + + // Incompatibility tests. 
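+ // Each factory call below pairs a value type with an aggregation kind that Counter does not support, so construction itself is expected to throw IllegalArgumentException, asserted via the expected attribute on each @Test.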
+ + @Test(expected = IllegalArgumentException.class) + public void testSumBool() { + Counter.booleans("counter", SUM); + } + + @Test(expected = IllegalArgumentException.class) + public void testSumString() { + Counter.strings("counter", SUM); + } + + @Test(expected = IllegalArgumentException.class) + public void testMinBool() { + Counter.booleans("counter", MIN); + } + + @Test(expected = IllegalArgumentException.class) + public void testMinString() { + Counter.strings("counter", MIN); + } + + @Test(expected = IllegalArgumentException.class) + public void testMaxBool() { + Counter.booleans("counter", MAX); + } + + @Test(expected = IllegalArgumentException.class) + public void testMaxString() { + Counter.strings("counter", MAX); + } + + @Test(expected = IllegalArgumentException.class) + public void testMeanBool() { + Counter.booleans("counter", MEAN); + } + + @Test(expected = IllegalArgumentException.class) + public void testMeanString() { + Counter.strings("counter", MEAN); + } + + @Test(expected = IllegalArgumentException.class) + public void testSetBool() { + Counter.booleans("counter", SET); + } + + @Test(expected = IllegalArgumentException.class) + public void testAndLong() { + Counter.longs("counter", AND); + } + + @Test(expected = IllegalArgumentException.class) + public void testAndDouble() { + Counter.doubles("counter", AND); + } + + @Test(expected = IllegalArgumentException.class) + public void testAndString() { + Counter.strings("counter", AND); + } + + @Test(expected = IllegalArgumentException.class) + public void testOrLong() { + Counter.longs("counter", OR); + } + + @Test(expected = IllegalArgumentException.class) + public void testOrDouble() { + Counter.doubles("counter", OR); + } + + @Test(expected = IllegalArgumentException.class) + public void testOrString() { + Counter.strings("counter", OR); + } + + @Test + public void testExtraction() { + Counter[] counters = {Counter.longs("c1", SUM), + Counter.doubles("c2", MAX), + Counter.strings("c3", SET)}; + CounterSet set = new CounterSet(); + for (Counter c : counters) { + set.addCounter(c); + } + + List cloudCountersFromSet = CloudCounterUtils.extractCounters(set, true); + + List cloudCountersFromArray = + CounterTestUtils.extractCounterUpdates(Arrays.asList(counters), true); + + assertEquals(cloudCountersFromArray.size(), cloudCountersFromSet.size()); + for (int i = 0; i < cloudCountersFromArray.size(); i++) { + assertEquals(cloudCountersFromArray.get(i), cloudCountersFromSet.get(i)); + } + + assertEquals(2, cloudCountersFromSet.size()); // empty set was ignored + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java new file mode 100644 index 0000000000000..9c428476e28f0 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java @@ -0,0 +1,123 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; + +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.CloudCounterUtils; + +import org.junit.Assert; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +/** + * Utilities for testing {@link Counter}s. + */ +public class CounterTestUtils { + + /** + * Extracts a MetricUpdate update from the given counter. This is used mainly + * for testing. + * + * @param extractDelta specifies whether or not to extract the cumulative + * aggregate value or the delta since the last extraction. + */ + public static MetricUpdate extractCounterUpdate(Counter counter, + boolean extractDelta) { + // This may be invoked asynchronously with regular counter updates but + // access to counter data is synchronized, so this is safe. + return CloudCounterUtils.extractCounter(counter, extractDelta); + } + + /** + * Extracts MetricUpdate updates from the given counters. This is used mainly + * for testing. + * + * @param extractDelta specifies whether or not to extract the cumulative + * aggregate values or the deltas since the last extraction. + */ + public static List extractCounterUpdates( + Collection> counters, boolean extractDelta) { + // This may be invoked asynchronously with regular counter updates but + // access to counter data is synchronized, so this is safe. Note however + // that the result is NOT an atomic snapshot across all given counters. + List cloudCounters = new ArrayList<>(counters.size()); + for (Counter counter : counters) { + MetricUpdate cloudCounter = extractCounterUpdate(counter, extractDelta); + if (null != cloudCounter) { + cloudCounters.add(cloudCounter); + } + } + return cloudCounters; + } + + + // These methods expose a counter's values for testing. + + public static T getTotalAggregate(Counter counter) { + return counter.getTotalAggregate(); + } + + public static T getDeltaAggregate(Counter counter) { + return counter.getDeltaAggregate(); + } + + public static long getTotalCount(Counter counter) { + return counter.getTotalCount(); + } + + public static long getDeltaCount(Counter counter) { + return counter.getDeltaCount(); + } + + public static Set getTotalSet(Counter counter) { + return counter.getTotalSet(); + } + + public static Set getDeltaSet(Counter counter) { + return counter.getDeltaSet(); + } + + /** + * A utility method that passes the given (unencoded) elements through + * coder's registerByteSizeObserver() and encode() methods, and confirms + * they are mutually consistent. This is useful for testing coder + * implementations. 
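+ * (Each element's size, as reported through registerByteSizeObserver(), feeds a MEAN counter; the counter's total must equal the number of bytes actually produced by encode(), and its count must equal the number of elements.)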
+ */ + public static void testByteCount(Coder coder, Coder.Context context, Object[] elements) + throws Exception { + Counter meanByteCount = Counter.longs("meanByteCount", MEAN); + ElementByteSizeObserver observer = new ElementByteSizeObserver(meanByteCount); + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + for (Object elem : elements) { + coder.registerByteSizeObserver(elem, observer, context); + coder.encode(elem, os, context); + observer.advance(); + } + long expectedLength = os.toByteArray().length; + + Assert.assertEquals(expectedLength, (long) getTotalAggregate(meanByteCount)); + Assert.assertEquals(elements.length, (long) getTotalCount(meanByteCount)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/MetricTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/MetricTest.java new file mode 100644 index 0000000000000..0c60901ca0a69 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/MetricTest.java @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link Metric}. */ +@RunWith(JUnit4.class) +public class MetricTest { + @Test + public void testDoubleMetric() { + String name = "metric-name"; + double value = 3.14; + + DoubleMetric doubleMetric = new DoubleMetric(name, value); + + assertEquals(name, doubleMetric.getName()); + assertEquals((Double) value, doubleMetric.getValue()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReaderTest.java new file mode 100644 index 0000000000000..5a41494717280 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReaderTest.java @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.api.client.util.Lists.newArrayList; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +import com.google.cloud.dataflow.sdk.runners.worker.ByteArrayShufflePosition; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** Unit tests for {@link BatchingShuffleEntryReader}. */ +@RunWith(JUnit4.class) +public final class BatchingShuffleEntryReaderTest { + private static final byte[] KEY = {0xA}; + private static final byte[] SKEY = {0xB}; + private static final byte[] VALUE = {0xC}; + private static final ShufflePosition START_POSITION = + ByteArrayShufflePosition.of("aaa".getBytes()); + private static final ShufflePosition END_POSITION = + ByteArrayShufflePosition.of("zzz".getBytes()); + private static final ShufflePosition NEXT_START_POSITION = + ByteArrayShufflePosition.of("next".getBytes()); + private static final ShufflePosition SECOND_NEXT_START_POSITION = + ByteArrayShufflePosition.of("next-second".getBytes()); + + @Mock private ShuffleBatchReader batchReader; + private ShuffleEntryReader reader; + + @Before + public void initMocksAndReader() { + MockitoAnnotations.initMocks(this); + reader = new BatchingShuffleEntryReader(batchReader); + } + + @Test + public void readerCanRead() throws Exception { + ShuffleEntry e1 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry e2 = new ShuffleEntry(KEY, SKEY, VALUE); + ArrayList entries = new ArrayList<>(); + entries.add(e1); + entries.add(e2); + when(batchReader.read(START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(entries, null)); + List results = newArrayList(reader.read(START_POSITION, END_POSITION)); + assertThat(results, contains(e1, e2)); + } + + @Test + public void readerIteratorCanBeCopied() throws Exception { + ShuffleEntry e1 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry e2 = new ShuffleEntry(KEY, SKEY, VALUE); + ArrayList entries = new ArrayList<>(); + entries.add(e1); + entries.add(e2); + when(batchReader.read(START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(entries, null)); + Reiterator it = reader.read(START_POSITION, END_POSITION); + assertThat(it.hasNext(), equalTo(Boolean.TRUE)); + assertThat(it.next(), equalTo(e1)); + Reiterator copy = it.copy(); + assertThat(it.hasNext(), equalTo(Boolean.TRUE)); + assertThat(it.next(), equalTo(e2)); + assertThat(it.hasNext(), equalTo(Boolean.FALSE)); + assertThat(copy.hasNext(), equalTo(Boolean.TRUE)); + assertThat(copy.next(), equalTo(e2)); + assertThat(copy.hasNext(), equalTo(Boolean.FALSE)); + } + + @Test + public void readerShouldMergeMultipleBatchResults() throws Exception { + ShuffleEntry e1 = new ShuffleEntry(KEY, SKEY, VALUE); + List e1s = Collections.singletonList(e1); + ShuffleEntry e2 = new ShuffleEntry(KEY, SKEY, VALUE); + List e2s = Collections.singletonList(e2); + when(batchReader.read(START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION)); + 
when(batchReader.read(NEXT_START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(e2s, null)); + List results = newArrayList(reader.read(START_POSITION, END_POSITION)); + assertThat(results, contains(e1, e2)); + + verify(batchReader).read(START_POSITION, END_POSITION); + verify(batchReader).read(NEXT_START_POSITION, END_POSITION); + verifyNoMoreInteractions(batchReader); + } + + @Test + public void readerShouldMergeMultipleBatchResultsIncludingEmptyShards() + throws Exception { + List e1s = new ArrayList<>(); + List e2s = new ArrayList<>(); + ShuffleEntry e3 = new ShuffleEntry(KEY, SKEY, VALUE); + List e3s = Collections.singletonList(e3); + when(batchReader.read(START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(e1s, NEXT_START_POSITION)); + when(batchReader.read(NEXT_START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(e2s, SECOND_NEXT_START_POSITION)); + when(batchReader.read(SECOND_NEXT_START_POSITION, END_POSITION)) + .thenReturn(new ShuffleBatchReader.Batch(e3s, null)); + List results = newArrayList(reader.read(START_POSITION, END_POSITION)); + assertThat(results, contains(e3)); + + verify(batchReader).read(START_POSITION, END_POSITION); + verify(batchReader).read(NEXT_START_POSITION, END_POSITION); + verify(batchReader).read(SECOND_NEXT_START_POSITION, END_POSITION); + verifyNoMoreInteractions(batchReader); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReaderTest.java new file mode 100644 index 0000000000000..4175c91505963 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/CachingShuffleBatchReaderTest.java @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.ArrayList; + +/** Unit tests for {@link CachingShuffleBatchReader}. */ +@RunWith(JUnit4.class) +public final class CachingShuffleBatchReaderTest { + + private final ShuffleBatchReader.Batch testBatch = + new ShuffleBatchReader.Batch(new ArrayList(), null); + + @Test + public void readerShouldCacheReads() throws IOException { + ShuffleBatchReader base = mock(ShuffleBatchReader.class); + CachingShuffleBatchReader reader = new CachingShuffleBatchReader(base); + when(base.read(null, null)).thenReturn(testBatch); + // N.B. 
We need to capture the result of reader.read() in order to ensure + // that there's a strong reference to it, preventing it from being + // collected. Not that this should be an issue in tests, but it's good to + // be solid. + ShuffleBatchReader.Batch read = reader.read(null, null); + assertThat(read, equalTo(testBatch)); + assertThat(reader.read(null, null), equalTo(testBatch)); + assertThat(reader.read(null, null), equalTo(testBatch)); + assertThat(reader.read(null, null), equalTo(testBatch)); + assertThat(reader.read(null, null), equalTo(testBatch)); + verify(base, times(1)).read(null, null); + } + + @Test + public void readerShouldNotCacheExceptions() throws IOException { + ShuffleBatchReader base = mock(ShuffleBatchReader.class); + CachingShuffleBatchReader reader = new CachingShuffleBatchReader(base); + when(base.read(null, null)) + .thenThrow(new IOException("test")) + .thenReturn(testBatch); + try { + reader.read(null, null); + fail("expected an IOException"); + } catch (IOException e) { + // Nothing to do -- exception is expected. + } + assertThat(reader.read(null, null), equalTo(testBatch)); + verify(base, times(2)).read(null, null); + } + + @Test + public void readerShouldRereadClearedBatches() throws IOException { + ShuffleBatchReader base = mock(ShuffleBatchReader.class); + CachingShuffleBatchReader reader = new CachingShuffleBatchReader(base); + when(base.read(null, null)).thenReturn(testBatch); + ShuffleBatchReader.Batch read = reader.read(null, null); + assertThat(read, equalTo(testBatch)); + verify(base, times(1)).read(null, null); + CachingShuffleBatchReader.BatchRange range = + new CachingShuffleBatchReader.BatchRange(null, null); + CachingShuffleBatchReader.RangeReadReference ref = + reader.cache.get(range); + assertThat(ref, notNullValue()); + ref.clear(); + read = reader.read(null, null); + assertThat(read, equalTo(testBatch)); + verify(base, times(2)).read(null, null); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java new file mode 100644 index 0000000000000..0c678abe75d55 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.ElementByteSizeObservableCoder; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservable; + +import org.junit.Assert; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Observable; +import java.util.Observer; + +/** + * Utilities for tests. + */ +public class ExecutorTestUtils { + // Do not instantiate. + private ExecutorTestUtils() { } + + /** An Operation with a specified number of outputs. */ + public static class TestOperation extends Operation { + public TestOperation(int numOutputs) { + this(numOutputs, new CounterSet()); + } + + TestOperation(int numOutputs, CounterSet counters) { + this(numOutputs, counters, "test-"); + } + + TestOperation(int numOutputs, CounterSet counters, String counterPrefix) { + this(numOutputs, counterPrefix, counters.getAddCounterMutator(), + new StateSampler(counterPrefix, counters.getAddCounterMutator())); + } + + TestOperation(int numOutputs, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super("TestOperation", + createOutputReceivers(numOutputs, counterPrefix, + addCounterMutator, stateSampler), + counterPrefix, + addCounterMutator, + stateSampler); + } + + private static OutputReceiver[] createOutputReceivers( + int numOutputs, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + OutputReceiver[] receivers = new OutputReceiver[numOutputs]; + for (int i = 0; i < numOutputs; i++) { + receivers[i] = new OutputReceiver( + "out_" + i, + new ElementByteSizeObservableCoder(StringUtf8Coder.of()), + counterPrefix, + addCounterMutator); + } + return receivers; + } + } + + /** An OutputReceiver that allows the output elements to be retrieved. 
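+ * (It records every element passed to process() and overrides sampleElement() to always sample, so element byte sizes are observed deterministically in tests.)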
*/ + public static class TestReceiver extends OutputReceiver { + List outputElems = new ArrayList<>(); + + public TestReceiver(CounterSet counterSet) { + this("test_receiver_out", counterSet); + } + + public TestReceiver(Coder coder) { + this(coder, new CounterSet()); + } + + public TestReceiver(Coder coder, CounterSet counterSet) { + this("test_receiver_out", + new ElementByteSizeObservableCoder(coder), + counterSet, + "test-"); + } + + public TestReceiver(CounterSet counterSet, String counterPrefix) { + this("test_receiver_out", counterSet, counterPrefix); + } + + public TestReceiver(String outputName, CounterSet counterSet) { + this(outputName, counterSet, "test-"); + } + + public TestReceiver(String outputName, + CounterSet counterSet, String counterPrefix) { + this(outputName, + new ElementByteSizeObservableCoder(StringUtf8Coder.of()), + counterSet, + counterPrefix); + } + + public TestReceiver(ElementByteSizeObservable elementByteSizeObservable, + CounterSet counterSet, String counterPrefix) { + this("test_receiver_out", elementByteSizeObservable, + counterSet, counterPrefix); + } + + public TestReceiver(String outputName, + ElementByteSizeObservable elementByteSizeObservable, + CounterSet counterSet, String counterPrefix) { + super(outputName, + elementByteSizeObservable, + counterPrefix, + counterSet.getAddCounterMutator()); + } + + @Override + public void process(Object elem) throws Exception { + super.process(elem); + outputElems.add(elem); + } + + @Override + protected boolean sampleElement() { + return true; + } + } + + /** A {@code Source} that yields a specified set of values. */ + public static class TestSource extends Source { + List inputs = new ArrayList<>(); + + public void addInput(String... inputs) { + this.inputs.addAll(Arrays.asList(inputs)); + } + + @Override + public SourceIterator iterator() { + return new TestSourceIterator(inputs); + } + + class TestSourceIterator extends AbstractSourceIterator { + Iterator iter; + boolean closed = false; + + public TestSourceIterator(List inputs) { + iter = inputs.iterator(); + } + + @Override + public boolean hasNext() { return iter.hasNext(); } + + @Override + public String next() { + String next = iter.next(); + notifyElementRead(next.length()); + return next; + } + + @Override + public void close() { + Assert.assertFalse(closed); + closed = true; + } + } + } + + /** + * An Observer that stores all sizes into an ArrayList, to compare + * against the gold standard during testing. + */ + public static class TestSourceObserver implements Observer { + private final Source source; + private final List sizes; + + public TestSourceObserver(Source source) { + this(source, new ArrayList()); + } + + public TestSourceObserver(Source source, List sizes) { + this.source = source; + this.sizes = sizes; + source.addObserver(this); + } + + @Override + public void update(Observable obs, Object obj) { + sizes.add((int) (long) obj); + } + + public List getActualSizes() { + return sizes; + } + } + + /** A {@code Sink} that allows the output elements to be retrieved. 
*/ + public static class TestSink extends Sink { + List outputElems = new ArrayList<>(); + boolean closed = false; + + @Override + public SinkWriter writer() { + return new TestSinkWriter(); + } + + class TestSinkWriter implements SinkWriter { + @Override + public long add(String outputElem) { + outputElems.add(outputElem); + return outputElem.length(); + } + + @Override + public void close() { + Assert.assertFalse(closed); + closed = true; + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java new file mode 100644 index 0000000000000..d0f8e747de7eb --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for FlattenOperation. 
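+ * Verifies that elements pass through the operation unchanged and that the
+ * expected execution-time and output counters are recorded.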
+ */ +@RunWith(JUnit4.class) +public class FlattenOperationTest { + @Test + public void testRunFlattenOperation() throws Exception { + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + ExecutorTestUtils.TestReceiver receiver = + new ExecutorTestUtils.TestReceiver(counterSet, counterPrefix); + + FlattenOperation flattenOperation = + new FlattenOperation(receiver, + counterPrefix, counterSet.getAddCounterMutator(), + stateSampler); + + flattenOperation.start(); + + flattenOperation.process("hi"); + flattenOperation.process("there"); + flattenOperation.process(""); + flattenOperation.process("bob"); + + flattenOperation.finish(); + + Assert.assertThat(receiver.outputElems, + CoreMatchers.hasItems("hi", "there", "", "bob")); + + Assert.assertEquals( + new CounterSet( + Counter.longs("test-FlattenOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-FlattenOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-FlattenOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-FlattenOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-FlattenOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-FlattenOperation-finish-msecs")).getAggregate(false)), + Counter.longs("test_receiver_out-ElementCount", SUM) + .resetToValue(4L), + Counter.longs("test_receiver_out-MeanByteCount", MEAN) + .resetToValue(4, 10L)), + counterSet); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java new file mode 100644 index 0000000000000..27017962ccc5f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java @@ -0,0 +1,290 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestSource; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for MapTaskExecutor. + */ +@RunWith(JUnit4.class) +public class MapTaskExecutorTest { + static class TestOperation extends Operation { + String label; + List log; + + private static CounterSet counterSet = new CounterSet(); + private static String counterPrefix = "test-"; + private static StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + + TestOperation(String label, List log) { + super(label, + new OutputReceiver[]{}, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + this.label = label; + this.log = log; + } + + TestOperation(String outputName, + String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler, + long outputCount) { + super(outputName, new OutputReceiver[]{}, + counterPrefix, addCounterMutator, stateSampler); + addCounterMutator.addCounter( + Counter.longs(outputName + "-ElementCount", SUM) + .resetToValue(outputCount)); + } + + @Override + public void start() throws Exception { + super.start(); + log.add(label + " started"); + } + + @Override + public void finish() throws Exception { + log.add(label + " finished"); + super.finish(); + } + } + + // A mock ReadOperation fed to a MapTaskExecutor in test. + static class TestReadOperation extends ReadOperation { + private ApproximateProgress progress = null; + + TestReadOperation(OutputReceiver outputReceiver, + String counterPrefix, + AddCounterMutator addCounterMutator, + StateSampler stateSampler) { + super(new TestSource(), outputReceiver, + counterPrefix, addCounterMutator, stateSampler); + } + + @Override + public Source.Progress getProgress() { + return cloudProgressToSourceProgress(progress); + } + + @Override + public Source.Position proposeStopPosition( + Source.Progress proposedStopPosition) { + // Fakes the return with the same position as proposed. 
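+ // (The proposed progress is round-tripped through its cloud form and its
+ // embedded position is handed back as a Source.Position.)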
+ return cloudPositionToSourcePosition( + sourceProgressToCloudProgress(proposedStopPosition) + .getPosition()); + } + + public void setProgress(ApproximateProgress progress) { + this.progress = progress; + } + } + + @Test + public void testExecuteMapTaskExecutor() throws Exception { + List log = new ArrayList<>(); + + List operations = Arrays.asList(new Operation[]{ + new TestOperation("o1", log), + new TestOperation("o2", log), + new TestOperation("o3", log)}); + + CounterSet counters = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counters.getAddCounterMutator()); + MapTaskExecutor executor = + new MapTaskExecutor(operations, counters, stateSampler); + + executor.execute(); + + Assert.assertThat(log, CoreMatchers.hasItems( + "o3 started", + "o2 started", + "o1 started", + "o1 finished", + "o2 finished", + "o3 finished")); + + executor.close(); + } + + @Test + public void testGetOutputCounters() throws Exception { + CounterSet counters = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counters.getAddCounterMutator()); + List operations = Arrays.asList(new Operation[]{ + new TestOperation( + "o1", counterPrefix, counters.getAddCounterMutator(), + stateSampler, 1), + new TestOperation( + "o2", counterPrefix, counters.getAddCounterMutator(), + stateSampler, 2), + new TestOperation( + "o3", counterPrefix, counters.getAddCounterMutator(), + stateSampler, 3)}); + + MapTaskExecutor executor = + new MapTaskExecutor(operations, counters, stateSampler); + + CounterSet counterSet = executor.getOutputCounters(); + Assert.assertEquals( + new CounterSet( + Counter.longs("o1-ElementCount", SUM).resetToValue(1L), + Counter.longs("test-o1-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o1-start-msecs")).getAggregate(false)), + Counter.longs("test-o1-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o1-process-msecs")).getAggregate(false)), + Counter.longs("test-o1-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o1-finish-msecs")).getAggregate(false)), + Counter.longs("o2-ElementCount", SUM).resetToValue(2L), + Counter.longs("test-o2-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o2-start-msecs")).getAggregate(false)), + Counter.longs("test-o2-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o2-process-msecs")).getAggregate(false)), + Counter.longs("test-o2-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o2-finish-msecs")).getAggregate(false)), + Counter.longs("o3-ElementCount", SUM).resetToValue(3L), + Counter.longs("test-o3-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o3-start-msecs")).getAggregate(false)), + Counter.longs("test-o3-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o3-process-msecs")).getAggregate(false)), + Counter.longs("test-o3-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o3-finish-msecs")).getAggregate(false))), + counterSet); + + executor.close(); + } + + @Test + public void testGetReadOperation() throws Exception { + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + // Test 
MapTaskExecutor without a single operation. + MapTaskExecutor executor = + new MapTaskExecutor(new ArrayList(), + counterSet, stateSampler); + + try { + ReadOperation readOperation = executor.getReadOperation(); + Assert.fail("Expected IllegalStateException."); + } catch (IllegalStateException e) { + // Exception expected + } + + List operations = Arrays.asList(new Operation[]{ + new TestOperation("o1", + counterPrefix, counterSet.getAddCounterMutator(), + stateSampler, 1), + new TestOperation("o2", + counterPrefix, counterSet.getAddCounterMutator(), + stateSampler, 2)}); + // Test MapTaskExecutor without ReadOperation. + executor = new MapTaskExecutor(operations, counterSet, stateSampler); + + try { + ReadOperation readOperation = executor.getReadOperation(); + Assert.fail("Expected IllegalStateException."); + } catch (IllegalStateException e) { + // Exception expected + } + + executor.close(); + + TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); + operations = Arrays.asList(new Operation[]{ + new TestReadOperation( + receiver, counterPrefix, counterSet.getAddCounterMutator(), + stateSampler)}); + executor = new MapTaskExecutor(operations, counterSet, stateSampler); + Assert.assertEquals(operations.get(0), executor.getReadOperation()); + executor.close(); + } + + @Test + public void testGetProgressAndRequestSplit() throws Exception { + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); + TestReadOperation operation = + new TestReadOperation(receiver, + counterPrefix, counterSet.getAddCounterMutator(), + stateSampler); + MapTaskExecutor executor = new MapTaskExecutor( + Arrays.asList(new Operation[]{operation}), counterSet, stateSampler); + + operation.setProgress(new ApproximateProgress().setPosition(makePosition(1L))); + Assert.assertEquals( + makePosition(1L), + sourceProgressToCloudProgress(executor.getWorkerProgress()).getPosition()); + Assert.assertEquals( + makePosition(1L), + sourcePositionToCloudPosition( + executor.proposeStopPosition( + cloudProgressToSourceProgress( + new ApproximateProgress().setPosition(makePosition(1L)))))); + + executor.close(); + } + + private com.google.api.services.dataflow.model.Position makePosition(long index) { + com.google.api.services.dataflow.model.Position position = + new com.google.api.services.dataflow.model.Position(); + position.setRecordIndex(index); + return position; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java new file mode 100644 index 0000000000000..08955ac564d7c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.ElementByteSizeObservableCoder; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for OutputReceiver. + */ +@RunWith(JUnit4.class) +public class OutputReceiverTest { + // We test OutputReceiver where every element is sampled. + static class TestOutputReceiver extends OutputReceiver { + public TestOutputReceiver() { + this(new CounterSet()); + } + + public TestOutputReceiver(CounterSet counters) { + super("output_name", + new ElementByteSizeObservableCoder(StringUtf8Coder.of()), + "test-", + counters.getAddCounterMutator()); + } + + @Override + protected boolean sampleElement() { + return true; + } + } + + @Test + public void testEmptyOutputReceiver() throws Exception { + TestOutputReceiver fanOut = new TestOutputReceiver(); + fanOut.process("hi"); + fanOut.process("bob"); + + Assert.assertEquals("output_name", fanOut.getName()); + Assert.assertEquals( + 2, + (long) CounterTestUtils.getTotalAggregate(fanOut.getElementCount())); + Assert.assertEquals( + 5, + (long) CounterTestUtils.getTotalAggregate(fanOut.getMeanByteCount())); + Assert.assertEquals( + 2, + (long) CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); + } + + @Test + public void testMultipleOutputReceiver() throws Exception { + TestOutputReceiver fanOut = new TestOutputReceiver(); + + CounterSet counters = new CounterSet(); + String counterPrefix = "test-"; + + TestReceiver receiver1 = new TestReceiver(counters, counterPrefix); + fanOut.addOutput(receiver1); + + TestReceiver receiver2 = new TestReceiver(counters, counterPrefix); + fanOut.addOutput(receiver2); + + fanOut.process("hi"); + fanOut.process("bob"); + + Assert.assertEquals("output_name", fanOut.getName()); + Assert.assertEquals( + 2, + (long) CounterTestUtils.getTotalAggregate(fanOut.getElementCount())); + Assert.assertEquals( + 5, + (long) CounterTestUtils.getTotalAggregate(fanOut.getMeanByteCount())); + Assert.assertEquals( + 2, + (long) CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); + Assert.assertThat(receiver1.outputElems, + CoreMatchers.hasItems("hi", "bob")); + Assert.assertThat(receiver2.outputElems, + CoreMatchers.hasItems("hi", "bob")); + } + + @Test(expected = ClassCastException.class) + public void testIncorrectType() throws Exception { + TestOutputReceiver fanOut = new TestOutputReceiver(); + fanOut.process(5); + } + + @Test(expected = CoderException.class) + public void testNullArgument() throws Exception { + TestOutputReceiver fanOut = new TestOutputReceiver(); + fanOut.process(null); + } + + @Test + public void testAddingCountersIntoCounterSet() throws Exception { + CounterSet counters = new 
CounterSet(); + TestOutputReceiver receiver = new TestOutputReceiver(counters); + + Assert.assertEquals( + new CounterSet( + Counter.longs("output_name-ElementCount", SUM) + .resetToValue(0L), + Counter.longs("output_name-MeanByteCount", MEAN) + .resetToValue(0, 0L)), + counters); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java new file mode 100644 index 0000000000000..b08266cbb4d82 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for ParDoOperation. + */ +@RunWith(JUnit4.class) +public class ParDoOperationTest { + static class TestParDoFn extends ParDoFn { + final OutputReceiver outputReceiver; + + public TestParDoFn(OutputReceiver outputReceiver) { + this.outputReceiver = outputReceiver; + } + + @Override + public void startBundle(final Receiver... 
receivers) throws Exception { + if (receivers.length != 1) { + throw new AssertionError( + "unexpected number of receivers for DoFn"); + } + + outputReceiver.process("x-start"); + } + + @Override + public void processElement(Object elem) throws Exception { + outputReceiver.process("y-" + elem); + } + + @Override + public void finishBundle() throws Exception { + outputReceiver.process("z-finish"); + } + } + + @Test + public void testRunParDoOperation() throws Exception { + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + ExecutorTestUtils.TestReceiver receiver = + new ExecutorTestUtils.TestReceiver(counterSet); + + ParDoOperation parDoOperation = + new ParDoOperation( + "ParDoOperation", + new TestParDoFn(receiver), + new OutputReceiver[]{ receiver }, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + + parDoOperation.start(); + + parDoOperation.process("hi"); + parDoOperation.process("there"); + parDoOperation.process(""); + parDoOperation.process("bob"); + + parDoOperation.finish(); + + Assert.assertThat( + receiver.outputElems, + CoreMatchers.hasItems( + "x-start", "y-hi", "y-there", "y-", "y-bob", "z-finish")); + + Assert.assertEquals( + new CounterSet( + Counter.longs("test-ParDoOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ParDoOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-ParDoOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ParDoOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-ParDoOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ParDoOperation-finish-msecs")).getAggregate(false)), + Counter.longs("test_receiver_out-ElementCount", SUM) + .resetToValue(6L), + Counter.longs("test_receiver_out-MeanByteCount", MEAN) + .resetToValue(6, 33L)), + counterSet); + } + + // TODO: Test side inputs. + // TODO: Test side outputs. +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java new file mode 100644 index 0000000000000..620ac0c89894f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java @@ -0,0 +1,397 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.isIn; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.CoderGroupingKeyCreator; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.CoderSizeEstimator; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.ElementByteSizeObservableCoder; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.PairInfo; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.BufferingGroupingTable; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.Combiner; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.CombiningGroupingTable; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.GroupingKeyCreator; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.SamplingSizeEstimator; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.SizeEstimator; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.hamcrest.Description; +import org.hamcrest.TypeSafeDiagnosingMatcher; +import org.hamcrest.collection.IsIterableContainingInAnyOrder; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +/** + * Tests for PartialGroupByKeyOperation. 
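+ * Covers the operation itself, the grouping-table internals, and the
+ * sampling size estimator it builds on.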
+ */ +@RunWith(JUnit4.class) +public class PartialGroupByKeyOperationTest { + @Test + public void testRunPartialGroupByKeyOperation() throws Exception { + Coder keyCoder = StringUtf8Coder.of(); + Coder valueCoder = BigEndianIntegerCoder.of(); + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + TestReceiver receiver = + new TestReceiver( + new ElementByteSizeObservableCoder( + WindowedValue.getValueOnlyCoder( + KvCoder.of(keyCoder, IterableCoder.of(valueCoder)))), + counterSet, counterPrefix); + + PartialGroupByKeyOperation pgbkOperation = + new PartialGroupByKeyOperation(new CoderGroupingKeyCreator(keyCoder), + new CoderSizeEstimator(keyCoder), + new CoderSizeEstimator(valueCoder), + PairInfo.create(), + receiver, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); + + pgbkOperation.start(); + + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("hi", 4))); + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("there", 5))); + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("hi", 6))); + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("joe", 7))); + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("there", 8))); + pgbkOperation.process(WindowedValue.valueInEmptyWindows(KV.of("hi", 9))); + + pgbkOperation.finish(); + + assertThat(receiver.outputElems, + IsIterableContainingInAnyOrder.containsInAnyOrder( + WindowedValue.valueInEmptyWindows(KV.of("hi", Arrays.asList(4, 6, 9))), + WindowedValue.valueInEmptyWindows(KV.of("there", Arrays.asList(5, 8))), + WindowedValue.valueInEmptyWindows(KV.of("joe", Arrays.asList(7))))); + + // Exact counter values depend on size of encoded data. If encoding + // changes, then these expected counters should change to match. + assertEquals( + new CounterSet( + Counter.longs("test-PartialGroupByKeyOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-PartialGroupByKeyOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-PartialGroupByKeyOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-PartialGroupByKeyOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-PartialGroupByKeyOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-PartialGroupByKeyOperation-finish-msecs")).getAggregate(false)), + Counter.longs("test_receiver_out-ElementCount", SUM) + .resetToValue(3L), + Counter.longs("test_receiver_out-MeanByteCount", MEAN) + .resetToValue(3, 49L)), + counterSet); + } + + // TODO: Add tests about early flushing when the table fills. + + //////////////////////////////////////////////////////////////////////////// + // Tests for PartialGroupByKey internals. + + /** + * Return the key as its grouping key. + */ + public static class IdentityGroupingKeyCreator implements GroupingKeyCreator { + @Override + public Object createGroupingKey(Object key) { + return key; + } + } + + /** + * "Estimate" the size of longs by looking at their value. + */ + private static class IdentitySizeEstimator implements SizeEstimator { + public int calls = 0; + @Override + public long estimateSize(Long element) { + calls++; + return element; + } + } + + /** + * "Estimate" the size of strings by taking the tenth power of their length. 
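+ * For example, a four-character key such as "DDDD" is treated as 10,000
+ * bytes, which exceeds the 1,000-byte tables used below and is flushed
+ * immediately.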
+ */ + private static class StringPowerSizeEstimator implements SizeEstimator { + @Override + public long estimateSize(String element) { + return (long) Math.pow(10, element.length()); + } + } + + @Test + public void testBufferingGroupingTable() throws Exception { + BufferingGroupingTable table = + new BufferingGroupingTable<>( + 1000, new IdentityGroupingKeyCreator(), PairInfo.create(), + new StringPowerSizeEstimator(), new StringPowerSizeEstimator()); + TestReceiver receiver = new TestReceiver( + WindowedValue.getValueOnlyCoder( + KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())))); + + table.put("A", "a", receiver); + table.put("B", "b1", receiver); + table.put("B", "b2", receiver); + table.put("C", "c", receiver); + assertThat(unwindowed(receiver.outputElems), empty()); + + table.put("C", "cccc", receiver); + assertThat(unwindowed(receiver.outputElems), + hasItem((Object) KV.of("C", Arrays.asList("c", "cccc")))); + + table.put("DDDD", "d", receiver); + assertThat(unwindowed(receiver.outputElems), + hasItem((Object) KV.of("DDDD", Arrays.asList("d")))); + + table.flush(receiver); + assertThat(unwindowed(receiver.outputElems), + IsIterableContainingInAnyOrder.containsInAnyOrder( + KV.of("A", Arrays.asList("a")), + KV.of("B", Arrays.asList("b1", "b2")), + KV.of("C", Arrays.asList("c", "cccc")), + KV.of("DDDD", Arrays.asList("d")))); + } + + @Test + public void testCombiningGroupingTable() throws Exception { + Combiner summingCombineFn = + new Combiner() { + public Long createAccumulator(Object key) { + return 0L; + } + public Long add(Object key, Long accumulator, Integer value) { + return accumulator + value; + } + public Long merge(Object key, Iterable accumulators) { + long sum = 0; + for (Long part : accumulators) { sum += part; } + return sum; + } + public Long extract(Object key, Long accumulator) { + return accumulator; + } + }; + + CombiningGroupingTable table = + new CombiningGroupingTable( + 1000, new IdentityGroupingKeyCreator(), PairInfo.create(), + summingCombineFn, + new StringPowerSizeEstimator(), new IdentitySizeEstimator()); + + TestReceiver receiver = new TestReceiver( + WindowedValue.getValueOnlyCoder( + KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()))); + + table.put("A", 1, receiver); + table.put("B", 2, receiver); + table.put("B", 3, receiver); + table.put("C", 4, receiver); + assertThat(unwindowed(receiver.outputElems), empty()); + + table.put("C", 5000, receiver); + assertThat(unwindowed(receiver.outputElems), hasItem((Object) KV.of("C", 5004L))); + + table.put("DDDD", 6, receiver); + assertThat(unwindowed(receiver.outputElems), hasItem((Object) KV.of("DDDD", 6L))); + + table.flush(receiver); + assertThat(unwindowed(receiver.outputElems), + IsIterableContainingInAnyOrder.containsInAnyOrder( + KV.of("A", 1L), + KV.of("B", 2L + 3), + KV.of("C", 5000L + 4), + KV.of("DDDD", 6L))); + } + + private List unwindowed(Iterable windowed) { + List unwindowed = new ArrayList<>(); + for (Object withWindow : windowed) { + unwindowed.add(((WindowedValue) withWindow).getValue()); + } + return unwindowed; + } + + + //////////////////////////////////////////////////////////////////////////// + // Tests for the sampling size estimator. + + @Test + public void testSampleFlatSizes() throws Exception { + IdentitySizeEstimator underlying = new IdentitySizeEstimator(); + SizeEstimator estimator = + new SamplingSizeEstimator(underlying, 0.05, 1.0, 10, new Random(1)); + // First 10 elements are always sampled. 
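+ // (This minimum appears to come from the constructor argument 10 above;
+ // treated here as an assumption about SamplingSizeEstimator's parameters.)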
+ for (int k = 0; k < 10; k++) { + assertEquals(100, estimator.estimateSize(100L)); + assertEquals(k + 1, underlying.calls); + } + // Next 10 are sometimes sampled. + for (int k = 10; k < 20; k++) { + assertEquals(100, estimator.estimateSize(100L)); + } + assertThat(underlying.calls, between(11, 19)); + int initialCalls = underlying.calls; + // Next 1000 are sampled at about 5%. + for (int k = 20; k < 1020; k++) { + assertEquals(100, estimator.estimateSize(100L)); + } + assertThat(underlying.calls - initialCalls, between(40, 60)); + } + + @Test + public void testSampleBoringSizes() throws Exception { + IdentitySizeEstimator underlying = new IdentitySizeEstimator(); + SizeEstimator estimator = + new SamplingSizeEstimator(underlying, 0.05, 1.0, 10, new Random(1)); + // First 10 elements are always sampled. + for (int k = 0; k < 10; k += 2) { + assertEquals(100, estimator.estimateSize(100L)); + assertEquals(102, estimator.estimateSize(102L)); + assertEquals(k + 2, underlying.calls); + } + // Next 10 are sometimes sampled. + for (int k = 10; k < 20; k += 2) { + assertThat(estimator.estimateSize(100L), between(100L, 102L)); + assertThat(estimator.estimateSize(102L), between(100L, 102L)); + } + assertThat(underlying.calls, between(11, 19)); + int initialCalls = underlying.calls; + // Next 1000 are sampled at about 5%. + for (int k = 20; k < 1020; k += 2) { + assertThat(estimator.estimateSize(100L), between(100L, 102L)); + assertThat(estimator.estimateSize(102L), between(100L, 102L)); + } + assertThat(underlying.calls - initialCalls, between(40, 60)); + } + + @Test + public void testSampleHighVarianceSizes() throws Exception { + // The largest element is much larger than the average. + List sizes = Arrays.asList(1L, 10L, 100L, 1000L); + IdentitySizeEstimator underlying = new IdentitySizeEstimator(); + SizeEstimator estimator = + new SamplingSizeEstimator(underlying, 0.1, 0.2, 10, new Random(1)); + // First 10 elements are always sampled. + for (int k = 0; k < 10; k++) { + long size = sizes.get(k % sizes.size()); + assertEquals(size, estimator.estimateSize(size)); + assertEquals(k + 1, underlying.calls); + } + // We're still not out of the woods; sample every element. + for (int k = 10; k < 20; k++) { + long size = sizes.get(k % sizes.size()); + assertEquals(size, estimator.estimateSize(size)); + assertEquals(k + 1, underlying.calls); + } + // Sample some more to let things settle down. + for (int k = 20; k < 500; k++) { + estimator.estimateSize(sizes.get(k % sizes.size())); + } + // Next 1000 are sampled at about 20% (maxSampleRate). + int initialCalls = underlying.calls; + for (int k = 500; k < 1500; k++) { + long size = sizes.get(k % sizes.size()); + assertThat(estimator.estimateSize(size), + anyOf(isIn(sizes), between(250L, 350L))); + } + assertThat(underlying.calls - initialCalls, between(180, 220)); + // Sample some more to let things settle down. + for (int k = 1500; k < 3000; k++) { + estimator.estimateSize(sizes.get(k % sizes.size())); + } + // Next 1000 are sampled at about 10% (minSampleRate). 
+ initialCalls = underlying.calls;
+ for (int k = 3000; k < 4000; k++) {
+ long size = sizes.get(k % sizes.size());
+ assertThat(estimator.estimateSize(size),
+ anyOf(isIn(sizes), between(250L, 350L)));
+ }
+ assertThat(underlying.calls - initialCalls, between(90, 110));
+ }
+
+ @Test
+ public void testSampleChangingSizes() throws Exception {
+ IdentitySizeEstimator underlying = new IdentitySizeEstimator();
+ SizeEstimator estimator =
+ new SamplingSizeEstimator(underlying, 0.05, 1.0, 10, new Random(1));
+ // First 10 elements are always sampled.
+ for (int k = 0; k < 10; k++) {
+ assertEquals(100, estimator.estimateSize(100L));
+ assertEquals(k + 1, underlying.calls);
+ }
+ // Next 10 are sometimes sampled.
+ for (int k = 10; k < 20; k++) {
+ assertEquals(100, estimator.estimateSize(100L));
+ }
+ assertThat(underlying.calls, between(11, 19));
+ int initialCalls = underlying.calls;
+ // Next 1000 are sampled at about 5%.
+ for (int k = 20; k < 1020; k++) {
+ assertEquals(100, estimator.estimateSize(100L));
+ }
+ assertThat(underlying.calls - initialCalls, between(40, 60));
+ // Inject a big element until it is sampled.
+ while (estimator.estimateSize(1000000L) == 100) { }
+ // Check that we have started sampling more regularly again.
+ assertEquals(99, estimator.estimateSize(99L));
+ }
+
+ private static <T extends Comparable<T>> TypeSafeDiagnosingMatcher<T>
+ between(final T min, final T max) {
+ return new TypeSafeDiagnosingMatcher<T>() {
+ @Override
+ public void describeTo(Description description) {
+ description.appendText("is between " + min + " and " + max);
+ }
+ @Override
+ protected boolean matchesSafely(T item, Description mismatchDescription) {
+ return min.compareTo(item) <= 0 && item.compareTo(max) <= 0;
+ }
+ };
+ }
+}
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java
new file mode 100644
index 0000000000000..b3e29f8e5cf55
--- /dev/null
+++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2014 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.everyItem; +import static org.hamcrest.collection.IsIterableContainingInOrder.contains; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestSource; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * Tests for ReadOperation. + */ +@RunWith(JUnit4.class) +public class ReadOperationTest { + private static final long ITERATIONS = 3L; + + /** + * The test Source for testing updating stop position and progress report. + * The number of read iterations is controlled by ITERATIONS. + */ + static class TestTextSource extends Source { + @Override + public SourceIterator iterator() { + return new TestTextSourceIterator(); + } + + class TestTextSourceIterator extends AbstractSourceIterator { + long offset = 0L; + List proposedPositions = + new ArrayList<>(); + + @Override + public boolean hasNext() { + return offset < ITERATIONS; + } + + @Override + public String next() { + if (hasNext()) { + offset++; + return "hi"; + } else { + throw new AssertionError("No next Element."); + } + } + + @Override + public Progress getProgress() { + com.google.api.services.dataflow.model.Position currentPosition = + new com.google.api.services.dataflow.model.Position(); + currentPosition.setByteOffset(offset); + + ApproximateProgress progress = new ApproximateProgress(); + progress.setPosition(currentPosition); + + return cloudProgressToSourceProgress(progress); + } + + @Override + public Position updateStopPosition(Progress proposedStopPosition) { + proposedPositions.add(sourceProgressToCloudProgress(proposedStopPosition).getPosition()); + // Actually no update happens, returns null. + return null; + } + } + } + + /** + * The OutputReceiver for testing updating stop position and progress report. + * The offset of the Source (iterator) will be advanced each time this + * Receiver processes a record. 
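+ * Each processed element also records the current progress report and
+ * proposes a stop position, which the test expects to be ignored.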
+ */ + static class TestTextReceiver extends OutputReceiver { + ReadOperation readOperation = null; + com.google.api.services.dataflow.model.Position proposedStopPosition = null; + List progresses = new ArrayList<>(); + + public TestTextReceiver(CounterSet counterSet, String counterPrefix) { + super("test_receiver_out", counterPrefix, counterSet.getAddCounterMutator()); + } + + public void setReadOperation(ReadOperation readOp) { + this.readOperation = readOp; + } + + public void setProposedStopPosition(com.google.api.services.dataflow.model.Position position) { + this.proposedStopPosition = position; + } + + @Override + public void process(Object outputElem) throws Exception { + // Calls getProgress() and proposeStopPosition() in each iteration. + progresses.add(sourceProgressToCloudProgress(readOperation.getProgress())); + // We expect that call to proposeStopPosition is a no-op that does not + // update the stop position for every iteration. We will verify it is + // delegated to SourceIterator after ReadOperation finishes. + Assert.assertNull( + readOperation.proposeStopPosition( + cloudProgressToSourceProgress(makeApproximateProgress(proposedStopPosition)))); + } + } + + @Test + public void testRunReadOperation() throws Exception { + TestSource source = new TestSource(); + source.addInput("hi", "there", "", "bob"); + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); + + ReadOperation readOperation = new ReadOperation( + source, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + + readOperation.start(); + readOperation.finish(); + + Assert.assertThat( + receiver.outputElems, CoreMatchers.hasItems("hi", "there", "", "bob")); + + Assert.assertEquals( + new CounterSet( + Counter.longs("ReadOperation-ByteCount", SUM).resetToValue(2L + 5 + 0 + 3), + Counter.longs("test_receiver_out-ElementCount", SUM).resetToValue(4L), + Counter.longs("test_receiver_out-MeanByteCount", MEAN).resetToValue(4, 10L), + Counter.longs("test-ReadOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-read-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-read-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-finish-msecs")).getAggregate(false))), + counterSet); + } + + @Test + public void testGetProgressAndProposeStopPosition() throws Exception { + TestTextSource testSource = new TestTextSource(); + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); + ReadOperation readOperation = new ReadOperation( + testSource, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + readOperation.setProgressUpdatePeriodMs(0); + receiver.setReadOperation(readOperation); + + Position proposedStopPosition = makePosition(3L); + 
receiver.setProposedStopPosition(proposedStopPosition); + + Assert.assertNull(readOperation.getProgress()); + Assert.assertNull(readOperation.proposeStopPosition( + cloudProgressToSourceProgress( + makeApproximateProgress(proposedStopPosition)))); + + readOperation.start(); + readOperation.finish(); + + TestTextSource.TestTextSourceIterator testIterator = + (TestTextSource.TestTextSourceIterator) readOperation.sourceIterator; + + Assert.assertEquals(sourceProgressToCloudProgress(testIterator.getProgress()), + sourceProgressToCloudProgress(readOperation.getProgress())); + Assert.assertEquals(sourcePositionToCloudPosition(testIterator.updateStopPosition( + cloudProgressToSourceProgress( + makeApproximateProgress(proposedStopPosition)))), + sourcePositionToCloudPosition(readOperation.proposeStopPosition( + cloudProgressToSourceProgress( + makeApproximateProgress(proposedStopPosition))))); + + // Verifies progress report and stop position updates. + Assert.assertEquals(testIterator.proposedPositions.size(), ITERATIONS + 2); + Assert.assertThat( + testIterator.proposedPositions, everyItem(equalTo(makePosition(3L)))); + Assert.assertThat( + receiver.progresses, contains(makeApproximateProgress(1L), makeApproximateProgress(2L), + makeApproximateProgress(3L))); + } + + @Test + public void testGetProgressDoesNotBlock() throws Exception { + final BlockingQueue queue = new LinkedBlockingQueue<>(); + final Source.SourceIterator iterator = new Source.AbstractSourceIterator() { + private int itemsReturned = 0; + + @Override + public boolean hasNext() throws IOException { + return itemsReturned < 5; + } + + @Override + public Integer next() throws IOException { + ++itemsReturned; + try { + return queue.take(); + } catch (InterruptedException e) { + throw new NoSuchElementException("interrupted"); + } + } + + @Override + public Source.Progress getProgress() { + return cloudProgressToSourceProgress(new ApproximateProgress().setPosition( + new Position().setRecordIndex((long) itemsReturned))); + } + }; + + Source source = new Source() { + @Override + public SourceIterator iterator() throws IOException { + return iterator; + } + }; + + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); + final ReadOperation readOperation = new ReadOperation( + source, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + // Update progress not continuously, but so that it's never more than 1 record stale. + readOperation.setProgressUpdatePeriodMs(150); + receiver.setReadOperation(readOperation); + + new Thread() { + @Override + public void run() { + try { + readOperation.start(); + readOperation.finish(); + } catch (Exception e) { + e.printStackTrace(); + } + } + }.start(); + + for (int i = 0; i < 5; ++i) { + Thread.sleep(100); // Wait for the operation to start and block. + // Ensure that getProgress() doesn't block. 
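+ // The reader thread is parked in queue.take() here, so the reported record
+ // index may trail the loop index by at most one.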
+ ApproximateProgress progress = sourceProgressToCloudProgress(readOperation.getProgress()); + long observedIndex = progress.getPosition().getRecordIndex().longValue(); + Assert.assertTrue("Actual: " + observedIndex, i == observedIndex || i == observedIndex + 1); + queue.offer(i); + } + } + + private static Position makePosition(long offset) { + return new Position().setByteOffset(offset); + } + + private static ApproximateProgress makeApproximateProgress(long offset) { + return makeApproximateProgress(makePosition(offset)); + } + + private static ApproximateProgress makeApproximateProgress( + com.google.api.services.dataflow.model.Position position) { + return new ApproximateProgress().setPosition(position); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryTest.java new file mode 100644 index 0000000000000..10e3b4da63f4c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ShuffleEntryTest.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link ShuffleEntry}. 
*/ +@RunWith(JUnit4.class) +public class ShuffleEntryTest { + private static final byte[] KEY = {0xA}; + private static final byte[] SKEY = {0xB}; + private static final byte[] VALUE = {0xC}; + + @Test + public void accessors() { + ShuffleEntry entry = new ShuffleEntry(KEY, SKEY, VALUE); + assertThat(entry.getKey(), equalTo(KEY)); + assertThat(entry.getSecondaryKey(), equalTo(SKEY)); + assertThat(entry.getValue(), equalTo(VALUE)); + } + + @Test + public void equalsToItself() { + ShuffleEntry entry = new ShuffleEntry(KEY, SKEY, VALUE); + assertTrue(entry.equals(entry)); + } + + @Test + public void equalsForEqualEntries() { + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry( + KEY.clone(), SKEY.clone(), VALUE.clone()); + + assertTrue(entry0.equals(entry1)); + assertTrue(entry1.equals(entry0)); + assertEquals(entry0.hashCode(), entry1.hashCode()); + } + + @Test + public void equalsForEqualNullEntries() { + ShuffleEntry entry0 = new ShuffleEntry(null, null, null); + ShuffleEntry entry1 = new ShuffleEntry(null, null, null); + + assertTrue(entry0.equals(entry1)); + assertTrue(entry1.equals(entry0)); + assertEquals(entry0.hashCode(), entry1.hashCode()); + } + + @Test + public void notEqualsWhenKeysDiffer() { + final byte[] otherKey = {0x1}; + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(otherKey, SKEY, VALUE); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void notEqualsWhenKeysDifferOneNull() { + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(null, SKEY, VALUE); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void notEqualsWhenSecondaryKeysDiffer() { + final byte[] otherSKey = {0x2}; + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(KEY, otherSKey, VALUE); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void notEqualsWhenSecondaryKeysDifferOneNull() { + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(KEY, null, VALUE); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void notEqualsWhenValuesDiffer() { + final byte[] otherValue = {0x2}; + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(KEY, SKEY, otherValue); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void notEqualsWhenValuesDifferOneNull() { + ShuffleEntry entry0 = new ShuffleEntry(KEY, SKEY, VALUE); + ShuffleEntry entry1 = new ShuffleEntry(KEY, SKEY, null); + + assertFalse(entry0.equals(entry1)); + assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } + + @Test + public void emptyNotTheSameAsNull() { + final byte[] empty = {}; + ShuffleEntry entry0 = new ShuffleEntry(null, null, null); + ShuffleEntry entry1 = new ShuffleEntry(empty, empty, empty); + + assertFalse(entry0.equals(entry1)); + 
assertFalse(entry1.equals(entry0)); + assertThat(entry0.hashCode(), not(equalTo(entry1.hashCode()))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java new file mode 100644 index 0000000000000..d350db1798bfe --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java @@ -0,0 +1,139 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.AbstractMap.SimpleEntry; +import java.util.Map; + +/** + * Unit tests for the {@link Counter} API. + */ +@RunWith(JUnit4.class) +public class StateSamplerTest { + + @Test + public void basicTest() throws InterruptedException { + CounterSet counters = new CounterSet(); + long periodMs = 50; + StateSampler stateSampler = new StateSampler("test-", + counters.getAddCounterMutator(), periodMs); + + int state1 = stateSampler.stateForName("1"); + int state2 = stateSampler.stateForName("2"); + + assertEquals(new SimpleEntry<>("", 0L), + stateSampler.getCurrentStateAndDuration()); + + try (StateSampler.ScopedState s1 = + stateSampler.scopedState(state1)) { + Thread.sleep(2 * periodMs); + } + + try (StateSampler.ScopedState s2 = + stateSampler.scopedState(state2)) { + Thread.sleep(3 * periodMs); + } + + long s1 = stateSampler.getStateDuration(state1); + long s2 = stateSampler.getStateDuration(state2); + + System.out.println("basic s1: " + s1); + System.out.println("basic s2: " + s2); + + long toleranceMs = periodMs; + assertTrue(s1 + s2 >= 4 * periodMs - toleranceMs); + assertTrue(s1 + s2 <= 10 * periodMs + toleranceMs); + } + + @Test + public void nestingTest() throws InterruptedException { + CounterSet counters = new CounterSet(); + long periodMs = 50; + StateSampler stateSampler = new StateSampler("test-", + counters.getAddCounterMutator(), periodMs); + + int state1 = stateSampler.stateForName("1"); + int state2 = stateSampler.stateForName("2"); + int state3 = stateSampler.stateForName("3"); + + assertEquals(new SimpleEntry<>("", 0L), + stateSampler.getCurrentStateAndDuration()); + + try (StateSampler.ScopedState s1 = + stateSampler.scopedState(state1)) { + Thread.sleep(2 * periodMs); + + try (StateSampler.ScopedState s2 = + stateSampler.scopedState(state2)) { + Thread.sleep(2 * periodMs); + + try (StateSampler.ScopedState s3 = + stateSampler.scopedState(state3)) { + Thread.sleep(2 * 
periodMs); + } + Thread.sleep(periodMs); + } + Thread.sleep(periodMs); + } + + long s1 = stateSampler.getStateDuration(state1); + long s2 = stateSampler.getStateDuration(state2); + long s3 = stateSampler.getStateDuration(state3); + + System.out.println("s1: " + s1); + System.out.println("s2: " + s2); + System.out.println("s3: " + s3); + + long toleranceMs = periodMs; + assertTrue(s1 + s2 + s3 >= 4 * periodMs - toleranceMs); + assertTrue(s1 + s2 + s3 <= 16 * periodMs + toleranceMs); + } + + @Test + public void nonScopedTest() throws InterruptedException { + CounterSet counters = new CounterSet(); + long periodMs = 50; + StateSampler stateSampler = new StateSampler("test-", + counters.getAddCounterMutator(), periodMs); + + int state1 = stateSampler.stateForName("1"); + int previousState = stateSampler.setState(state1); + Thread.sleep(2 * periodMs); + Map.Entry currentStateAndDuration = + stateSampler.getCurrentStateAndDuration(); + stateSampler.setState(previousState); + assertEquals("test-1-msecs", currentStateAndDuration.getKey()); + long tolerance = periodMs; + long s = currentStateAndDuration.getValue(); + System.out.println("s: " + s); + assertTrue(s >= periodMs - tolerance); + assertTrue(s <= 4 * periodMs + tolerance); + + assertTrue(stateSampler.getCurrentStateAndDuration() + .getKey().isEmpty()); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutorTest.java new file mode 100644 index 0000000000000..ecce00d68b76c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutorTest.java @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; + +import com.google.cloud.dataflow.sdk.util.common.Metric; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Collection; + +/** + * Unit tests for {@link WorkExecutor}. 
+ */ +@RunWith(JUnit4.class) +public class WorkExecutorTest { + private WorkExecutor mapWorker; + private WorkExecutor seqMapWorker; + + @Before + public void setUp() { + mapWorker = new MapTaskExecutor(null, null, null); + } + + @Test + public void testMapTaskGetOutputMetrics() { + Collection> metrics = mapWorker.getOutputMetrics(); + verifyOutputMetrics(metrics); + } + + private void verifyOutputMetrics(Collection> metrics) { + Collection metricNames = new ArrayList<>(); + for (Metric metric : metrics) { + metricNames.add(metric.getName()); + } + Assert.assertThat(metricNames, containsInAnyOrder("CPU")); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java new file mode 100644 index 0000000000000..6b51bc603531f --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.common.worker; + +import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; + +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for WriteOperation. 
+ */ +@RunWith(JUnit4.class) +public class WriteOperationTest { + @Test + public void testRunWriteOperation() throws Exception { + ExecutorTestUtils.TestSink sink = new ExecutorTestUtils.TestSink(); + CounterSet counterSet = new CounterSet(); + String counterPrefix = "test-"; + StateSampler stateSampler = new StateSampler( + counterPrefix, counterSet.getAddCounterMutator()); + + WriteOperation writeOperation = new WriteOperation( + sink, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + + writeOperation.start(); + + writeOperation.process("hi"); + writeOperation.process("there"); + writeOperation.process(""); + writeOperation.process("bob"); + + writeOperation.finish(); + + Assert.assertThat(sink.outputElems, + CoreMatchers.hasItems("hi", "there", "", "bob")); + + Assert.assertEquals( + new CounterSet( + Counter.longs("WriteOperation-ByteCount", SUM) + .resetToValue(2L + 5 + 0 + 3), + Counter.longs("test-WriteOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-WriteOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-WriteOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-finish-msecs")).getAggregate(false))), + counterSet); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPathTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPathTest.java new file mode 100644 index 0000000000000..9904bd5a2428e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPathTest.java @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsfs; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +/** + * Tests of GcsPath. + */ +@RunWith(JUnit4.class) +public class GcsPathTest { + + /** + * Test case, which tests parsing and building of GcsPaths. + */ + static final class TestCase { + + final String uri; + final String expectedBucket; + final String expectedObject; + final String[] namedComponents; + + TestCase(String uri, String... 
namedComponents) { + this.uri = uri; + this.expectedBucket = namedComponents[0]; + this.namedComponents = namedComponents; + this.expectedObject = uri.substring(expectedBucket.length() + 6); + } + } + + // Each test case is an expected URL, then the components used to build it. + // Empty components result in a double slash. + static final List PATH_TEST_CASES = Arrays.asList( + new TestCase("gs://bucket/then/object", "bucket", "then", "object"), + new TestCase("gs://bucket//then/object", "bucket", "", "then", "object"), + new TestCase("gs://bucket/then//object", "bucket", "then", "", "object"), + new TestCase("gs://bucket/then///object", "bucket", "then", "", "", "object"), + new TestCase("gs://bucket/then/object/", "bucket", "then", "object/"), + new TestCase("gs://bucket/then/object/", "bucket", "then/", "object/"), + new TestCase("gs://bucket/then/object//", "bucket", "then", "object", ""), + new TestCase("gs://bucket/then/object//", "bucket", "then", "object/", ""), + new TestCase("gs://bucket/", "bucket") + ); + + @Test + public void testGcsPathParsing() throws IOException { + for (TestCase testCase : PATH_TEST_CASES) { + String uriString = testCase.uri; + + GcsPath path = GcsPath.fromUri(URI.create(uriString)); + // Deconstruction - check bucket, object, and components. + assertEquals(testCase.expectedBucket, path.getBucket()); + assertEquals(testCase.expectedObject, path.getObject()); + assertEquals(testCase.uri, + testCase.namedComponents.length, path.getNameCount()); + + // Construction - check that the path can be built from components. + GcsPath built = GcsPath.fromComponents(null, null); + for (String component : testCase.namedComponents) { + built = built.resolve(component); + } + assertEquals(testCase.uri, built.toString()); + } + } + + @Test + public void testParentRelationship() throws IOException { + GcsPath path = GcsPath.fromComponents("bucket", "then/object"); + assertEquals("bucket", path.getBucket()); + assertEquals("then/object", path.getObject()); + assertEquals(3, path.getNameCount()); + assertTrue(path.endsWith("object")); + assertTrue(path.startsWith("bucket/then")); + + GcsPath parent = path.getParent(); // gs://bucket/then/ + assertEquals("bucket", parent.getBucket()); + assertEquals("then/", parent.getObject()); + assertEquals(2, parent.getNameCount()); + assertThat(path, Matchers.not(Matchers.equalTo(parent))); + assertTrue(path.startsWith(parent)); + assertFalse(parent.startsWith(path)); + assertTrue(parent.endsWith("then/")); + assertTrue(parent.startsWith("bucket/then")); + assertTrue(parent.isAbsolute()); + + GcsPath root = path.getRoot(); + assertEquals(0, root.getNameCount()); + assertEquals("gs://", root.toString()); + assertEquals("", root.getBucket()); + assertEquals("", root.getObject()); + assertTrue(root.isAbsolute()); + assertThat(root, Matchers.equalTo(parent.getRoot())); + + GcsPath grandParent = parent.getParent(); // gs://bucket/ + assertEquals(1, grandParent.getNameCount()); + assertEquals("gs://bucket/", grandParent.toString()); + assertTrue(grandParent.isAbsolute()); + assertThat(root, Matchers.equalTo(grandParent.getParent())); + assertThat(root.getParent(), Matchers.nullValue()); + + assertTrue(path.startsWith(path.getRoot())); + assertTrue(parent.startsWith(path.getRoot())); + } + + @Test + public void testRelativeParent() throws IOException { + GcsPath path = GcsPath.fromComponents(null, "a/b"); + GcsPath parent = path.getParent(); + assertEquals("a/", parent.toString()); + + GcsPath grandParent = parent.getParent(); + 
assertNull(grandParent); + } + + @Test + public void testUriSupport() throws IOException { + URI uri = URI.create("gs://bucket/some/path"); + + GcsPath path = GcsPath.fromUri(uri); + assertEquals("bucket", path.getBucket()); + assertEquals("some/path", path.getObject()); + + URI reconstructed = path.toUri(); + assertEquals(uri, reconstructed); + + path = GcsPath.fromUri("gs://bucket"); + assertEquals("gs://bucket/", path.toString()); + } + + @Test + public void testBucketParsing() throws IOException { + GcsPath path = GcsPath.fromUri("gs://bucket"); + GcsPath path2 = GcsPath.fromUri("gs://bucket/"); + + assertEquals(path, path2); + assertEquals(path.toString(), path2.toString()); + assertEquals(path.toUri(), path2.toUri()); + } + + @Test + public void testGcsPathToString() throws Exception { + String filename = "gs://some_bucket/some/file.txt"; + GcsPath path = GcsPath.fromUri(filename); + assertEquals(filename, path.toString()); + } + + @Test + public void testEquals() { + GcsPath a = GcsPath.fromComponents(null, "a/b/c"); + GcsPath a2 = GcsPath.fromComponents(null, "a/b/c"); + assertFalse(a.isAbsolute()); + assertFalse(a2.isAbsolute()); + + GcsPath b = GcsPath.fromComponents("bucket", "a/b/c"); + GcsPath b2 = GcsPath.fromComponents("bucket", "a/b/c"); + assertTrue(b.isAbsolute()); + assertTrue(b2.isAbsolute()); + + assertEquals(a, a); + assertThat(a, Matchers.not(Matchers.equalTo(b))); + assertThat(b, Matchers.not(Matchers.equalTo(a))); + + assertEquals(a, a2); + assertEquals(a2, a); + assertEquals(b, b2); + assertEquals(b2, b); + + assertThat(a, Matchers.not(Matchers.equalTo(Paths.get("/tmp/foo")))); + assertTrue(a != null); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidGcsPath() { + @SuppressWarnings("unused") + GcsPath filename = + GcsPath.fromUri("file://invalid/gcs/path"); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidBucket() { + GcsPath.fromComponents("invalid/", ""); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidObject_newline() { + GcsPath.fromComponents(null, "a\nb"); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidObject_cr() { + GcsPath.fromComponents(null, "a\rb"); + } + + @Test + public void testResolveUri() { + GcsPath path = GcsPath.fromComponents("bucket", "a/b/c"); + GcsPath d = path.resolve("gs://bucket2/d"); + assertEquals("gs://bucket2/d", d.toString()); + } + + @Test + public void testResolveOther() { + GcsPath a = GcsPath.fromComponents("bucket", "a"); + GcsPath b = a.resolve(Paths.get("b")); + assertEquals("a/b", b.getObject()); + } + + @Test + public void testCompareTo() { + GcsPath a = GcsPath.fromComponents("bucket", "a"); + GcsPath b = GcsPath.fromComponents("bucket", "b"); + GcsPath b2 = GcsPath.fromComponents("bucket2", "b"); + GcsPath brel = GcsPath.fromComponents(null, "b"); + GcsPath a2 = GcsPath.fromComponents("bucket", "a"); + GcsPath arel = GcsPath.fromComponents(null, "a"); + + assertThat(a.compareTo(b), Matchers.lessThan(0)); + assertThat(b.compareTo(a), Matchers.greaterThan(0)); + assertThat(a.compareTo(a2), Matchers.equalTo(0)); + + assertThat(a.hashCode(), Matchers.equalTo(a2.hashCode())); + assertThat(a.hashCode(), Matchers.not(Matchers.equalTo(b.hashCode()))); + assertThat(b.hashCode(), Matchers.not(Matchers.equalTo(brel.hashCode()))); + + assertThat(brel.compareTo(b), Matchers.lessThan(0)); + assertThat(b.compareTo(brel), Matchers.greaterThan(0)); + assertThat(arel.compareTo(brel), Matchers.lessThan(0)); 
+ assertThat(brel.compareTo(arel), Matchers.greaterThan(0)); + + assertThat(b.compareTo(b2), Matchers.lessThan(0)); + assertThat(b2.compareTo(b), Matchers.greaterThan(0)); + } + + @Test + public void testCompareTo_ordering() { + GcsPath ab = GcsPath.fromComponents("bucket", "a/b"); + GcsPath abc = GcsPath.fromComponents("bucket", "a/b/c"); + GcsPath a1b = GcsPath.fromComponents("bucket", "a-1/b"); + + assertThat(ab.compareTo(a1b), Matchers.lessThan(0)); + assertThat(a1b.compareTo(ab), Matchers.greaterThan(0)); + + assertThat(ab.compareTo(abc), Matchers.lessThan(0)); + assertThat(abc.compareTo(ab), Matchers.greaterThan(0)); + } + + @Test + public void testCompareTo_buckets() { + GcsPath a = GcsPath.fromComponents(null, "a/b/c"); + GcsPath b = GcsPath.fromComponents("bucket", "a/b/c"); + + assertThat(a.compareTo(b), Matchers.lessThan(0)); + assertThat(b.compareTo(a), Matchers.greaterThan(0)); + } + + @Test + public void testIterator() { + GcsPath a = GcsPath.fromComponents("bucket", "a/b/c"); + Iterator it = a.iterator(); + + assertTrue(it.hasNext()); + assertEquals("gs://bucket/", it.next().toString()); + assertTrue(it.hasNext()); + assertEquals("a", it.next().toString()); + assertTrue(it.hasNext()); + assertEquals("b", it.next().toString()); + assertTrue(it.hasNext()); + assertEquals("c", it.next().toString()); + assertFalse(it.hasNext()); + } + + @Test + public void testSubpath() { + GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d"); + assertThat(a.subpath(0, 1).toString(), Matchers.equalTo("gs://bucket/")); + assertThat(a.subpath(0, 2).toString(), Matchers.equalTo("gs://bucket/a")); + assertThat(a.subpath(0, 3).toString(), Matchers.equalTo("gs://bucket/a/b")); + assertThat(a.subpath(0, 4).toString(), Matchers.equalTo("gs://bucket/a/b/c")); + assertThat(a.subpath(1, 2).toString(), Matchers.equalTo("a")); + assertThat(a.subpath(2, 3).toString(), Matchers.equalTo("b")); + assertThat(a.subpath(2, 4).toString(), Matchers.equalTo("b/c")); + assertThat(a.subpath(2, 5).toString(), Matchers.equalTo("b/c/d")); + } + + @Test + public void testGetName() { + GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d"); + assertEquals(5, a.getNameCount()); + assertThat(a.getName(0).toString(), Matchers.equalTo("gs://bucket/")); + assertThat(a.getName(1).toString(), Matchers.equalTo("a")); + assertThat(a.getName(2).toString(), Matchers.equalTo("b")); + assertThat(a.getName(3).toString(), Matchers.equalTo("c")); + assertThat(a.getName(4).toString(), Matchers.equalTo("d")); + } + + @Test(expected = IllegalArgumentException.class) + public void testSubPathError() { + GcsPath a = GcsPath.fromComponents("bucket", "a/b/c/d"); + a.subpath(1, 1); // throws IllegalArgumentException + Assert.fail(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java new file mode 100644 index 0000000000000..96e5bf6b49883 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java @@ -0,0 +1,83 @@ +/** + * Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.dataflow.sdk.util.gcsio; + +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.verifyZeroInteractions; + +import com.google.api.client.googleapis.media.MediaHttpUploader.UploadState; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.slf4j.Logger; + +/** Unit tests for {@link LoggingMediaHttpUploaderProgressListener}. */ +@RunWith(JUnit4.class) +public class LoggingMediaHttpUploaderProgressListenerTest { + @Mock + private Logger mockLogger; + private LoggingMediaHttpUploaderProgressListener listener; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + listener = new LoggingMediaHttpUploaderProgressListener("NAME", 60000L); + } + + @Test + public void testLoggingInitiation() { + listener.progressChanged(mockLogger, UploadState.INITIATION_STARTED, 0L, 0L); + verify(mockLogger).info("Uploading: {}", "NAME"); + verifyNoMoreInteractions(mockLogger); + } + + @Test + public void testLoggingProgressAfterSixtySeconds() { + listener.progressChanged(mockLogger, UploadState.MEDIA_IN_PROGRESS, 10485760L, 60001L); + listener.progressChanged(mockLogger, UploadState.MEDIA_IN_PROGRESS, 104857600L, 120002L); + verify(mockLogger).info( + "Uploading: NAME Average Rate: 0.167 MiB/s, Current Rate: 0.167 MiB/s, Total: 10.000 MiB"); + verify(mockLogger).info( + "Uploading: NAME Average Rate: 0.833 MiB/s, Current Rate: 1.500 MiB/s, Total: 100.000 MiB"); + verifyNoMoreInteractions(mockLogger); + } + + @Test + public void testSkippingLoggingAnInProgressUpdate() { + listener.progressChanged(mockLogger, UploadState.MEDIA_IN_PROGRESS, 104857600L, 60000L); + verifyZeroInteractions(mockLogger); + } + + @Test + public void testLoggingCompletion() { + listener.progressChanged(mockLogger, UploadState.MEDIA_COMPLETE, 104857600L, 60000L); + verify(mockLogger).info("Finished Uploading: {}", "NAME"); + verifyNoMoreInteractions(mockLogger); + } + + @Test + public void testOtherUpdatesIgnored() { + listener.progressChanged(mockLogger, UploadState.NOT_STARTED, 0L, 60001L); + listener.progressChanged(mockLogger, UploadState.INITIATION_COMPLETE, 0L, 60001L); + verifyZeroInteractions(mockLogger); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java new file mode 100644 index 0000000000000..dae544fb033af --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Comparator; + +/** + * Tests for KV. + */ +@RunWith(JUnit4.class) +public class KVTest { + static final Integer testValues[] = + {null, Integer.MIN_VALUE, -1, 0, 1, Integer.MAX_VALUE}; + + // Wrapper around Integer.compareTo() to support null values. + private int compareInt(Integer a, Integer b) { + if (a == null) { + return b == null ? 0 : -1; + } else { + return b == null ? 1 : a.compareTo(b); + } + } + + @Test + public void testOrderByKey() { + Comparator> orderByKey = new KV.OrderByKey<>(); + for (Integer key1 : testValues) { + for (Integer val1 : testValues) { + for (Integer key2 : testValues) { + for (Integer val2 : testValues) { + assertEquals(compareInt(key1, key2), + orderByKey.compare(KV.of(key1, val1), KV.of(key2, val2))); + } + } + } + } + } + + @Test + public void testOrderByValue() { + Comparator> orderByValue = new KV.OrderByValue<>(); + for (Integer key1 : testValues) { + for (Integer val1 : testValues) { + for (Integer key2 : testValues) { + for (Integer val2 : testValues) { + assertEquals(compareInt(val1, val2), + orderByValue.compare(KV.of(key1, val1), KV.of(key2, val2))); + } + } + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PCollectionListTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PCollectionListTest.java new file mode 100644 index 0000000000000..a6c180fc9abe3 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PCollectionListTest.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Collections; + +/** + * Tests for PCollectionLists. 
+ */ +@RunWith(JUnit4.class) +public class PCollectionListTest { + @Test + public void testEmptyListFailure() { + try { + PCollectionList.of(Collections.>emptyList()); + fail("should have failed"); + } catch (IllegalArgumentException exn) { + assertThat( + exn.toString(), + containsString( + "must either have a non-empty list of PCollections, " + + "or must first call empty(Pipeline)")); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java new file mode 100644 index 0000000000000..e886f350c12d7 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import static com.google.cloud.dataflow.sdk.TestUtils.LINES; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.PTransform; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; + +/** + * Tests for PDone. + */ +@RunWith(JUnit4.class) +public class PDoneTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + /** + * A PTransform that just returns a fresh PDone. + */ + static class EmptyTransform extends PTransform { + @Override + public PDone apply(PBegin begin) { + return new PDone(); + } + } + + /** + * A PTransform that's composed of something that returns a PDone. + */ + static class SimpleTransform extends PTransform { + private final String filename; + + public SimpleTransform(String filename) { + this.filename = filename; + } + + @Override + public PDone apply(PBegin begin) { + return + begin + .apply(Create.of(LINES)) + .apply(TextIO.Write.to(filename)); + } + } + + // TODO: This test doesn't work, because we can't handle composite + // transforms that contain no nested transforms. + // @Test + // @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void DISABLED_testEmptyTransform() { + Pipeline p = TestPipeline.create(); + + p.begin().apply(new EmptyTransform()); + + p.run(); + } + + // Cannot run on the service, unless we allocate a GCS temp file + // instead of a local temp file. Or switch to applying a different + // transform that returns PDone. 
+ @Test + public void testSimpleTransform() throws Exception { + File tmpFile = tmpFolder.newFile("file.txt"); + String filename = tmpFile.getPath(); + + Pipeline p = TestPipeline.create(); + + p.begin().apply(new SimpleTransform(filename)); + + p.run(); + } +} From 13a46e000dbdd814f60e1558804173e586405066 Mon Sep 17 00:00:00 2001 From: vanya Date: Sat, 13 Dec 2014 14:36:27 -0800 Subject: [PATCH 0003/1541] Add TextIO.Read.{,.Bound}.withoutValidation() method and plumb the validate_source value. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82063199 --- .../google/cloud/dataflow/sdk/io/TextIO.java | 43 ++++++++++++++++--- .../runners/DataflowPipelineTranslator.java | 12 ++++++ .../runners/dataflow/TextIOTranslator.java | 1 + .../dataflow/sdk/util/PropertyNames.java | 1 + .../cloud/dataflow/sdk/io/TextIOTest.java | 9 ++++ 5 files changed, 61 insertions(+), 5 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index 5d1cb205b4224..f6ff0e29fd8fe 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -137,6 +137,18 @@ public static Bound withCoder(Coder coder) { return new Bound<>(coder); } + /** + * Returns a TextIO.Read PTransform that has GCS path validation on + * pipeline creation disabled. + * + *

This can be useful in the case where the GCS input does not + * exist at the pipeline creation time, but is expected to be + * available at execution time. + */ + public static Bound withoutValidation() { + return new Bound<>(DEFAULT_TEXT_CODER).withoutValidation(); + } + // TODO: strippingNewlines, gzipped, etc. /** @@ -156,14 +168,18 @@ public static class Bound /** The Coder to use to decode each line. */ @Nullable final Coder coder; + /** An option to indicate if input validation is desired. Default is true. */ + final boolean validate; + Bound(Coder coder) { - this(null, null, coder); + this(null, null, coder, true); } - Bound(String name, String filepattern, Coder coder) { + Bound(String name, String filepattern, Coder coder, boolean validate) { super(name); this.coder = coder; this.filepattern = filepattern; + this.validate = validate; } /** @@ -171,7 +187,7 @@ public static class Bound * with the given step name. Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, filepattern, coder); + return new Bound<>(name, filepattern, coder, validate); } /** @@ -181,7 +197,7 @@ public Bound named(String name) { * filepatterns.) Does not modify this object. */ public Bound from(String filepattern) { - return new Bound<>(name, filepattern, coder); + return new Bound<>(name, filepattern, coder, validate); } /** @@ -194,7 +210,20 @@ public Bound from(String filepattern) { * elements of the resulting PCollection */ public Bound withCoder(Coder coder) { - return new Bound<>(name, filepattern, coder); + return new Bound<>(name, filepattern, coder, validate); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but + * that has GCS path validation on pipeline creation disabled. + * Does not modify this object. + * + *

This can be useful in the case where the GCS input does not + * exist at the pipeline creation time, but is expected to be + * available at execution time. + */ + public Bound withoutValidation() { + return new Bound<>(name, filepattern, coder, false); } @Override @@ -222,6 +251,10 @@ public String getFilepattern() { return filepattern; } + public boolean needsValidation() { + return validate; + } + static { DirectPipelineRunner.registerDefaultTransformEvaluator( Bound.class, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 6f39a2bae5b8a..5f0b5a1985f04 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -20,6 +20,7 @@ import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray; +import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean; import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary; import static com.google.cloud.dataflow.sdk.util.Structs.addList; import static com.google.cloud.dataflow.sdk.util.Structs.addLong; @@ -212,6 +213,12 @@ public interface TranslationContext { */ public void addEncodingInput(Coder value); + /** + * Adds an input with the given name and value to the current + * Dataflow step. + */ + public void addInput(String name, Boolean value); + /** * Adds an input with the given name and value to the current * Dataflow step. @@ -535,6 +542,11 @@ public void addEncodingInput(Coder coder) { addObject(getProperties(), PropertyNames.ENCODING, encoding); } + @Override + public void addInput(String name, Boolean value) { + addBoolean(getProperties(), name, value); + } + @Override public void addInput(String name, String value) { addString(getProperties(), name, value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index 05a44648eba95..0a2f1f8a32571 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -66,6 +66,7 @@ private void translateReadHelper( context.addInput(PropertyNames.FORMAT, "text"); context.addInput(PropertyNames.FILEPATTERN, gcsPath); context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + context.addInput(PropertyNames.VALIDATE_SOURCE, transform.needsValidation()); // TODO: Orderedness? 
} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index 85a81cdeff9c9..c10f07fd98ed2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -83,5 +83,6 @@ public class PropertyNames { public static final String USER_FN = "user_fn"; public static final String USER_NAME = "user_name"; public static final String USES_KEYED_STATE = "uses_keyed_state"; + public static final String VALIDATE_SOURCE = "validate_source"; public static final String VALUE = "value"; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index b6aaf59b51ad8..31bb18cd955f5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -23,7 +23,9 @@ import static com.google.cloud.dataflow.sdk.TestUtils.NO_LINES_ARRAY; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -410,4 +412,11 @@ public void testBadWildcardBrackets() throws Exception { thrown.expectMessage("wildcard"); pipeline.run(); } + + @Test + public void testWithoutValidationFlag() throws Exception { + TextIO.Read.Bound read = TextIO.Read.from("gs://bucket/foo*/baz"); + assertTrue(read.needsValidation()); + assertFalse(read.withoutValidation().needsValidation()); + } } From c266ce35a39f93d19ee4ffe837aebb326b7bde67 Mon Sep 17 00:00:00 2001 From: earhart Date: Sat, 13 Dec 2014 19:42:46 -0800 Subject: [PATCH 0004/1541] Fix some misc. warnings. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82071212 --- .../cloud/dataflow/sdk/coders/AvroCoder.java | 1 + .../dataflow/sdk/coders/CoderException.java | 2 ++ .../cloud/dataflow/sdk/coders/MapCoder.java | 1 + .../google/cloud/dataflow/sdk/io/TextIO.java | 4 ++++ .../sdk/runners/worker/AvroByteSink.java | 2 +- .../sdk/runners/worker/AvroByteSource.java | 2 +- .../dataflow/sdk/util/WindowedValue.java | 20 +++++++++++-------- .../dataflow/sdk/util/common/Counter.java | 4 +++- .../sdk/util/common/ForwardingReiterator.java | 4 +++- .../cloud/dataflow/sdk/values/TupleTag.java | 8 ++++++-- .../dataflow/sdk/values/TupleTagList.java | 2 ++ 11 files changed, 36 insertions(+), 14 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java index 5ea631a970a79..9532da725ac97 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -81,6 +81,7 @@ * @param the type of elements handled by this coder */ public class AvroCoder extends StandardCoder { + private static final long serialVersionUID = 0; /** * Returns an {@code AvroCoder} instance for the provided element type. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java index 1bbc3fa176b7e..b19eb202dfed2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java @@ -23,6 +23,8 @@ * decoding a value. */ public class CoderException extends IOException { + private static final long serialVersionUID = 0; + public CoderException(String message) { super(message); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java index fa3fc58950150..244419da6ac0e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java @@ -41,6 +41,7 @@ * @param the type of the values of the KVs being transcoded */ public class MapCoder extends MapCoderBase> { + private static final long serialVersionUID = 0; /** * Produces a MapCoder with the given keyCoder and valueCoder. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index f6ff0e29fd8fe..c0e97ff7527b8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -162,6 +162,8 @@ public static Bound withoutValidation() { */ public static class Bound extends PTransform> { + private static final long serialVersionUID = 0; + /** The filepattern to read from. */ @Nullable final String filepattern; @@ -367,6 +369,8 @@ public static Bound withCoder(Coder coder) { */ public static class Bound extends PTransform, PDone> { + private static final long serialVersionUID = 0; + /** The filename to write to. */ @Nullable final String filenamePrefix; /** Suffix to use for each filename. 
*/ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java index 404b2d261fc98..504aa69eecfcd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSink.java @@ -48,7 +48,7 @@ public AvroByteSink(String filenamePrefix, Coder coder) { public AvroByteSink(String filenamePrefix, String shardFormat, String filenameSuffix, int shardCount, Coder coder) { this.coder = coder; - avroSink = new AvroSink( + avroSink = new AvroSink<>( filenamePrefix, shardFormat, filenameSuffix, shardCount, WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema))); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java index b71700a08fcab..5a8524eb88853 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java @@ -49,7 +49,7 @@ public AvroByteSource(String filename, @Nullable Long endPosition, Coder coder) { this.coder = coder; - avroSource = new AvroSource( + avroSource = new AvroSource<>( filename, startPosition, endPosition, WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema))); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index de310b8271149..636b77d8b1109 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -79,9 +79,9 @@ public static WindowedValue valueInGlobalWindow(V value) { * Returns a {@code WindowedValue} with the given value and default timestamp and empty windows. */ public static WindowedValue valueInEmptyWindows(V value) { - return new WindowedValue<>(value, - new Instant(Long.MIN_VALUE), - new ArrayList()); + return new WindowedValue(value, + new Instant(Long.MIN_VALUE), + new ArrayList<>()); } private WindowedValue(V value, @@ -140,11 +140,11 @@ public static WindowedValueCoder getValueOnlyCoder(Coder valueCoder) { @Override public boolean equals(Object o) { if (o instanceof WindowedValue) { - WindowedValue that = (WindowedValue) o; + WindowedValue that = (WindowedValue) o; if (that.timestamp.isEqual(timestamp) && that.windows.size() == windows.size()) { - for (Iterator thatIterator = that.windows.iterator(), thisIterator = windows.iterator(); + for (Iterator thatIterator = that.windows.iterator(), thisIterator = windows.iterator(); thatIterator.hasNext() && thisIterator.hasNext(); - /* do nothng */) { + /* do nothing */) { if (!thatIterator.next().equals(thisIterator.next())) { return false; } @@ -197,6 +197,8 @@ public Coder getValueCoder() { * Coder for {@code WindowedValue}. */ public static class FullWindowedValueCoder extends WindowedValueCoder { + private static final long serialVersionUID = 0; + private final Coder windowCoder; // Precompute and cache the coder for a list of windows. 
private final Coder> windowsCoder; @@ -213,8 +215,9 @@ public static FullWindowedValueCoder of( List> components) { checkArgument(components.size() == 2, "Expecting 2 components, got " + components.size()); - return of(components.get(0), - (Coder) components.get(1)); + @SuppressWarnings("unchecked") + Coder window = (Coder) components.get(1); + return of(components.get(0), window); } @SuppressWarnings("unchecked") @@ -305,6 +308,7 @@ public List> getComponents() { * timestamp and windows for encoding, and uses defaults timestamp, and windows for decoding. */ public static class ValueOnlyWindowedValueCoder extends WindowedValueCoder { + private static final long serialVersionUID = 0; public static ValueOnlyWindowedValueCoder of( Coder valueCoder) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java index 8b5f636ac5da3..4516edd5dbdfe 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java @@ -245,7 +245,9 @@ public AggregationKind getKind() { * Returns the counter's type. */ public Class getType() { - return new TypeToken(getClass()) {}.getRawType(); + return new TypeToken(getClass()) { + private static final long serialVersionUID = 0; + }.getRawType(); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java index f3008232a1074..179b5abf4d42e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ForwardingReiterator.java @@ -41,7 +41,9 @@ public ForwardingReiterator(Reiterator base) { protected ForwardingReiterator clone() { ForwardingReiterator result; try { - result = (ForwardingReiterator) super.clone(); + @SuppressWarnings("unchecked") + ForwardingReiterator declResult = (ForwardingReiterator) super.clone(); + result = declResult; } catch (CloneNotSupportedException e) { throw new AssertionError( "Object.clone() for a ForwardingReiterator threw " diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java index 58562163f4a85..4b47e80fdbabc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java @@ -51,6 +51,8 @@ * e.g., a {@code PCollection}. */ public class TupleTag implements Serializable { + private static final long serialVersionUID = 0; + /** * Constructs a new {@code TupleTag}, with a fresh unique id. * @@ -113,7 +115,9 @@ public String getOutName(int outIndex) { * e.g., {@code new TupleTag(){}}. 
*/ public TypeToken getTypeToken() { - return new TypeToken(getClass()) {}; + return new TypeToken(getClass()) { + private static final long serialVersionUID = 0; + }; } @@ -138,7 +142,7 @@ static String genId() { private static TupleTag fromJson( @JsonProperty(PropertyNames.VALUE) String id, @JsonProperty(PropertyNames.IS_GENERATED) boolean generated) { - return new TupleTag(id, generated); + return new TupleTag<>(id, generated); } private TupleTag(String id, boolean generated) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java index 27a0683bab5aa..515a388b22e2d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java @@ -52,6 +52,8 @@ * } */ public class TupleTagList implements Serializable { + private static final long serialVersionUID = 0; + /** * Returns an empty TupleTagList. * From 146b8c8c5c714f1a1ad3957c4f2305d5ae726b44 Mon Sep 17 00:00:00 2001 From: fjp Date: Sat, 13 Dec 2014 20:13:42 -0800 Subject: [PATCH 0005/1541] Logging improvements - decrease level for scary looking but inert credential and upload messages - increase user counters to INFO - change aggregator example to really be empty lines - add brief logging to DirectPipelineRunner [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82071972 --- .../google/cloud/dataflow/examples/WordCount.java | 12 ++++++------ .../sdk/runners/DataflowPipelineRunner.java | 4 ++-- .../sdk/runners/DirectPipelineRunner.java | 6 +++++- .../cloud/dataflow/sdk/util/Credentials.java | 4 ++-- .../cloud/dataflow/sdk/util/PackageUtil.java | 15 +++++++++++++-- .../sdk/util/RetryHttpRequestInitializer.java | 8 ++++---- .../LoggingMediaHttpUploaderProgressListener.java | 6 +++--- ...gingMediaHttpUploaderProgressListenerTest.java | 8 ++++---- 8 files changed, 39 insertions(+), 24 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 96893b909bc7a..4295e7868d26e 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -66,14 +66,14 @@ public void startBundle(Context c) { @Override public void processElement(ProcessContext c) { - // Split the line into words. - String[] words = c.element().split("[^a-zA-Z']+"); - - // Keep track of the number of lines without any words encountered while tokenizing. - // This aggregator is visible in the monitoring UI when run using DataflowPipelineRunner. - if (words.length == 0) { + // Keep track of the number of empty lines. (When using the [Blocking]DataflowPipelineRunner, + // Aggregators are shown in the monitoring UI.) + if (c.element().trim().isEmpty()) { emptyLines.addValue(1L); } + + // Split the line into words. + String[] words = c.element().split("[^a-zA-Z']+"); // Output each word encountered into the output PCollection. 
for (String word : words) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index ed01b8345c186..9f770fcbb33cd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -124,8 +124,8 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { if (dataflowOptions.getFilesToStage() == null) { dataflowOptions.setFilesToStage(detectClassPathResourcesToStage( DataflowPipelineRunner.class.getClassLoader())); - LOG.info("No specified files to stage. Defaulting to files: {}", - dataflowOptions.getFilesToStage()); + LOG.info("PipelineOptions.filesToStage was not specified. " + + "Defaulting to files from the classpath: {}", dataflowOptions.getFilesToStage()); } // Verify jobName according to service requirements. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java index a19b2055a0b99..cc1fd8f38cdae 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -320,13 +320,17 @@ public T ensureSerializableByCoder( @Override public EvaluationResults run(Pipeline pipeline) { + LOG.info("Executing pipeline using the DirectPipelineRunner."); + Evaluator evaluator = new Evaluator(); evaluator.run(pipeline); // Log all counter values for debugging purposes. for (Counter counter : evaluator.getCounters()) { - LOG.debug("Final aggregator value: {}", counter); + LOG.info("Final aggregator value: {}", counter); } + + LOG.info("Pipeline execution complete."); return evaluator; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index 2a24a76fde9f3..419339a83415e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -152,7 +152,7 @@ public static Credential getUserCredential(GcpOptions options) try { return GoogleCredential.getApplicationDefault().createScoped(USER_SCOPES); } catch (IOException e) { - LOG.info("Failed to get application default credentials, falling back to gcloud."); + LOG.debug("Failed to get application default credentials, falling back to gcloud."); } String gcloudPath = options.getGCloudPath(); @@ -196,7 +196,7 @@ private static Credential getCredentialFromGCloud(String gcloudPath) throw new RuntimeException("Could not obtain credential using gcloud", e); } - LOG.info("Got credential from GCloud"); + LOG.info("Got user credential from GCloud"); return credential; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java index c108ceb4f1571..c9e3185edd8c3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java @@ -108,6 +108,8 @@ static List stageClasspathElementsToGcs( Collection classpathElements, GcsPath gcsStaging, Sleeper retrySleeper) { + LOG.info("Uploading {} files from PipelineOptions.filesToStage to GCS to prepare for execution " + + "in the 
cloud.", classpathElements.size()); ArrayList packages = new ArrayList<>(); if (gcsStaging == null) { @@ -115,6 +117,8 @@ static List stageClasspathElementsToGcs( "Can't stage classpath elements on GCS because no GCS location has been provided"); } + int numUploaded = 0; + int numCached = 0; for (String classpathElement : classpathElements) { String packageName = null; if (classpathElement.contains("=")) { @@ -134,7 +138,9 @@ static List stageClasspathElementsToGcs( try { long remoteLength = gcsUtil.fileSize(target); if (remoteLength >= 0 && remoteLength == getClasspathElementLength(classpathElement)) { - LOG.info("Skipping classpath element already on gcs: {} at {}", classpathElement, target); + LOG.debug("Skipping classpath element already on gcs: {} at {}", + classpathElement, target); + numCached++; continue; } @@ -144,10 +150,11 @@ static List stageClasspathElementsToGcs( INITIAL_BACKOFF_INTERVAL_MS); while (true) { try { - LOG.info("Uploading classpath element {} to {}", classpathElement, target); + LOG.debug("Uploading classpath element {} to {}", classpathElement, target); try (WritableByteChannel writer = gcsUtil.create(target, MimeTypes.BINARY)) { copyContent(classpathElement, writer); } + numUploaded++; break; } catch (IOException e) { if (BackOffUtils.next(retrySleeper, backoff)) { @@ -165,6 +172,10 @@ static List stageClasspathElementsToGcs( throw new RuntimeException("Could not stage classpath element: " + classpathElement, e); } } + + LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, " + + "{} files cached", + numUploaded, numCached); return packages; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index 34d40f1470793..c673b10204bc1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -72,9 +72,9 @@ public boolean handleIOException(HttpRequest request, boolean supportsRetry) throws IOException { boolean willRetry = super.handleIOException(request, supportsRetry); if (willRetry) { - LOG.info("Request failed with IOException, will retry: {}", request.getUrl()); + LOG.debug("Request failed with IOException, will retry: {}", request.getUrl()); } else { - LOG.info("Request failed with IOException, will NOT retry: {}", request.getUrl()); + LOG.debug("Request failed with IOException, will NOT retry: {}", request.getUrl()); } return willRetry; } @@ -104,11 +104,11 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, boolean supportsRetry) throws IOException { boolean retry = handler.handleResponse(request, response, supportsRetry); if (retry) { - LOG.info("Request failed with code {} will retry: {}", + LOG.debug("Request failed with code {} will retry: {}", response.getStatusCode(), request.getUrl()); } else if (!IGNORED_RESPONSE_CODES.contains(response.getStatusCode())) { - LOG.info("Request failed with code {}, will NOT retry: {}", + LOG.debug("Request failed with code {}, will NOT retry: {}", response.getStatusCode(), request.getUrl()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java index c215f4aeafafc..26c88838b0045 100644 --- 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListener.java @@ -66,7 +66,7 @@ void progressChanged(Logger log, UploadState uploadState, long bytesUploaded, lo case INITIATION_STARTED: startTime = currentTime; prevTime = currentTime; - log.info("Uploading: {}", name); + log.debug("Uploading: {}", name); break; case MEDIA_IN_PROGRESS: // Limit messages to be emitted for in progress uploads. @@ -75,7 +75,7 @@ void progressChanged(Logger log, UploadState uploadState, long bytesUploaded, lo / ((currentTime - startTime) / 1000.0); double currentRate = ((bytesUploaded - prevUploadedBytes) / BYTES_IN_MB) / ((currentTime - prevTime) / 1000.0); - log.info(String.format( + log.debug(String.format( "Uploading: %s Average Rate: %.3f MiB/s, Current Rate: %.3f MiB/s, Total: %.3f MiB", name, averageRate, currentRate, bytesUploaded / BYTES_IN_MB)); prevTime = currentTime; @@ -83,7 +83,7 @@ void progressChanged(Logger log, UploadState uploadState, long bytesUploaded, lo } break; case MEDIA_COMPLETE: - log.info("Finished Uploading: {}", name); + log.debug("Finished Uploading: {}", name); break; default: } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java index 96e5bf6b49883..f577ff5c80a9c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/gcsio/LoggingMediaHttpUploaderProgressListenerTest.java @@ -46,7 +46,7 @@ public void setUp() { @Test public void testLoggingInitiation() { listener.progressChanged(mockLogger, UploadState.INITIATION_STARTED, 0L, 0L); - verify(mockLogger).info("Uploading: {}", "NAME"); + verify(mockLogger).debug("Uploading: {}", "NAME"); verifyNoMoreInteractions(mockLogger); } @@ -54,9 +54,9 @@ public void testLoggingInitiation() { public void testLoggingProgressAfterSixtySeconds() { listener.progressChanged(mockLogger, UploadState.MEDIA_IN_PROGRESS, 10485760L, 60001L); listener.progressChanged(mockLogger, UploadState.MEDIA_IN_PROGRESS, 104857600L, 120002L); - verify(mockLogger).info( + verify(mockLogger).debug( "Uploading: NAME Average Rate: 0.167 MiB/s, Current Rate: 0.167 MiB/s, Total: 10.000 MiB"); - verify(mockLogger).info( + verify(mockLogger).debug( "Uploading: NAME Average Rate: 0.833 MiB/s, Current Rate: 1.500 MiB/s, Total: 100.000 MiB"); verifyNoMoreInteractions(mockLogger); } @@ -70,7 +70,7 @@ public void testSkippingLoggingAnInProgressUpdate() { @Test public void testLoggingCompletion() { listener.progressChanged(mockLogger, UploadState.MEDIA_COMPLETE, 104857600L, 60000L); - verify(mockLogger).info("Finished Uploading: {}", "NAME"); + verify(mockLogger).debug("Finished Uploading: {}", "NAME"); verifyNoMoreInteractions(mockLogger); } From a4d86f9a4cb1bc18d7a97ffe3cabb2e2922a2ede Mon Sep 17 00:00:00 2001 From: mariand Date: Sat, 13 Dec 2014 20:15:51 -0800 Subject: [PATCH 0006/1541] Removed shuffle pool options. Shuffle is no longer started as a separate pool. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82072028 --- .../sdk/options/DataflowPipelineOptions.java | 2 +- .../DataflowPipelineShuffleOptions.java | 58 ----------------- .../runners/DataflowPipelineTranslator.java | 25 -------- .../DataflowPipelineTranslatorTest.java | 62 ++----------------- .../sdk/testing/TestPipelineTest.java | 4 +- 5 files changed, 7 insertions(+), 144 deletions(-) delete mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index 7d05088732328..686d72e20c2e6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -33,7 +33,7 @@ */ public interface DataflowPipelineOptions extends PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions, - DataflowPipelineShuffleOptions, DataflowPipelineWorkerPoolOptions, BigQueryOptions, + DataflowPipelineWorkerPoolOptions, BigQueryOptions, GcsOptions, StreamingOptions { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java deleted file mode 100644 index f59f5eb5d78cd..0000000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineShuffleOptions.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.google.cloud.dataflow.sdk.options; - -/** - * Options for Shuffle workers. Most users should not need to adjust the settings in this section. - */ -public interface DataflowPipelineShuffleOptions { - /** - * Disk source image to use by shuffle VMs for jobs. - * @see Compute Engine Images - */ - @Description("Dataflow shuffle VM disk image.") - String getShuffleDiskSourceImage(); - void setShuffleDiskSourceImage(String value); - - /** - * Number of workers to use with the shuffle appliance, or 0 to use - * the default number of workers. - */ - @Description("Number of shuffle workers, when using remote execution") - int getShuffleNumWorkers(); - void setShuffleNumWorkers(int value); - - /** - * Remote shuffle worker disk size, in gigabytes, or 0 to use the - * default size. - */ - @Description("Remote shuffle worker disk size, in gigabytes, or 0 to use the default size.") - int getShuffleDiskSizeGb(); - void setShuffleDiskSizeGb(int value); - - /** - * GCE availability zone for launching shuffle workers. - * - *

Default is up to the service. - */ - @Description("GCE availability zone for launching shuffle workers. " - + "Default is up to the service") - String getShuffleZone(); - void setShuffleZone(String value); -} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 5f0b5a1985f04..b3b24269c365a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -101,7 +101,6 @@ public class DataflowPipelineTranslator { // Must be kept in sync with their internal counterparts. public static final String HARNESS_WORKER_POOL = "harness"; - public static final String SHUFFLE_WORKER_POOL = "shuffle"; private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineTranslator.class); /** @@ -358,9 +357,6 @@ public Job translate(List packages) { workerPool.setTaskrunnerSettings(taskRunnerSettings); - WorkerPool shufflePool = new WorkerPool(); - shufflePool.setKind(SHUFFLE_WORKER_POOL); - if (options.isStreaming()) { job.setType("JOB_TYPE_STREAMING"); } else { @@ -373,10 +369,8 @@ public Job translate(List packages) { workerPool.setPackages(packages); workerPool.setNumWorkers(options.getNumWorkers()); - shufflePool.setNumWorkers(options.getNumWorkers()); if (options.getDiskSourceImage() != null) { workerPool.setDiskSourceImage(options.getDiskSourceImage()); - shufflePool.setDiskSourceImage(options.getDiskSourceImage()); } if (options.getMachineType() != null) { @@ -396,33 +390,14 @@ public Job translate(List packages) { } if (!Strings.isNullOrEmpty(options.getZone())) { workerPool.setZone(options.getZone()); - shufflePool.setZone(options.getZone()); } if (options.getDiskSizeGb() > 0) { workerPool.setDiskSizeGb(options.getDiskSizeGb()); - shufflePool.setDiskSizeGb(options.getDiskSizeGb()); - } - - // Set up any specific shuffle pool parameters - if (options.getShuffleNumWorkers() > 0) { - shufflePool.setNumWorkers(options.getShuffleNumWorkers()); - } - if (options.getShuffleDiskSourceImage() != null) { - shufflePool.setDiskSourceImage(options.getShuffleDiskSourceImage()); - } - if (!Strings.isNullOrEmpty(options.getShuffleZone())) { - shufflePool.setZone(options.getShuffleZone()); - } - if (options.getShuffleDiskSizeGb() > 0) { - shufflePool.setDiskSizeGb(options.getShuffleDiskSizeGb()); } List workerPools = new LinkedList<>(); workerPools.add(workerPool); - if (!options.isStreaming()) { - workerPools.add(shufflePool); - } environment.setWorkerPools(workerPools); pipeline.traverseTopologically(this); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index e2edb9fdc2232..228dbfd73acf7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -20,7 +20,6 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; import static com.google.cloud.dataflow.sdk.util.Structs.getString; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import static org.mockito.Matchers.argThat; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; @@ -152,11 +151,9 @@ public void 
testZoneConfig() throws IOException { Job job = DataflowPipelineTranslator.fromOptions(options).translate( p, Collections.emptyList()); - assertEquals(2, job.getEnvironment().getWorkerPools().size()); + assertEquals(1, job.getEnvironment().getWorkerPools().size()); assertEquals(testZone, job.getEnvironment().getWorkerPools().get(0).getZone()); - assertEquals(testZone, - job.getEnvironment().getWorkerPools().get(1).getZone()); } @Test @@ -171,27 +168,9 @@ public void testWorkerMachineTypeConfig() throws IOException { Job job = DataflowPipelineTranslator.fromOptions(options).translate( p, Collections.emptyList()); - assertEquals(2, job.getEnvironment().getWorkerPools().size()); - - WorkerPool workerPool = null; - - if (job - .getEnvironment() - .getWorkerPools() - .get(0) - .getKind() - .equals(DataflowPipelineTranslator.HARNESS_WORKER_POOL)) { - workerPool = job.getEnvironment().getWorkerPools().get(0); - } else if (job - .getEnvironment() - .getWorkerPools() - .get(1) - .getKind() - .equals(DataflowPipelineTranslator.HARNESS_WORKER_POOL)) { - workerPool = job.getEnvironment().getWorkerPools().get(1); - } else { - fail("Missing worker pool."); - } + assertEquals(1, job.getEnvironment().getWorkerPools().size()); + + WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0); assertEquals(testMachineType, workerPool.getMachineType()); } @@ -207,40 +186,9 @@ public void testDiskSizeGbConfig() throws IOException { Job job = DataflowPipelineTranslator.fromOptions(options).translate( p, Collections.emptyList()); - assertEquals(2, job.getEnvironment().getWorkerPools().size()); + assertEquals(1, job.getEnvironment().getWorkerPools().size()); assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb()); - assertEquals(diskSizeGb, - job.getEnvironment().getWorkerPools().get(1).getDiskSizeGb()); - } - - @Test - public void testShufflePoolConfig() throws IOException { - final Integer numWorkers = 10; - final String diskSource = "test-disk-source"; - final Integer diskSizeGb = 12345; - final String zone = "test-zone-1"; - - DataflowPipelineOptions options = buildPipelineOptions(); - options.setShuffleNumWorkers(numWorkers); - options.setShuffleDiskSourceImage(diskSource); - options.setShuffleDiskSizeGb(diskSizeGb); - options.setShuffleZone(zone); - - Pipeline p = buildPipeline(options); - p.traverseTopologically(new RecordingPipelineVisitor()); - Job job = DataflowPipelineTranslator.fromOptions(options).translate( - p, Collections.emptyList()); - - assertEquals(2, job.getEnvironment().getWorkerPools().size()); - WorkerPool shufflePool = - job.getEnvironment().getWorkerPools().get(1); - assertEquals(shufflePool.getKind(), - DataflowPipelineTranslator.SHUFFLE_WORKER_POOL); - assertEquals(numWorkers, shufflePool.getNumWorkers()); - assertEquals(diskSource, shufflePool.getDiskSourceImage()); - assertEquals(diskSizeGb, shufflePool.getDiskSizeGb()); - assertEquals(zone, shufflePool.getZone()); } @Test diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java index da4f66ec07758..b61c9fdd95599 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java @@ -44,7 +44,7 @@ public void testCreationUsingDefaults() { public void testCreationOfPipelineOptions() throws Exception { ObjectMapper mapper = new ObjectMapper(); String stringOptions = 
mapper.writeValueAsString( - ImmutableMap.of("options", + ImmutableMap.of("options", ImmutableMap.builder() .put("runner", DataflowPipelineRunner.class.getName()) .put("project", "testProject") @@ -56,7 +56,6 @@ public void testCreationOfPipelineOptions() throws Exception { .put("zone", "testZone") .put("numWorkers", "1") .put("diskSizeGb", "2") - .put("shuffleDiskSizeGb", "3") .build())); System.getProperties().put("dataflowOptions", stringOptions); TestDataflowPipelineOptions options = TestPipeline.getPipelineOptions(); @@ -71,6 +70,5 @@ public void testCreationOfPipelineOptions() throws Exception { assertEquals("testServiceAccountKeyfile", options.getServiceAccountKeyfile()); assertEquals("testZone", options.getZone()); assertEquals(2, options.getDiskSizeGb()); - assertEquals(3, options.getShuffleDiskSizeGb()); } } From 8767bb0eac5627e31e4fc843fcf7ae5cb53e6383 Mon Sep 17 00:00:00 2001 From: robertwb Date: Sat, 13 Dec 2014 22:16:41 -0800 Subject: [PATCH 0007/1541] Fixes a bunch of warnings in the transforms package. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82074882 --- .../sdk/transforms/ApproximateQuantiles.java | 2 +- .../dataflow/sdk/transforms/Combine.java | 1 + .../cloud/dataflow/sdk/transforms/Create.java | 9 +++++-- .../cloud/dataflow/sdk/transforms/DoFn.java | 7 +++--- .../dataflow/sdk/transforms/DoFnTester.java | 1 + .../dataflow/sdk/transforms/Flatten.java | 6 ++--- .../dataflow/sdk/transforms/GroupByKey.java | 24 +++++++++---------- .../cloud/dataflow/sdk/transforms/Mean.java | 2 +- .../dataflow/sdk/transforms/PTransform.java | 10 +++----- .../dataflow/sdk/transforms/Partition.java | 4 +++- .../cloud/dataflow/sdk/transforms/Top.java | 2 +- .../cloud/dataflow/sdk/transforms/View.java | 6 +++-- .../transforms/windowing/FixedWindows.java | 4 ++-- .../windowing/InvalidWindowingFn.java | 6 ++--- .../sdk/transforms/windowing/Sessions.java | 2 +- .../transforms/windowing/SlidingWindows.java | 2 +- .../sdk/transforms/windowing/Window.java | 6 ++--- .../sdk/transforms/windowing/WindowingFn.java | 2 +- .../cloud/dataflow/sdk/util/DoFnRunner.java | 6 ++--- .../dataflow/sdk/values/PCollection.java | 2 +- 20 files changed, 55 insertions(+), 49 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index ff5687fe30fb3..b2c93c4d701cd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -54,7 +54,7 @@ public class ApproximateQuantiles { /** * Returns a {@code PTransform} that takes a {@code PCollection} - * and returns a {@code PCollection>} whose sinlge value is a + * and returns a {@code PCollection>} whose single value is a * {@code List} of the approximate {@code N}-tiles of the elements * of the input {@code PCollection}. This gives an idea of the * distribution of the input elements. 
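For readers of the ApproximateQuantiles hunk above, a rough usage sketch follows. It is not part of the patch: the pipeline p and the input values are placeholders, and the generic signature of globally(int) is assumed from the description above.

    // Hypothetical sketch: approximate quartiles of a small PCollection of integers.
    PCollection<Integer> values = p.apply(Create.of(5, 9, 1, 17, 3, 11, 7));
    // The single output element is a List<Integer> of 5 approximate quantiles
    // (minimum, the three quartiles, maximum).
    PCollection<List<Integer>> quartiles =
        values.apply(ApproximateQuantiles.<Integer>globally(5));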
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 9b374665451ed..6746f9e75672b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -1020,6 +1020,7 @@ private KvCoder getKvCoder() { "Combine.GroupedValues requires its input values to use " + "IterableCoder"); } + @SuppressWarnings("unchecked") IterableCoder inputValuesCoder = (IterableCoder) kvValueCoder; Coder inputValueCoder = inputValuesCoder.getElemCoder(); return KvCoder.of(keyCoder, inputValueCoder); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java index 93747ea6462f7..0ff9deb039bc9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -26,6 +26,7 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PInput; import com.google.cloud.dataflow.sdk.values.TimestampedValue; +import com.google.cloud.dataflow.sdk.values.TimestampedValue.TimestampedValueCoder; import com.google.common.reflect.TypeToken; import org.joda.time.Instant; @@ -151,8 +152,9 @@ public static CreateTimestamped timestamped(Iterable> * *

The argument should not be modified after this is called. */ + @SuppressWarnings("unchecked") public static CreateTimestamped timestamped(TimestampedValue... elems) { - return new CreateTimestamped(Arrays.asList(elems)); + return new CreateTimestamped<>(Arrays.asList(elems)); } /** @@ -264,7 +266,10 @@ public PCollection apply(PBegin input) { // There aren't any elements, so we can provide a fake coder instance. // If we don't set a Coder here, users of CreateTimestamped have // no way to set the coder of the intermediate PCollection. - intermediate.setCoder((Coder) TimestampedValue.TimestampedValueCoder.of(VoidCoder.of())); + @SuppressWarnings("unchecked") + TimestampedValueCoder fakeCoder = + (TimestampedValueCoder) TimestampedValue.TimestampedValueCoder.of(VoidCoder.of()); + intermediate.setCoder(fakeCoder); } return intermediate.apply(ParDo.of(new ConvertTimestamps())); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index 3c61ab38557d7..c9935fc25810b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.transforms; +import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.values.CodedTupleTag; @@ -68,7 +69,7 @@ public abstract class Context { * Returns the value of the side input. * * @throws IllegalArgumentException if this is not a side input - * @see ParDo#withSideInput + * @see ParDo#withSideInputs */ public abstract T sideInput(PCollectionView view); @@ -84,7 +85,7 @@ public abstract class Context { * windowed by the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. * If this is the case, the output element will have a timestamp - * of negative infinity and be in the + * of negative infinity and be in the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. */ public abstract void output(O output); @@ -103,7 +104,7 @@ public abstract class Context { * windowed by the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. * If this is the case, the output element's timestamp will be - * the given timestamp and its window will be the + * the given timestamp and its window will be the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. 
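As a rough illustration of the output contract described above (not part of the patch; the input format, the pipeline p, and the timestamp values are placeholders), a DoFn can attach an explicit event-time timestamp to each output element:

    PCollection<String> events =
        p.apply(Create.of("click,1418500000000", "view,1418500060000"));
    PCollection<String> stamped = events.apply(ParDo.of(new DoFn<String, String>() {
      @Override
      public void processElement(ProcessContext c) {
        String[] parts = c.element().split(",");
        // Re-emit the payload with the parsed event-time timestamp.
        c.outputWithTimestamp(parts[0], new Instant(Long.parseLong(parts[1])));
      }
    }));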
*/ public abstract void outputWithTimestamp(O output, Instant timestamp); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java index 3e23b5ed04506..70ae664a6fc20 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java @@ -221,6 +221,7 @@ public List peekOutputElements() { return Lists.transform(fnRunner.getReceiver(mainOutputTag), new Function() { @Override + @SuppressWarnings("unchecked") public O apply(Object input) { return ((WindowedValue) input).getValue(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index 14b2169b97bfa..c52ae265fd5f2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -52,7 +52,7 @@ public class Flatten { /** - * Returns a {@link PTransform} that flattens a {@link PCollectionList} + * Returns a {@link PTransform} that flattens a {@link CollectionList} * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * @@ -112,7 +112,7 @@ private FlattenPCollectionList() { } @Override public PCollection apply(PCollectionList inputs) { - WindowingFn windowingFn; + WindowingFn windowingFn; if (!getInput().getAll().isEmpty()) { windowingFn = getInput().get(0).getWindowingFn(); for (PCollection input : getInput().getAll()) { @@ -145,7 +145,7 @@ protected Coder getDefaultOutputCoder() { /** * {@code FlattenIterables} takes a {@code PCollection>} and returns a * {@code PCollection} that contains all the elements from each iterable. - * Implements {@link #fromIterable}. + * Implements {@link #iterables}. 
* * @param the type of the elements of the input {@code Iterable}s and * the output {@code PCollection} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index d7a4de64e50d3..54e60cfcb6209 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -36,8 +36,6 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; -import org.joda.time.Instant; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -155,8 +153,8 @@ public static class ReifyTimestampsAndWindows @Override public PCollection>> apply( PCollection> input) { - Coder> inputCoder = getInput().getCoder(); - KvCoder inputKvCoder = (KvCoder) inputCoder; + @SuppressWarnings("unchecked") + KvCoder inputKvCoder = (KvCoder) getInput().getCoder(); Coder keyCoder = inputKvCoder.getKeyCoder(); Coder inputValueCoder = inputKvCoder.getValueCoder(); Coder> outputValueCoder = FullWindowedValueCoder.of( @@ -229,19 +227,18 @@ public int compare(WindowedValue e1, WindowedValue e2) { public static class GroupAlsoByWindow extends PTransform>>>, PCollection>>> { - private final WindowingFn windowingFn; + private final WindowingFn windowingFn; - public GroupAlsoByWindow(WindowingFn windowingFn) { + public GroupAlsoByWindow(WindowingFn windowingFn) { this.windowingFn = windowingFn; } @Override public PCollection>> apply( PCollection>>> input) { - Coder>>> inputCoder = - getInput().getCoder(); + @SuppressWarnings("unchecked") KvCoder>> inputKvCoder = - (KvCoder>>) inputCoder; + (KvCoder>>) getInput().getCoder(); Coder keyCoder = inputKvCoder.getKeyCoder(); Coder>> inputValueCoder = inputKvCoder.getValueCoder(); @@ -260,7 +257,7 @@ public PCollection>> apply( return input.apply(ParDo.of( new GroupAlsoByWindowsDoFn( - windowingFn, inputIterableElementValueCoder))) + (WindowingFn) windowingFn, inputIterableElementValueCoder))) .setCoder(outputKvCoder); } } @@ -280,6 +277,7 @@ public static class GroupByKeyOnly public GroupByKeyOnly() { } + @SuppressWarnings({"rawtypes", "unchecked"}) @Override public PCollection>> apply(PCollection> input) { WindowingFn windowingFn = getInput().getWindowingFn(); @@ -314,6 +312,7 @@ public void finishSpecifying() { * Returns the {@code Coder} of the input to this transform, which * should be a {@code KvCoder}. */ + @SuppressWarnings("unchecked") KvCoder getInputKvCoder() { Coder> inputCoder = getInput().getCoder(); if (!(inputCoder instanceof KvCoder)) { @@ -398,7 +397,6 @@ private static void evaluateHelper( for (ValueWithMetadata> elem : inputElems) { K key = elem.getValue().getKey(); V value = elem.getValue().getValue(); - Instant timestamp = elem.getTimestamp(); byte[] encodedKey; try { encodedKey = encodeToByteArray(keyCoder, key); @@ -447,9 +445,9 @@ public PCollection>> applyHelper( // merging windows as needed, using the windows assigned to the // key/value input elements and the window merge operation of the // windowing function associated with the input PCollection. 
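As a rough illustration of the grouping being implemented here (not part of the patch; the key and value types, the pipeline p, and the explicit coder are placeholders), a caller applies GroupByKey to a keyed PCollection and receives one iterable of values per key and window:

    PCollection<KV<String, Integer>> clicks = p
        .apply(Create.of(KV.of("/home", 1), KV.of("/home", 2), KV.of("/about", 5)))
        .setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
    PCollection<KV<String, Iterable<Integer>>> grouped =
        clicks.apply(GroupByKey.<String, Integer>create());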
- WindowingFn windowingFn = getInput().getWindowingFn(); + WindowingFn windowingFn = getInput().getWindowingFn(); if (windowingFn instanceof InvalidWindowingFn) { - String cause = ((InvalidWindowingFn) windowingFn).getCause(); + String cause = ((InvalidWindowingFn) windowingFn).getCause(); throw new IllegalStateException( "GroupByKey must have a valid Window merge function. " + "Invalid because: " + cause); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java index 34fbb1fc29088..b198a0da25041 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java @@ -137,7 +137,7 @@ public Coder getAccumulatorCoder( // The casts are needed because CountSum.class is a // Class, but we need a // Class.CountSum>. - return SerializableCoder.of((Class) (Class) CountSum.class); + return SerializableCoder.of((Class) (Class) CountSum.class); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java index 5906d7212dba0..6fc2618dcc021 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java @@ -24,7 +24,6 @@ import com.google.cloud.dataflow.sdk.values.POutput; import com.google.cloud.dataflow.sdk.values.TypedPValue; -import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; @@ -95,9 +94,7 @@ * *

Each PCollection output produced by a PTransform, * either directly or within a "bundling" class, automatically gets - * its own name derived from the name of its producing PTransform. An - * output's name can be changed by invoking - * {@link com.google.cloud.dataflow.sdk.values.PValue#setName}. + * its own name derived from the name of its producing PTransform. * *

Each PCollection output produced by a PTransform * also records a {@link com.google.cloud.dataflow.sdk.coders.Coder} @@ -348,14 +345,13 @@ protected String getKindString() { return StringUtils.approximateSimpleName(getClass()); } - private void writeObject(ObjectOutputStream oos) throws IOException { + private void writeObject(ObjectOutputStream oos) { // We don't really want to be serializing this object, but we // often have serializable anonymous DoFns nested within a // PTransform. } - private void readObject(ObjectInputStream oos) - throws IOException, ClassNotFoundException { + private void readObject(ObjectInputStream oos) { // We don't really want to be serializing this object, but we // often have serializable anonymous DoFns nested within a // PTransform. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java index 74a1359aa5ed0..a6444b2774f4c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java @@ -162,7 +162,9 @@ public void processElement(ProcessContext c) { T1 input = c.element(); int partition = partitionFn.partitionFor(input, numPartitions); if (0 <= partition && partition < numPartitions) { - c.sideOutput((TupleTag) outputTags.get(partition), input); + @SuppressWarnings("unchecked") + TupleTag typedTag = (TupleTag) outputTags.get(partition); + c.sideOutput(typedTag, input); } else { throw new IndexOutOfBoundsException( "Partition function returned out of bounds index: " + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index 1f63808fc2237..2e5e334b8a73d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -460,7 +460,7 @@ public void registerByteSizeObserver( throws Exception { listCoder.registerByteSizeObserver(value.asList(), observer, context); } - }; + } } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java index d3bb863888707..637aeb73f6f3b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java @@ -70,7 +70,7 @@ public static AsIterable asIterable() { * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} * yielding the single element it contains. * - *

Instantiate via {@link View.asIterable}. + *

Instantiate via {@link View#asIterable}. */ public static class AsIterable extends PTransform< PCollection, @@ -91,7 +91,7 @@ public PCollectionView, Iterable>> apply( * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} * yielding the single element it contains. * - *

Instantiate via {@link View.asIterable}. + *

Instantiate via {@link View#asIterable}. */ public static class AsSingleton extends PTransform, PCollectionView>> { @@ -164,6 +164,7 @@ public SingletonPCollectionView(Pipeline pipeline) { setPipelineInternal(pipeline); } + @SuppressWarnings("unchecked") @Override public T fromIterableInternal(Iterable> contents) { try { @@ -189,6 +190,7 @@ public IterablePCollectionView(Pipeline pipeline) { @Override public Iterable fromIterableInternal(Iterable> contents) { return Iterables.transform(contents, new Function, T>() { + @SuppressWarnings("unchecked") @Override public T apply(WindowedValue input) { return (T) input.getValue(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java index ea7a22c8fc41a..91c591c901900 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java @@ -57,7 +57,7 @@ public static FixedWindows of(Duration size) { * [N * size + offset, (N + 1) * size + offset), * where 0 is the epoch. * - * @throws IllegalAgumentException if offset is not in [0, size) + * @throws IllegalArgumentException if offset is not in [0, size) */ public FixedWindows withOffset(Duration offset) { return new FixedWindows(size, offset); @@ -85,7 +85,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowingFn other) { return (other instanceof FixedWindows) && (size.equals(((FixedWindows) other).size)) && (offset.equals(((FixedWindows) other).offset)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java index 7ad7f29f6655a..53dc93a213945 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java @@ -47,7 +47,7 @@ public String getCause() { public WindowingFn getOriginalWindowingFn() { return originalWindowingFn; } - + @Override public Collection assignWindows(AssignContext c) { throw new UnsupportedOperationException(); @@ -67,9 +67,9 @@ public Coder windowCoder() { * {@code InvalidWindowingFn} objects with the same {@code originalWindowingFn} are compatible. 
*/ @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowingFn other) { return getClass() == other.getClass() && getOriginalWindowingFn().isCompatible( - ((InvalidWindowingFn) other).getOriginalWindowingFn()); + ((InvalidWindowingFn) other).getOriginalWindowingFn()); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java index 47f8a08005830..26744a549ba2a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java @@ -75,7 +75,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowingFn other) { return other instanceof Sessions; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java index 6643289071ef5..d98dab908240c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -118,7 +118,7 @@ public Collection assignWindows(AssignContext c) { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowingFn other) { if (other instanceof SlidingWindows) { SlidingWindows that = (SlidingWindows) other; return period.equals(that.period) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 68796c908aba5..c450e0fa7980f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -224,10 +224,10 @@ public static Remerge remerge() { public static class Remerge extends PTransform, PCollection> { @Override public PCollection apply(PCollection input) { - WindowingFn windowingFn = getInput().getWindowingFn(); - WindowingFn outputWindowingFn = + WindowingFn windowingFn = getInput().getWindowingFn(); + WindowingFn outputWindowingFn = (windowingFn instanceof InvalidWindowingFn) - ? ((InvalidWindowingFn) windowingFn).getOriginalWindowingFn() + ? ((InvalidWindowingFn) windowingFn).getOriginalWindowingFn() : windowingFn; return input.apply(ParDo.named("Identity").of(new DoFn() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java index 0f049372555bb..935deb09f2b7c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java @@ -107,7 +107,7 @@ public abstract void merge(Collection toBeMerged, W mergeResult) * Returns whether this performs the same merging as the given * {@code WindowingFn}. 
*/ - public abstract boolean isCompatible(WindowingFn other); + public abstract boolean isCompatible(WindowingFn other); /** * Returns the {@link Coder} used for serializing the windows used diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index 975af472a4b6d..3a6d4af1c1a92 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -38,7 +38,7 @@ public class DoFnRunner { public interface OutputManager { /** Returns the receiver to use for a given tag. */ - public R initialize(TupleTag tag); + public R initialize(TupleTag tag); /** Outputs a single element to the provided receiver. */ public void output(R receiver, WindowedValue output); @@ -91,7 +91,7 @@ public static DoFnRunner createWithListOutputs( options, fn, sideInputs, new OutputManager() { @Override - public List initialize(TupleTag tag) { + public List initialize(TupleTag tag) { return new ArrayList<>(); } @Override @@ -141,7 +141,7 @@ public void finishBundle() { } /** Returns the receiver who gets outputs with the provided tag. */ - public R getReceiver(TupleTag tag) { + public R getReceiver(TupleTag tag) { return context.getReceiver(tag); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index fc4b0886b7d5b..34ad2163a8a1e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -170,7 +170,7 @@ public Output apply( /** * Returns the {@link WindowingFn} of this {@code PCollection}. */ - public WindowingFn getWindowingFn() { + public WindowingFn getWindowingFn() { return windowingFn; } From cda8313c2d8b487d377c31dc502857d2bbde1825 Mon Sep 17 00:00:00 2001 From: malo Date: Sun, 14 Dec 2014 20:15:06 -0800 Subject: [PATCH 0008/1541] Set the sdk major_version to 0. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82113067 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 9f770fcbb33cd..c13ef51c7c18a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -196,9 +196,7 @@ public DataflowPipelineJob run(Pipeline pipeline) { // Requirements about the service. Map environmentVersion = new HashMap<>(); - // TODO: Specify the environment major version. - // environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, - // ENVIRONMENT_MAJOR_VERSION); + environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION); newJob.getEnvironment().setVersion(environmentVersion); // Default jobType is DATA_PARALLEL which is for java batch. 
String jobType = "DATA_PARALLEL"; From 22af2f3351a66296e930033790d9130b57de17ab Mon Sep 17 00:00:00 2001 From: fjp Date: Sun, 14 Dec 2014 22:08:30 -0800 Subject: [PATCH 0009/1541] Example Usability improvements: - print description when missing required options - document BigQuery dataset creation requirements - standardize example instructions [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82117381 --- .../dataflow/examples/BigQueryTornadoes.java | 24 ++++++++++- .../google/cloud/dataflow/examples/TfIdf.java | 43 ++++++------------- .../examples/TopWikipediaSessions.java | 27 ++++++------ .../cloud/dataflow/examples/WordCount.java | 19 ++++---- .../cloud/dataflow/sdk/io/BigQueryIO.java | 4 +- .../sdk/options/PipelineOptionsValidator.java | 15 ++++--- .../dataflow/BigQueryIOTranslator.java | 3 +- .../options/PipelineOptionsValidatorTest.java | 13 +++--- 8 files changed, 80 insertions(+), 68 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java index 43e94c08633b0..a5f9a19705218 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java @@ -39,6 +39,26 @@ /** * An example that reads the public samples of weather data from BigQuery, counts the number of * tornadoes that occur in each month, and writes the results to BigQuery. + * + *

Concepts: Reading/writing BigQuery; counting a PCollection; user-defined PTransforms + * + *

Note: Before running this example, you must create a BigQuery dataset to contain your output + * table. + * + *

To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and the BigQuery table for the output: + * --output=:. + * + *

To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and the BigQuery table for the output: + * --output=:. + * + *

The BigQuery input table defaults to clouddataflow-readonly:samples.weather_stations and can + * be overridden with --input. */ public class BigQueryTornadoes { // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod. @@ -118,8 +138,8 @@ private static interface Options extends PipelineOptions { String getInput(); void setInput(String value); - @Description("Table to write to, specified as " - + ":.") + @Description("BigQuery table to write to, specified as " + + ":.. The dataset must already exist.") @Validation.Required String getOutput(); void setOutput(String value); diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java index a6bd4f27fd61d..15948782e2b60 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java @@ -60,39 +60,22 @@ /** * An example that computes a basic TF-IDF search table for a directory or GCS prefix. + * + *

Concepts: joining data; side inputs * - *

Command-line usage for this example: + *

To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and a local output file or output prefix on GCS: + * --output=[ | gs://] * - *

- *     java com.google.cloud.dataflow.examples.TfIdf \
- *       --runner= \
- *       --input= \
- *       --output=
- * 
+ *

To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and an output prefix on GCS: + * --output=gs:// * - *

For example, to execute this pipeline locally to index a local directory: - * - *

- *     java com.google.cloud.dataflow.examples.TfIdf \
- *       --runner=DirectPipelineRunner \
- *       --input= \
- *       --output=
- * 
- * - *

To execute this pipeline using the Dataflow service - * to index the works of Shakespeare and write the results to a GCS bucket: - * (For execution via the Dataflow service, only GCS locations are supported) - * - *

- *     java com.google.cloud.dataflow.examples.TfIdf \
- *       --project= \
- *       --stagingLocation=gs:// \
- *       --runner=BlockingDataflowPipelineRunner \
- *       [--input=gs://] \
- *       --output=gs://
- * 
- * - *

The default input is gs://dataflow-samples/shakespeare/ + *

The default input is gs://dataflow-samples/shakespeare/ and can be overridden with --input. */ public class TfIdf { /** diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java index baa520ea0447f..e4b2473c2f80e 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java @@ -45,24 +45,23 @@ import java.util.List; /** - * Pipeline that reads Wikipedia edit data from BigQuery and computes the user with + * An example that reads Wikipedia edit data from BigQuery and computes the user with * the longest string of edits separated by no more than an hour within each month. * - *

This pipeline demonstrates how the Windowing API can be used to perform - * various time-based aggregations of data. + *

Concepts: Using Windowing to perform time-based aggregations of data. + * + *

It is not recommended to execute this pipeline locally, given the size of the default input + * data. * - *

To run this pipeline, the following options must be provided: - *

{@code
+ * 

To execute this pipeline using the Dataflow service, specify pipeline configuration: * --project= - * --output=gs:// - * --stagingLocation=gs:// - * --runner=(Blocking)DataflowPipelineRunner - * }

- * - *

To run this example using Dataflow service, you must additionally - * provide either {@literal --stagingLocation} or {@literal --tempLocation}, and - * select one of the Dataflow pipeline runners, eg - * {@literal --runner=BlockingDataflowPipelineRunner}. + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and an output prefix on GCS: + * --output=gs:// + * + *

The default input is gs://dataflow-samples/wikipedia_edits/*.json and can be overridden with + * --input. */ public class TopWikipediaSessions { private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json"; diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 4295e7868d26e..4fe304925c0b6 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -38,19 +38,22 @@ * An example that counts words in Shakespeare. For a detailed walkthrough of this * example see: * https://developers.google.com/cloud-dataflow/java-sdk/wordcount-example + * + *

Concepts: Reading/writing text files; counting a PCollection; user-defined PTransforms * - * To execute this pipeline locally, specify general pipeline configuration: + *

To execute this pipeline locally, specify general pipeline configuration: * --project= - * and example configuration: - * --output=[ | gs://] + * and a local output file or output prefix on GCS: + * --output=[ | gs://] * - * To execute this pipeline using the Dataflow service, specify pipeline configuration: - * --project= --stagingLocation=gs:// + *

To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// * --runner=BlockingDataflowPipelineRunner - * and example configuration: - * --output=gs:// + * and an output prefix on GCS: + * --output=gs:// * - * The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be + *

The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be * overridden with --input. */ public class WordCount { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index 2fffe4de2c45b..37e2963035e64 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -106,7 +106,7 @@ *

* See {@link BigQueryIO.Write} for details on how to specify if a write should * append to an existing table, replace the table, or verify that the table is - * empty. + * empty. Note that the dataset being written to must already exist. * * @see TableRow */ @@ -349,6 +349,8 @@ public boolean getValidate() { * provided (via {@link Write#withSchema}), or else the transform may fail * at runtime with an {@link java.lang.IllegalArgumentException}. *

+ * The dataset being written must already exist. + *

* By default, writes require an empty table, which corresponds to * a {@code WriteDisposition.WRITE_EMPTY} disposition which matches the * default of BigQuery's Jobs API. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java index bb7bcf3de831f..046cfd7fda05e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java @@ -18,7 +18,6 @@ import com.google.common.base.Preconditions; -import java.lang.annotation.Annotation; import java.lang.reflect.Method; import java.lang.reflect.Proxy; @@ -47,13 +46,17 @@ public static T validate(Class klass, PipelineOpt ProxyInvocationHandler handler = (ProxyInvocationHandler) Proxy.getInvocationHandler(options); for (Method method : PipelineOptionsFactory.getClosureOfMethodsOnInterface(klass)) { - for (Annotation annotation : method.getAnnotations()) { - if (annotation instanceof Validation.Required) { - Preconditions.checkArgument(handler.invoke(options, method, null) != null, - "Expected non-null property to be set for [" + method + "]."); - } + if (method.getAnnotation(Validation.Required.class) != null) { + Preconditions.checkArgument(handler.invoke(options, method, null) != null, + "Missing required value for [" + method + ", \"" + getDescription(method) + "\"]. "); } } return options.as(klass); } + + private static String getDescription(Method method) { + Description description = method.getAnnotation(Description.class); + return description == null ? "" : description.value(); + } } + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java index fd2731949c414..b3d0c19977db2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java @@ -150,7 +150,8 @@ private static void verifyDatasetPresence( ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); if (errorExtractor.itemNotFound(e)) { throw new IllegalArgumentException( - "BigQuery dataset not found for table: " + BigQueryIO.toTableSpec(table), e); + "BigQuery dataset not found for table \"" + BigQueryIO.toTableSpec(table) + + "\". Please create the dataset before pipeline execution."); } else { throw new RuntimeException( "unable to confirm BigQuery dataset presence", e); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java index e0decb9f92255..ad584ea32aeb4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java @@ -30,6 +30,7 @@ public class PipelineOptionsValidatorTest { /** A test interface with an {@link Validation.Required} annotation. 
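As a rough illustration of the friendlier validation message introduced above (not part of the patch; the interface name and option names are placeholders), a user-defined options interface pairs @Description with @Validation.Required, and the validator now echoes the description when the value is missing:

    public interface MyExampleOptions extends PipelineOptions {
      @Description("BigQuery table to write to, as project:dataset.table")
      @Validation.Required
      String getOutput();
      void setOutput(String value);
    }

    // Validation is typically invoked by the runner; shown directly here for clarity.
    MyExampleOptions opts =
        PipelineOptionsFactory.fromArgs(args).as(MyExampleOptions.class);
    PipelineOptionsValidator.validate(MyExampleOptions.class, opts);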
*/ public static interface Required extends PipelineOptions { @Validation.Required + @Description("Fake Description") public String getObject(); public void setObject(String value); } @@ -44,9 +45,9 @@ public void testWhenRequiredOptionIsSet() { @Test public void testWhenRequiredOptionIsSetAndCleared() { expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Expected non-null property to be set for " + expectedException.expectMessage("Missing required value for " + "[public abstract java.lang.String com.google.cloud.dataflow." - + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + "sdk.options.PipelineOptionsValidatorTest$Required.getObject(), \"Fake Description\"]."); Required required = PipelineOptionsFactory.as(Required.class); required.setObject("blah"); @@ -57,9 +58,9 @@ public void testWhenRequiredOptionIsSetAndCleared() { @Test public void testWhenRequiredOptionIsNeverSet() { expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Expected non-null property to be set for " + expectedException.expectMessage("Missing required value for " + "[public abstract java.lang.String com.google.cloud.dataflow." - + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + "sdk.options.PipelineOptionsValidatorTest$Required.getObject(), \"Fake Description\"]."); Required required = PipelineOptionsFactory.as(Required.class); PipelineOptionsValidator.validate(Required.class, required); @@ -76,9 +77,9 @@ public static interface SubClassValidation extends Required { @Test public void testValidationOnOverriddenMethods() throws Exception { expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Expected non-null property to be set for " + expectedException.expectMessage("Missing required value for " + "[public abstract java.lang.String com.google.cloud.dataflow." - + "sdk.options.PipelineOptionsValidatorTest$Required.getObject()]."); + + "sdk.options.PipelineOptionsValidatorTest$Required.getObject(), \"Fake Description\"]."); SubClassValidation required = PipelineOptionsFactory.as(SubClassValidation.class); PipelineOptionsValidator.validate(Required.class, required); From 5581b9fe032c9dedc3a11c595a46312bbfbdc8d3 Mon Sep 17 00:00:00 2001 From: fjp Date: Sun, 14 Dec 2014 23:05:30 -0800 Subject: [PATCH 0010/1541] Add an easy option for configuring the autoscaling algorithm. 
Leave the default as NONE, but now basic, experimental autoscaling can be enabled with: --maxNumWorkers=20 --autoscalingAlgorithm=BASIC [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82119376 --- .../examples/TopWikipediaSessions.java | 10 ++++- .../DataflowPipelineWorkerPoolOptions.java | 45 +++++++++++++++++++ .../runners/DataflowPipelineTranslator.java | 8 ++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java index e4b2473c2f80e..3d945b6394acb 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java @@ -45,7 +45,7 @@ import java.util.List; /** - * An example that reads Wikipedia edit data from BigQuery and computes the user with + * An example that reads Wikipedia edit data from Cloud Storage and computes the user with * the longest string of edits separated by no more than an hour within each month. * *

Concepts: Using Windowing to perform time-based aggregations of data. @@ -59,9 +59,15 @@ * --runner=BlockingDataflowPipelineRunner * and an output prefix on GCS: * --output=gs:// - * + * *

The default input is gs://dataflow-samples/wikipedia_edits/*.json and can be overridden with * --input. + * + *

The input for this example is large enough that it's a good place to enable (experimental) + * autoscaling: + * --autoscalingAlgorithm=BASIC + * --maxNumWorkers=20 + * This will automatically scale the number of workers up over time until the job completes. */ public class TopWikipediaSessions { private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json"; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index 6cd9839318630..b388da4313ee7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -37,6 +37,51 @@ public interface DataflowPipelineWorkerPoolOptions { @Default.Integer(3) int getNumWorkers(); void setNumWorkers(int value); + + /** + * Type of autoscaling algorithm to use. These types are experimental and subject to change. + */ + public enum AutoscalingAlgorithmType { + /** Use numWorkers machines. Do not autoscale the worker pool. */ + NONE("AUTOSCALING_ALGORITHM_NONE"), + + /** Autoscale the workerpool size up to maxNumWorkers until the job completes. */ + BASIC("AUTOSCALING_ALGORITHM_BASIC"); + + private final String algorithm; + + private AutoscalingAlgorithmType(String algorithm) { + this.algorithm = algorithm; + } + + /** Returns the string representation of this type. */ + public String getAlgorithm() { + return this.algorithm; + } + } + + @Description("(experimental) The autoscaling algorithm to use for the workerpool.") + @Default.InstanceFactory(AutoscalingAlgorithmTypeFactory.class) + AutoscalingAlgorithmType getAutoscalingAlgorithm(); + void setAutoscalingAlgorithm(AutoscalingAlgorithmType value); + + /** Returns the default NONE AutoscalingAlgorithmType. */ + public static class AutoscalingAlgorithmTypeFactory implements + DefaultValueFactory { + @Override + public AutoscalingAlgorithmType create(PipelineOptions options) { + return AutoscalingAlgorithmType.NONE; + } + } + + /** + * Max number of workers to use when using workerpool autoscaling. + * This option is experimental and subject to change. + */ + @Description("Max number of workers to use, when using autoscaling") + @Default.Integer(20) + int getMaxNumWorkers(); + void setMaxNumWorkers(int value); /** * Remote worker disk size, in gigabytes, or 0 to use the default size. 
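As a rough illustration of the new autoscaling options (the flag names and values are taken from the commit message; the programmatic form below is an assumption, not part of the patch), the experimental mode can be requested on the command line with --autoscalingAlgorithm=BASIC --maxNumWorkers=20, or equivalently in code:

    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    // BASIC autoscaling grows the worker pool up to maxNumWorkers over the life of
    // the job; the default NONE keeps the pool at numWorkers.
    options.setAutoscalingAlgorithm(
        DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.BASIC);
    options.setMaxNumWorkers(20);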
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index b3b24269c365a..823b9d44a1f6e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -29,6 +29,7 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getString; import com.google.api.client.util.Preconditions; +import com.google.api.services.dataflow.model.AutoscalingSettings; import com.google.api.services.dataflow.model.DataflowPackage; import com.google.api.services.dataflow.model.Disk; import com.google.api.services.dataflow.model.Environment; @@ -48,6 +49,7 @@ import com.google.cloud.dataflow.sdk.io.PubsubIO; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType; import com.google.cloud.dataflow.sdk.runners.dataflow.AvroIOTranslator; import com.google.cloud.dataflow.sdk.runners.dataflow.BigQueryIOTranslator; import com.google.cloud.dataflow.sdk.runners.dataflow.DatastoreIOTranslator; @@ -394,6 +396,12 @@ public Job translate(List packages) { if (options.getDiskSizeGb() > 0) { workerPool.setDiskSizeGb(options.getDiskSizeGb()); } + if (!options.getAutoscalingAlgorithm().equals(AutoscalingAlgorithmType.NONE)) { + AutoscalingSettings settings = new AutoscalingSettings(); + settings.setAlgorithm(options.getAutoscalingAlgorithm().getAlgorithm()); + settings.setMaxNumWorkers(options.getMaxNumWorkers()); + workerPool.setAutoscalingSettings(settings); + } List workerPools = new LinkedList<>(); From 3aa23436ed57daab40b2a3ce28c2c6accf8b9aff Mon Sep 17 00:00:00 2001 From: earhart Date: Sun, 14 Dec 2014 23:37:39 -0800 Subject: [PATCH 0011/1541] Fixing a few more warnings. 
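Nearly all of the warnings addressed here are javac's serializable-class-without-serialVersionUID lint: DoFn, Coder, PTransform, and several of the Comparator helpers are Serializable, so every concrete subclass trips it. A few raw-type warnings in Pipeline.java and PipelineOptionsFactory.java are cleaned up as well. A minimal illustrative sketch of the two idioms this patch uses, with made-up class names that are not part of the SDK:

    // Idiom 1: suppress the lint on classes whose serialized form is not a
    // compatibility contract, such as user DoFns that are serialized only to
    // ship them to workers.
    @SuppressWarnings("serial")
    class MyLengthFn extends com.google.cloud.dataflow.sdk.transforms.DoFn<String, Integer> {
      @Override
      public void processElement(ProcessContext c) {
        c.output(c.element().length());
      }
    }

    // Idiom 2: pin an explicit serialVersionUID where the serialized form should
    // stay stable, as this patch does for KV.
    class MyValue implements java.io.Serializable {
      private static final long serialVersionUID = 0;
    }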
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82120439 --- .../java/com/google/cloud/dataflow/sdk/Pipeline.java | 12 ++++++------ .../dataflow/sdk/coders/BigEndianIntegerCoder.java | 1 + .../dataflow/sdk/coders/BigEndianLongCoder.java | 1 + .../cloud/dataflow/sdk/coders/DoubleCoder.java | 1 + .../cloud/dataflow/sdk/coders/EntityCoder.java | 1 + .../cloud/dataflow/sdk/coders/InstantCoder.java | 1 + .../google/cloud/dataflow/sdk/coders/KvCoder.java | 1 + .../cloud/dataflow/sdk/coders/StringUtf8Coder.java | 1 + .../cloud/dataflow/sdk/coders/VarIntCoder.java | 1 + .../cloud/dataflow/sdk/coders/VarLongCoder.java | 1 + .../com/google/cloud/dataflow/sdk/io/PubsubIO.java | 2 ++ .../dataflow/sdk/options/PipelineOptionsFactory.java | 2 ++ .../dataflow/sdk/runners/DirectPipelineRunner.java | 5 +++-- .../dataflow/sdk/runners/worker/CombineValuesFn.java | 1 + .../cloud/dataflow/sdk/transforms/Combine.java | 1 + .../google/cloud/dataflow/sdk/transforms/Count.java | 2 ++ .../google/cloud/dataflow/sdk/transforms/DoFn.java | 1 + .../cloud/dataflow/sdk/transforms/Flatten.java | 2 ++ .../cloud/dataflow/sdk/transforms/GroupByKey.java | 1 + .../google/cloud/dataflow/sdk/transforms/Keys.java | 1 + .../google/cloud/dataflow/sdk/transforms/KvSwap.java | 1 + .../google/cloud/dataflow/sdk/transforms/Max.java | 4 ++++ .../google/cloud/dataflow/sdk/transforms/ParDo.java | 2 ++ .../cloud/dataflow/sdk/transforms/RateLimiting.java | 1 + .../google/cloud/dataflow/sdk/transforms/Sum.java | 1 + .../google/cloud/dataflow/sdk/transforms/Top.java | 4 ++++ .../google/cloud/dataflow/sdk/transforms/Values.java | 1 + .../cloud/dataflow/sdk/transforms/WithKeys.java | 1 + .../dataflow/sdk/transforms/join/CoGbkResult.java | 1 + .../sdk/transforms/join/CoGbkResultSchema.java | 1 + .../dataflow/sdk/transforms/join/CoGroupByKey.java | 1 + .../dataflow/sdk/transforms/join/UnionCoder.java | 1 + .../sdk/transforms/windowing/GlobalWindow.java | 1 + .../sdk/transforms/windowing/IntervalWindow.java | 2 ++ .../sdk/transforms/windowing/SlidingWindows.java | 1 + .../dataflow/sdk/transforms/windowing/Window.java | 2 ++ .../google/cloud/dataflow/sdk/util/CoderUtils.java | 1 + .../google/cloud/dataflow/sdk/util/DoFnContext.java | 2 +- .../sdk/util/StreamingGroupAlsoByWindowsDoFn.java | 1 + .../cloud/dataflow/sdk/util/TimerOrElement.java | 1 + .../cloud/dataflow/sdk/util/WindowedValue.java | 8 ++++---- .../cloud/dataflow/sdk/values/CodedTupleTag.java | 1 + .../com/google/cloud/dataflow/sdk/values/KV.java | 4 ++++ .../cloud/dataflow/sdk/coders/AvroCoderTest.java | 1 + .../cloud/dataflow/sdk/coders/CoderRegistryTest.java | 1 + .../cloud/dataflow/sdk/coders/CustomCoderTest.java | 1 + .../cloud/dataflow/sdk/coders/DefaultCoderTest.java | 1 + .../dataflow/sdk/coders/SerializableCoderTest.java | 1 + .../sdk/runners/DataflowPipelineRunnerTest.java | 1 + .../sdk/runners/DataflowPipelineTranslatorTest.java | 1 + .../dataflow/sdk/runners/TransformTreeTest.java | 1 + .../sdk/runners/worker/CombineValuesFnTest.java | 1 + .../runners/worker/MapTaskExecutorFactoryTest.java | 1 + .../sdk/runners/worker/NormalParDoFnTest.java | 1 + .../sdk/runners/worker/ParDoFnFactoryTest.java | 1 + .../sdk/transforms/ApproximateQuantilesTest.java | 1 + .../sdk/transforms/ApproximateUniqueTest.java | 1 + .../cloud/dataflow/sdk/transforms/CombineTest.java | 1 + .../cloud/dataflow/sdk/transforms/CreateTest.java | 1 + .../cloud/dataflow/sdk/transforms/FirstTest.java | 1 + .../dataflow/sdk/transforms/GroupByKeyTest.java | 1 + 
.../cloud/dataflow/sdk/transforms/ParDoTest.java | 1 + .../cloud/dataflow/sdk/transforms/PartitionTest.java | 1 + .../dataflow/sdk/transforms/RateLimitingTest.java | 1 + .../cloud/dataflow/sdk/transforms/SampleTest.java | 1 + .../cloud/dataflow/sdk/transforms/TopTest.java | 1 + .../cloud/dataflow/sdk/transforms/ViewTest.java | 1 + .../cloud/dataflow/sdk/transforms/WithKeysTest.java | 1 + .../sdk/transforms/join/CoGroupByKeyTest.java | 1 + .../sdk/transforms/windowing/WindowingTest.java | 1 + .../cloud/dataflow/sdk/util/AggregatorImplTest.java | 1 + .../cloud/dataflow/sdk/util/CoderUtilsTest.java | 1 + .../google/cloud/dataflow/sdk/util/PTupleTest.java | 1 + .../dataflow/sdk/util/SerializableUtilsTest.java | 1 + .../google/cloud/dataflow/sdk/values/PDoneTest.java | 1 + 75 files changed, 101 insertions(+), 13 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java index ec67fd7aabc3f..df7411f6521b6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java @@ -282,10 +282,10 @@ Output applyInternal(Input input, * Returns all producing transforms for the {@link PValue}s contained * in {@code output}. */ - private List getProducingTransforms(POutput output) { - List producingTransforms = new ArrayList<>(); + private List> getProducingTransforms(POutput output) { + List> producingTransforms = new ArrayList<>(); for (PValue value : output.expand()) { - PTransform transform = value.getProducingTransformInternal(); + PTransform transform = value.getProducingTransformInternal(); if (transform != null) { producingTransforms.add(transform); } @@ -301,9 +301,9 @@ private List getProducingTransforms(POutput output) { */ private void verifyOutputState(POutput output, TransformTreeNode node) { if (!node.isCompositeNode()) { - PTransform thisTransform = node.getTransform(); - List producingTransforms = getProducingTransforms(output); - for (PTransform producingTransform : producingTransforms) { + PTransform thisTransform = node.getTransform(); + List> producingTransforms = getProducingTransforms(output); + for (PTransform producingTransform : producingTransforms) { if (thisTransform != producingTransform) { throw new IllegalArgumentException("Output of non-composite transform " + thisTransform + " is registered as being produced by" diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java index 6af2d6f5ac4ea..0f872f637673d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java @@ -29,6 +29,7 @@ /** * A BigEndianIntegerCoder encodes Integers in 4 bytes, big-endian. */ +@SuppressWarnings("serial") public class BigEndianIntegerCoder extends AtomicCoder { @JsonCreator public static BigEndianIntegerCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java index 43ee9cab34be5..f9e8d3ea34ee3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java @@ -29,6 +29,7 @@ /** * A BigEndianLongCoder encodes Longs in 8 bytes, big-endian. 
*/ +@SuppressWarnings("serial") public class BigEndianLongCoder extends AtomicCoder { @JsonCreator public static BigEndianLongCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java index 6b531ad0dc454..1726ea9ed127d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java @@ -29,6 +29,7 @@ /** * A DoubleCoder encodes Doubles in 8 bytes. */ +@SuppressWarnings("serial") public class DoubleCoder extends AtomicCoder { @JsonCreator public static DoubleCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java index 988a04c03160c..da9769303b702 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java @@ -27,6 +27,7 @@ /** * An EntityCoder encodes/decodes Datastore Entity objects. */ +@SuppressWarnings("serial") public class EntityCoder extends AtomicCoder { @JsonCreator diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java index 3190124391703..5918eb220aef6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java @@ -26,6 +26,7 @@ /** * A InstantCoder encodes joda Instant. */ +@SuppressWarnings("serial") public class InstantCoder extends AtomicCoder { @JsonCreator public static InstantCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java index 000d6ca75807a..bf6e7c9d78892 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java @@ -39,6 +39,7 @@ * @param the type of the keys of the KVs being transcoded * @param the type of the values of the KVs being transcoded */ +@SuppressWarnings("serial") public class KvCoder extends KvCoderBase> { public static KvCoder of(Coder keyCoder, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java index 17995c31b65be..94db9e4ed3899 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java @@ -35,6 +35,7 @@ * A StringUtf8Coder encodes Java Strings in UTF-8 encoding. * If in a nested context, prefixes the string with a VarInt length field. */ +@SuppressWarnings("serial") public class StringUtf8Coder extends AtomicCoder { @JsonCreator public static StringUtf8Coder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java index eff03fb737324..2ce9ffd632e0f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java @@ -31,6 +31,7 @@ * numbers always take 5 bytes, so BigEndianIntegerCoder may be preferable for * ints that are known to often be large or negative. 
*/ +@SuppressWarnings("serial") public class VarIntCoder extends AtomicCoder { @JsonCreator public static VarIntCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java index 74f9b6092288f..50866383bca2a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java @@ -31,6 +31,7 @@ * numbers always take 10 bytes, so BigEndianLongCoder may be preferable for * longs that are known to often be large or negative. */ +@SuppressWarnings("serial") public class VarLongCoder extends AtomicCoder { @JsonCreator public static VarLongCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index b9f0514841590..75e17bed55264 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -181,6 +181,7 @@ public static Bound subscription(String subscription) { * A PTransform that reads from a PubSub source and returns * a unbounded PCollection containing the items from the stream. */ + @SuppressWarnings("serial") public static class Bound extends PTransform> { /** The Pubsub topic to read from. */ @@ -278,6 +279,7 @@ public static Bound topic(String topic) { * A PTransfrom that writes a unbounded {@code PCollection} * to a PubSub stream. */ + @SuppressWarnings("serial") public static class Bound extends PTransform, PDone> { /** The Pubsub topic to publish to. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 89a31b07e888a..9ff9280803fa6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -280,6 +280,7 @@ Class getProxyClass() { private static final Logger LOG = LoggerFactory.getLogger(PipelineOptionsFactory.class); + @SuppressWarnings("rawtypes") private static final Class[] EMPTY_CLASS_ARRAY = new Class[0]; private static final ObjectMapper MAPPER = new ObjectMapper(); @@ -404,6 +405,7 @@ static synchronized Registration validateWellForm // Validate that the local view of the class is well formed. if (!INTERFACE_CACHE.containsKey(iface)) { + @SuppressWarnings("rawtypes") Class proxyClass = Proxy.getProxyClass( PipelineOptionsFactory.class.getClassLoader(), new Class[] {iface}); try { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java index cc1fd8f38cdae..2b0ca03beaab0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -232,6 +232,7 @@ private PCollection> applyTestCombine( * question. 
*/ // @VisibleForTesting + @SuppressWarnings("serial") public static class TestCombineDoFn extends DoFn>, KV> { private final KeyedCombineFn fn; @@ -321,7 +322,7 @@ public T ensureSerializableByCoder( @Override public EvaluationResults run(Pipeline pipeline) { LOG.info("Executing pipeline using the DirectPipelineRunner."); - + Evaluator evaluator = new Evaluator(); evaluator.run(pipeline); @@ -329,7 +330,7 @@ public EvaluationResults run(Pipeline pipeline) { for (Counter counter : evaluator.getCounters()) { LOG.info("Final aggregator value: {}", counter); } - + LOG.info("Pipeline execution complete."); return evaluator; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 16230571fae12..62a371d05271f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -42,6 +42,7 @@ /** * A wrapper around a decoded user value combining function. */ +@SuppressWarnings("serial") public class CombineValuesFn extends NormalParDoFn { /** * The optimizer may split run the user combiner in 3 separate diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 6746f9e75672b..f455d5763ac1a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -38,6 +38,7 @@ * {@code PTransform}s for combining {@code PCollection} elements * globally and per-key. */ +@SuppressWarnings("serial") public class Combine { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 1303b0a98634a..29336d859ced8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -88,6 +88,7 @@ public static PerElement perElement() { * * @param the type of the elements of the input {@code PCollection} */ + @SuppressWarnings("serial") public static class Globally extends PTransform, PCollection> { @@ -136,6 +137,7 @@ public void processElement(ProcessContext c) { * @param the type of the elements of the input {@code PCollection}, and * the type of the keys of the output {@code PCollection} */ + @SuppressWarnings("serial") public static class PerElement extends PTransform, PCollection>> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index c9935fc25810b..c18a646e4278a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -52,6 +52,7 @@ * @param the type of the (main) input elements * @param the type of the (main) output elements */ +@SuppressWarnings("serial") public abstract class DoFn implements Serializable { /** Information accessible to all methods in this {@code DoFn}. 
*/ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index c52ae265fd5f2..3ef5e3a369a30 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -105,6 +105,7 @@ public static FlattenIterables iterables() { * @param the type of the elements in the input and output * {@code PCollection}s. */ + @SuppressWarnings("serial") public static class FlattenPCollectionList extends PTransform, PCollection> { @@ -150,6 +151,7 @@ protected Coder getDefaultOutputCoder() { * @param the type of the elements of the input {@code Iterable}s and * the output {@code PCollection} */ + @SuppressWarnings("serial") public static class FlattenIterables extends PTransform>, PCollection> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 54e60cfcb6209..72e475f31002a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -116,6 +116,7 @@ * and the elements of the {@code Iterable}s in the output * {@code PCollection} */ +@SuppressWarnings("serial") public class GroupByKey extends PTransform>, PCollection>>> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java index 08a801b15ec2d..0e78f4ad1e840 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java @@ -40,6 +40,7 @@ * @param the type of the keys in the input {@code PCollection}, * and the type of the elements in the output {@code PCollection} */ +@SuppressWarnings("serial") public class Keys extends PTransform>, PCollection> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java index ee73ae4087f5d..4b12d5db08c28 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java @@ -42,6 +42,7 @@ * @param the type of the values in the input {@code PCollection} * and the keys in the output {@code PCollection} */ +@SuppressWarnings("serial") public class KvSwap extends PTransform>, PCollection>> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java index fce9a328f1c84..fc901664be5a1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java @@ -139,6 +139,7 @@ public static Combine.PerKey doublesPerKey() { * * @param the type of the {@code Number}s being compared */ + @SuppressWarnings("serial") public static class MaxFn> implements SerializableFunction, N> { @@ -172,6 +173,7 @@ public N apply(Iterable input) { * {@code Iterable} of {@code Integer}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. 
*/ + @SuppressWarnings("serial") public static class MaxIntegerFn extends MaxFn { public MaxIntegerFn() { super(Integer.MIN_VALUE); } } @@ -181,6 +183,7 @@ public static class MaxIntegerFn extends MaxFn { * {@code Iterable} of {@code Long}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ + @SuppressWarnings("serial") public static class MaxLongFn extends MaxFn { public MaxLongFn() { super(Long.MIN_VALUE); } } @@ -190,6 +193,7 @@ public static class MaxLongFn extends MaxFn { * {@code Iterable} of {@code Double}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ + @SuppressWarnings("serial") public static class MaxDoubleFn extends MaxFn { public MaxDoubleFn() { super(Double.MIN_VALUE); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index c7d925b2b418b..fad1235de0b90 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -638,6 +638,7 @@ public Bound of(DoFn fn) { * @param the type of the (main) input {@code PCollection} elements * @param the type of the (main) output {@code PCollection} elements */ + @SuppressWarnings("serial") public static class Bound extends PTransform, PCollection> { // Inherits name. @@ -831,6 +832,7 @@ public BoundMulti of(DoFn fn) { * @param the type of the (main) input {@code PCollection} elements * @param the type of the main output {@code PCollection} elements */ + @SuppressWarnings("serial") public static class BoundMulti extends PTransform, PCollectionTuple> { // Inherits name. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java index 2124acfbb84a5..fd32303613bcb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java @@ -87,6 +87,7 @@ public static RateLimitingTransform perWorker(DoFn doFn) { * @param the type of the (main) input elements * @param the type of the (main) output elements */ + @SuppressWarnings("serial") public static class RateLimitingTransform extends PTransform, PCollection> { private final DoFn doFn; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java index e925e4a5cc90d..ce62e40b4580a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java @@ -35,6 +35,7 @@ * .apply(Sum.integersPerKey()); * } */ +@SuppressWarnings("serial") public class Sum { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index 2e5e334b8a73d..0f8a3f0565850 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -321,6 +321,7 @@ PTransform, PCollection>> largest(int count) { * * @param type of element being compared */ + @SuppressWarnings("serial") public static class TopCombineFn extends AccumulatingCombineFn.Heap, List> { @@ -423,6 +424,7 @@ public Coder getAccumulatorCoder( return new HeapCoder(inputCoder); } + @SuppressWarnings("serial") private class HeapCoder 
extends CustomCoder { private final Coder> listCoder; @@ -467,6 +469,7 @@ public void registerByteSizeObserver( * {@code Serializable} {@code Comparator} that that uses the * compared elements' natural ordering. */ + @SuppressWarnings("serial") public static class Largest> implements Comparator, Serializable { @Override @@ -479,6 +482,7 @@ public int compare(T a, T b) { * {@code Serializable} {@code Comparator} that that uses the * reverse of the compared elements' natural ordering. */ + @SuppressWarnings("serial") public static class Smallest> implements Comparator, Serializable { @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java index ae008b196ad3b..4ca84e45c6501 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java @@ -40,6 +40,7 @@ * @param the type of the values in the input {@code PCollection}, * and the type of the elements in the output {@code PCollection} */ +@SuppressWarnings("serial") public class Values extends PTransform>, PCollection> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java index 1754c20a7916b..82ce93cdf3274 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java @@ -48,6 +48,7 @@ * {@code PCollection} and the values in the output * {@code PCollection} */ +@SuppressWarnings("serial") public class WithKeys extends PTransform, PCollection>> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java index f91d7d2ca669c..cbacb5fec1385 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java @@ -145,6 +145,7 @@ public V getOnly(TupleTag tag, V defaultValue) { /** * A coder for CoGbkResults. */ + @SuppressWarnings("serial") public static class CoGbkResultCoder extends StandardCoder { private final CoGbkResultSchema schema; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java index 93883b80750c8..6ab1042e3b186 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java @@ -37,6 +37,7 @@ * between TupleTags and Union Tags (which are used as secondary keys in the * CoGroupByKey). 
*/ +@SuppressWarnings("serial") class CoGbkResultSchema implements Serializable { private final TupleTagList tupleTagList; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java index d81c9ef707ca7..bc0eed2c78de7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java @@ -68,6 +68,7 @@ * @param the type of the keys in the input and output * {@code PCollection}s */ +@SuppressWarnings("serial") public class CoGroupByKey extends PTransform, PCollection>> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java index a6bb4bcb45860..f0987dfaffee5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java @@ -34,6 +34,7 @@ /** * A UnionCoder encodes RawUnionValues. */ +@SuppressWarnings("serial") class UnionCoder extends StandardCoder { // TODO: Think about how to integrate this with a schema object (i.e. // a tuple of tuple tags). diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java index bfcb9c7fa1595..5d0773d598a52 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -29,6 +29,7 @@ /** * Default {@link WindowingFn} where all data is in the same bucket. 
*/ +@SuppressWarnings("serial") public class GlobalWindow extends NonMergingWindowingFn { @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java index 8ac23501c97e5..ab34ad5049a45 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java @@ -178,6 +178,7 @@ public static Coder getFixedSizeCoder(final Duration size) { return FixedSizeIntervalWindowCoder.of(size); } + @SuppressWarnings("serial") private static class IntervalWindowCoder extends AtomicCoder { private static final IntervalWindowCoder INSTANCE = new IntervalWindowCoder(); @@ -209,6 +210,7 @@ public IntervalWindow decode(InputStream inStream, Context context) public boolean isDeterministic() { return true; } } + @SuppressWarnings("serial") private static class FixedSizeIntervalWindowCoder extends AtomicCoder { private static final Coder instantCoder = InstantCoder.of(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java index d98dab908240c..45b90dd33d1e4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -37,6 +37,7 @@ * Window.by(SlidingWindows.of(Duration.standardMinutes(10)))); * } */ +@SuppressWarnings("serial") public class SlidingWindows extends NonMergingWindowingFn { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index c450e0fa7980f..424fdd7c02120 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -167,6 +167,7 @@ public Bound into(WindowingFn fn) { * * @param The type of elements this {@code Window} is applied to */ + @SuppressWarnings("serial") public static class Bound extends PTransform, PCollection> { WindowingFn fn; @@ -221,6 +222,7 @@ public static Remerge remerge() { * windows to be merged again as part of the next * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}. */ + @SuppressWarnings("serial") public static class Remerge extends PTransform, PCollection> { @Override public PCollection apply(PCollection input) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java index c77f35a45da2a..5dbfc2d4ef6ed 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java @@ -112,6 +112,7 @@ public static CloudObject makeCloudEncoding( * A {@link com.fasterxml.jackson.databind.module.Module} which adds the type * resolver needed for Coder definitions created by the Dataflow service. */ + @SuppressWarnings("serial") static final class Jackson2Module extends SimpleModule { /** * The Coder custom type resolver. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 80d8f34edd04d..08d53a7d464b6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -52,7 +52,7 @@ class DoFnContext extends DoFn.Context { final DoFn fn; final PTuple sideInputs; final OutputManager outputManager; - final Map outputMap; + final Map, R> outputMap; final TupleTag mainOutputTag; final StepContext stepContext; final CounterSet.AddCounterMutator addCounterMutator; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index dcfd58aee92d2..61911a9decd4c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -33,6 +33,7 @@ * @param output value element type * @param window type */ +@SuppressWarnings("serial") public class StreamingGroupAlsoByWindowsDoFn extends DoFn>, KV> implements DoFn.RequiresKeyedState { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java index 4859f8ae5f39b..d0216d3075782 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java @@ -109,6 +109,7 @@ public E element() { * {@code TimerOrElement} objects never need to be encoded, so this class does not * support the {@code encode} and {@code decode} methods. */ + @SuppressWarnings("serial") public static class TimerOrElementCoder extends StandardCoder> { final Coder elemCoder; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 636b77d8b1109..d47e810fa9909 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -37,9 +37,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Objects; @@ -79,9 +79,9 @@ public static WindowedValue valueInGlobalWindow(V value) { * Returns a {@code WindowedValue} with the given value and default timestamp and empty windows. 
*/ public static WindowedValue valueInEmptyWindows(V value) { - return new WindowedValue(value, - new Instant(Long.MIN_VALUE), - new ArrayList<>()); + return new WindowedValue(value, + new Instant(Long.MIN_VALUE), + Collections.emptyList()); } private WindowedValue(V value, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java index 3caed1a8bcce9..770d460249339 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java @@ -27,6 +27,7 @@ * * @param the type of the values associated with this tag */ +@SuppressWarnings("serial") public class CodedTupleTag extends TupleTag { /** * Returns a {@code CodedTupleTag} with the given id which uses the diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java index d354707ebb0c0..febb1302bd8c1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -32,6 +32,8 @@ * @param the type of the value */ public class KV implements Serializable { + private static final long serialVersionUID = 0; + /** Returns a KV with the given key and value. */ public static KV of(K key, V value) { return new KV<>(key, value); @@ -74,6 +76,7 @@ public boolean equals(Object o) { } /** Orders the KV by the key. A null key is less than any non-null key. */ + @SuppressWarnings("serial") public static class OrderByKey, V> implements Comparator>, Serializable { @Override @@ -89,6 +92,7 @@ public int compare(KV a, KV b) { } /** Orders the KV by the value. A null value is less than any non-null value. */ + @SuppressWarnings("serial") public static class OrderByValue> implements Comparator>, Serializable { @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index 725c0e852022d..fffbb5839e7ca 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -43,6 +43,7 @@ * Tests for AvroCoder. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class AvroCoderTest { @DefaultCoder(AvroCoder.class) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java index ace3094827335..a05816c776886 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java @@ -43,6 +43,7 @@ * Tests for CoderRegistry. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CoderRegistryTest { public static CoderRegistry getStandardRegistry() { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java index e532d44dc66b5..c71a50843fd02 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java @@ -33,6 +33,7 @@ /** Unit tests for {@link CustomCoder}. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CustomCoderTest { private static class MyCustomCoder extends CustomCoder> { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java index 769d1e6fb144f..dc0d20c4f8a53 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DefaultCoderTest.java @@ -31,6 +31,7 @@ * Tests of Coder defaults. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class DefaultCoderTest { @DefaultCoder(AvroCoder.class) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java index 3e56832a3faad..d8a7e03092518 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SerializableCoderTest.java @@ -46,6 +46,7 @@ * Tests SerializableCoder. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class SerializableCoderTest implements Serializable { @DefaultCoder(SerializableCoder.class) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index 7995445c9869b..3d6f804ed3a21 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -74,6 +74,7 @@ * Tests for DataflowPipelineRunner. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class DataflowPipelineRunnerTest { @Rule public ExpectedException thrown = ExpectedException.none(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index 228dbfd73acf7..12649de8acfca 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -76,6 +76,7 @@ * Tests for DataflowPipelineTranslator. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class DataflowPipelineTranslatorTest { @Rule public ExpectedException thrown = ExpectedException.none(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java index d0308e87a33f5..cceefe90f622e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java @@ -47,6 +47,7 @@ * Tests for {@link TransformTreeNode} and {@link TransformHierarchy}. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class TransformTreeTest { enum TransformsSeen { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java index b616f6d75f372..160b5456d6193 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java @@ -57,6 +57,7 @@ * Tests for CombineValuesFn. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CombineValuesFnTest { /** Example AccumulatingCombineFn. */ public static class MeanInts extends diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index fae22797ef89c..2060865759331 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -76,6 +76,7 @@ * Tests for MapTaskExecutorFactory. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class MapTaskExecutorFactoryTest { @Test public void testCreateMapTaskExecutor() throws Exception { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java index f94ab8339f9d1..2c6811c27b5ea 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java @@ -51,6 +51,7 @@ * Tests for NormalParDoFn. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class NormalParDoFnTest { static class TestDoFn extends DoFn { enum State { UNSTARTED, STARTED, PROCESSING, FINISHED } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index 05a3864d9bd4d..63d38ebdd4109 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -43,6 +43,7 @@ * Tests for ParDoFnFactory. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class ParDoFnFactoryTest { static class TestDoFn extends DoFn { final String stringState; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java index b0493491634c2..406fb0730d023 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java @@ -48,6 +48,7 @@ * Tests for ApproximateQuantiles */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class ApproximateQuantilesTest { static final List> TABLE = Arrays.asList( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java index 2b2ff0ac9c967..ce357ca7e2b76 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java @@ -52,6 +52,7 @@ * Tests for the ApproximateUnique aggregator transform. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class ApproximateUniqueTest { @Test diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 52b0b230a19d1..646977441e03b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -68,6 +68,7 @@ * Tests for Combine transforms. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CombineTest { @SuppressWarnings("unchecked") diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java index 8202da086240c..2d2615c8a5eaf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java @@ -47,6 +47,7 @@ * Tests for Create. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CreateTest { @Rule public final ExpectedException thrown = ExpectedException.none(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java index bcd14d0c6e894..1a7301379cb03 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FirstTest.java @@ -42,6 +42,7 @@ * Tests for First. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class FirstTest implements Serializable /* to allow anon inner classes */ { // PRE: lines contains no duplicates. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index ebb141f38b7b6..f20e740c84c3f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -53,6 +53,7 @@ * Tests for GroupByKey. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class GroupByKeyTest { @Rule diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 7e46bb31a7858..fdd557d08a7d4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -69,6 +69,7 @@ * Tests for ParDo. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class ParDoTest implements Serializable { // This test is Serializable, just so that it's easy to have // anonymous inner classes inside the non-static test methods. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java index 0d19f082ee07e..c129b8e6b912e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java @@ -42,6 +42,7 @@ * Tests for Partition */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class PartitionTest implements Serializable { static class ModFn implements PartitionFn { public int partitionFor(Integer elem, int numPartitions) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java index d6de05af6d3f4..5a1c2a8f1340c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java @@ -40,6 +40,7 @@ * Tests for RateLimiter. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class RateLimitingTest { /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java index 7c51d096fe4e1..95f88489dbd54 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java @@ -39,6 +39,7 @@ * Tests for Sample transform. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class SampleTest { static final Integer[] EMPTY = new Integer[] { }; static final Integer[] DATA = new Integer[] {1, 2, 3, 4, 5}; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java index 63625a7f5f2b1..561e17b871e7e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java @@ -44,6 +44,7 @@ /** Tests for Top */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class TopTest { @Rule diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java index 3a7c8187d9237..7b0313962fede 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java @@ -42,6 +42,7 @@ * observed via {@link ParDo}. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class ViewTest implements Serializable { // This test is Serializable, just so that it's easy to have // anonymous inner classes inside the non-static test methods. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java index 3e4e359022c96..f0e608b2257d7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java @@ -34,6 +34,7 @@ * Tests for ExtractKeys transform. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class WithKeysTest { static final String[] COLLECTION = new String[] { "a", diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java index 016ba15d5ae44..6369d06aee522 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java @@ -51,6 +51,7 @@ * Tests for CoGroupByKeyTest. Implements Serializable for anonymous DoFns. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CoGroupByKeyTest implements Serializable { /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java index 85c0bf6b8b6d2..e9041c860cee7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java @@ -50,6 +50,7 @@ /** Unit tests for bucketing. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class WindowingTest implements Serializable { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java index 45cc267d2d740..8f8dfbb9bafca 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java @@ -46,6 +46,7 @@ * Unit tests for the {@link Aggregator} API. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class AggregatorImplTest { @Rule public final ExpectedException expectedEx = ExpectedException.none(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java index 92f9e7481558f..1aec837d82ea4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java @@ -40,6 +40,7 @@ * Tests for CoderUtils. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class CoderUtilsTest { static class TestCoder extends AtomicCoder { public static TestCoder of() { return new TestCoder(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java index 3692411a4a751..f09d59ce671b0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PTupleTest.java @@ -32,6 +32,7 @@ public final class PTupleTest { @Test public void accessingNullVoidValuesShouldNotCauseExceptions() { + @SuppressWarnings("serial") TupleTag tag = new TupleTag() {}; PTuple tuple = PTuple.of(tag, null); assertTrue(tuple.has(tag)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java index 90f10cdc97134..daddde564aa09 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java @@ -29,6 +29,7 @@ * Tests for SerializableUtils. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class SerializableUtilsTest { static class TestClass implements Serializable { final String stringValue; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java index e886f350c12d7..f41fef6d840bf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java @@ -36,6 +36,7 @@ * Tests for PDone. */ @RunWith(JUnit4.class) +@SuppressWarnings("serial") public class PDoneTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); From a77a8ab51a4738fdf7a8345dc526a72ee2fdc13e Mon Sep 17 00:00:00 2001 From: millsd Date: Mon, 15 Dec 2014 09:12:01 -0800 Subject: [PATCH 0012/1541] Remove type parameter from CalendarWindows [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82148657 --- .../transforms/windowing/CalendarWindows.java | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java index bb0de796f86ab..70590665be5ac 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java @@ -40,7 +40,7 @@ public class CalendarWindows { *

For example, {@code CalendarWindows.days(1)} will window elements into * separate windows for each day. */ - public static DaysWindows days(int number) { + public static DaysWindows days(int number) { return new DaysWindows(number, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); } @@ -50,7 +50,7 @@ public static DaysWindows days(int number) { *

For example, {@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)} will * window elements into week-long windows starting on Tuesdays. */ - public static DaysWindows weeks(int number, int startDayOfWeek) { + public static DaysWindows weeks(int number, int startDayOfWeek) { return new DaysWindows( 7 * number, new DateTime(0, DateTimeZone.UTC).withDayOfWeek(startDayOfWeek), @@ -65,7 +65,7 @@ public static DaysWindows weeks(int number, int startDayOfWeek) { * will window elements into 8 month windows where that start on the 10th day of month, * and the first window begins in January 2014. */ - public static MonthsWindows months(int number) { + public static MonthsWindows months(int number) { return new MonthsWindows(number, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); } @@ -77,7 +77,7 @@ public static MonthsWindows months(int number) { * will window elements into year-long windows that start at midnight on Jan 1, in the * America/Los_Angeles time zone. */ - public static YearsWindows years(int number) { + public static YearsWindows years(int number) { return new YearsWindows(number, 1, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC); } @@ -90,15 +90,15 @@ public static YearsWindows years(int number) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class DaysWindows extends PartitioningWindowingFn { + public static class DaysWindows extends PartitioningWindowingFn { - public DaysWindows withStartingDay(int year, int month, int day) { - return new DaysWindows( + public DaysWindows withStartingDay(int year, int month, int day) { + return new DaysWindows( number, new DateTime(year, month, day, 0, 0, timeZone), timeZone); } - public DaysWindows withTimeZone(DateTimeZone timeZone) { - return new DaysWindows( + public DaysWindows withTimeZone(DateTimeZone timeZone) { + return new DaysWindows( number, startDate.withZoneRetainFields(timeZone), timeZone); } @@ -155,20 +155,20 @@ public boolean isCompatible(WindowingFn other) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class MonthsWindows extends PartitioningWindowingFn { + public static class MonthsWindows extends PartitioningWindowingFn { - public MonthsWindows beginningOnDay(int dayOfMonth) { - return new MonthsWindows( + public MonthsWindows beginningOnDay(int dayOfMonth) { + return new MonthsWindows( number, dayOfMonth, startDate, timeZone); } - public MonthsWindows withStartingMonth(int year, int month) { - return new MonthsWindows( + public MonthsWindows withStartingMonth(int year, int month) { + return new MonthsWindows( number, dayOfMonth, new DateTime(year, month, 1, 0, 0, timeZone), timeZone); } - public MonthsWindows withTimeZone(DateTimeZone timeZone) { - return new MonthsWindows( + public MonthsWindows withTimeZone(DateTimeZone timeZone) { + return new MonthsWindows( number, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); } @@ -230,20 +230,20 @@ public boolean isCompatible(WindowingFn other) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class YearsWindows extends PartitioningWindowingFn { + public static class YearsWindows extends PartitioningWindowingFn { - public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) { - return new YearsWindows( + public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) { + return new YearsWindows( number, monthOfYear, dayOfMonth, startDate, timeZone); } - public YearsWindows withStartingYear(int year) { - return new YearsWindows( + public YearsWindows withStartingYear(int year) { + return new YearsWindows( number, monthOfYear, dayOfMonth, new DateTime(year, 1, 1, 0, 0, timeZone), timeZone); } - public YearsWindows withTimeZone(DateTimeZone timeZone) { - return new YearsWindows( + public YearsWindows withTimeZone(DateTimeZone timeZone) { + return new YearsWindows( number, monthOfYear, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone); } From 1aca9d28f08fa4a65709a22048361df68133d446 Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 15 Dec 2014 11:14:56 -0800 Subject: [PATCH 0013/1541] Dataflow launch: cleanup a subset of checkstyle warnings in the SDK. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82158870 --- checkstyle.xml | 5 ++-- .../dataflow/examples/BigQueryTornadoes.java | 10 +++---- .../google/cloud/dataflow/examples/TfIdf.java | 4 +-- .../examples/TopWikipediaSessions.java | 4 +-- .../cloud/dataflow/examples/WordCount.java | 6 ++--- .../dataflow/sdk/coders/package-info.java | 26 +++++++++---------- .../cloud/dataflow/sdk/io/PubsubIO.java | 4 +-- .../cloud/dataflow/sdk/io/package-info.java | 2 +- .../DataflowPipelineWorkerPoolOptions.java | 8 +++--- .../dataflow/sdk/options/GcpOptions.java | 2 +- .../dataflow/sdk/options/GcsOptions.java | 2 +- .../sdk/options/PipelineOptionsFactory.java | 2 +- .../sdk/options/PipelineOptionsValidator.java | 2 +- .../sdk/options/ProxyInvocationHandler.java | 2 +- .../sdk/options/StreamingOptions.java | 6 ++--- .../dataflow/sdk/options/package-info.java | 6 ++--- .../cloud/dataflow/sdk/package-info.java | 18 ++++++------- .../sdk/runners/DataflowPipelineRunner.java | 4 +-- .../sdk/runners/dataflow/package-info.java | 2 +- .../dataflow/sdk/runners/package-info.java | 10 +++---- .../runners/worker/DataflowWorkerHarness.java | 2 +- .../sdk/runners/worker/FileBasedSource.java | 2 +- .../DataflowWorkerLoggingFormatter.java | 2 +- .../DataflowWorkerLoggingInitializer.java | 4 +-- .../sdk/runners/worker/package-info.java | 2 +- .../dataflow/sdk/testing/TestPipeline.java | 2 +- .../dataflow/sdk/testing/package-info.java | 4 +-- .../cloud/dataflow/sdk/transforms/Count.java | 2 +- .../sdk/transforms/join/package-info.java | 2 +- .../dataflow/sdk/transforms/package-info.java | 20 +++++++------- .../sdk/util/AppEngineEnvironment.java | 2 +- .../cloud/dataflow/sdk/util/Credentials.java | 2 +- .../cloud/dataflow/sdk/util/GcsUtil.java | 2 +- .../cloud/dataflow/sdk/util/PackageUtil.java | 2 +- .../dataflow/sdk/values/package-info.java | 20 +++++++------- .../options/DataflowPipelineOptionsTest.java | 2 +- .../options/PipelineOptionsValidatorTest.java | 2 +- .../options/ProxyInvocationHandlerTest.java | 20 +++++++------- .../DataflowWorkerLoggingInitializerTest.java | 2 +- .../AttemptBoundedExponentialBackOffTest.java | 2 +- .../cloud/dataflow/sdk/util/GcsUtilTest.java | 2 +- .../util/common/worker/ReadOperationTest.java | 2 +- 42 files changed, 113 
insertions(+), 114 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index 08df965ae6bb1..702558ac34acb 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -44,8 +44,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + @@ -60,7 +59,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java index a5f9a19705218..b6974e74eb829 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/BigQueryTornadoes.java @@ -39,19 +39,19 @@ /** * An example that reads the public samples of weather data from BigQuery, counts the number of * tornadoes that occur in each month, and writes the results to BigQuery. - * + * *

Concepts: Reading/writing BigQuery; counting a PCollection; user-defined PTransforms - * - *

Note: Before running this example, you must create a BigQuery dataset to contain your output + * + *

Note: Before running this example, you must create a BigQuery dataset to contain your output * table. - * + * *

To execute this pipeline locally, specify general pipeline configuration: * --project= * and the BigQuery table for the output: * --output=:. * *

To execute this pipeline using the Dataflow service, specify pipeline configuration: - * --project= + * --project= * --stagingLocation=gs:// * --runner=BlockingDataflowPipelineRunner * and the BigQuery table for the output: diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java index 15948782e2b60..3f22e27eab264 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java @@ -60,7 +60,7 @@ /** * An example that computes a basic TF-IDF search table for a directory or GCS prefix. - * + * *

Concepts: joining data; side inputs * *

To execute this pipeline locally, specify general pipeline configuration: @@ -69,7 +69,7 @@ * --output=[ | gs://] * *

To execute this pipeline using the Dataflow service, specify pipeline configuration: - * --project= + * --project= * --stagingLocation=gs:// * --runner=BlockingDataflowPipelineRunner * and an output prefix on GCS: diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java index 3d945b6394acb..a64e2fc98f49a 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java @@ -49,7 +49,7 @@ * the longest string of edits separated by no more than an hour within each month. * *

Concepts: Using Windowing to perform time-based aggregations of data. - * + * *

It is not recommended to execute this pipeline locally, given the size of the default input * data. * @@ -62,7 +62,7 @@ * *

The default input is gs://dataflow-samples/wikipedia_edits/*.json and can be overridden with * --input. - * + * *

The input for this example is large enough that it's a good place to enable (experimental) * autoscaling: * --autoscalingAlgorithm=BASIC diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 4fe304925c0b6..948b83e032fab 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -38,7 +38,7 @@ * An example that counts words in Shakespeare. For a detailed walkthrough of this * example see: * https://developers.google.com/cloud-dataflow/java-sdk/wordcount-example - * + * *

Concepts: Reading/writing text files; counting a PCollection; user-defined PTransforms * *

To execute this pipeline locally, specify general pipeline configuration: @@ -69,12 +69,12 @@ public void startBundle(Context c) { @Override public void processElement(ProcessContext c) { - // Keep track of the number of empty lines. (When using the [Blocking]DataflowPipelineRunner, + // Keep track of the number of empty lines. (When using the [Blocking]DataflowPipelineRunner, // Aggregators are shown in the monitoring UI.) if (c.element().trim().isEmpty()) { emptyLines.addValue(1L); } - + // Split the line into words. String[] words = c.element().split("[^a-zA-Z']+"); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java index ea305e776bc9a..3366ff2267010 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java @@ -14,31 +14,31 @@ * the License. */ -/** - * Defines {@link com.google.cloud.dataflow.sdk.coders.Coder}s +/** + * Defines {@link com.google.cloud.dataflow.sdk.coders.Coder}s * to specify how data is encoded to and decoded from byte strings. - * + * *

During execution of a Pipeline, elements in a * {@link com.google.cloud.dataflow.sdk.values.PCollection} * may need to be encoded into byte strings. * This happens both at the beginning and end of a pipeline when data is read from and written to * persistent storage and also during execution of a pipeline when elements are communicated between * machines. - * - *

Exactly when PCollection elements are encoded during execution depends on which + * + *

Exactly when PCollection elements are encoded during execution depends on which * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} is being used and how that runner * chooses to execute the pipeline. As such, Dataflow requires that all PCollections have an - * appropriate Coder in case it becomes necessary. In many cases, the Coder can be inferred from + * appropriate Coder in case it becomes necessary. In many cases, the Coder can be inferred from * the available Java type * information and the Pipeline's {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry}. It - * can be specified per PCollection via + * can be specified per PCollection via * {@link com.google.cloud.dataflow.sdk.values.PCollection#setCoder(Coder)} or per type using the * {@link com.google.cloud.dataflow.sdk.coders.DefaultCoder} annotation. - * - *

This package provides a number of coders for common types like {@code Integer}, - * {@code String}, and {@code List}, as well as coders like - * {@link com.google.cloud.dataflow.sdk.coders.AvroCoder} that can be used to encode many custom - * types. - * + * + *

This package provides a number of coders for common types like {@code Integer}, + * {@code String}, and {@code List}, as well as coders like + * {@link com.google.cloud.dataflow.sdk.coders.AvroCoder} that can be used to encode many custom + * types. + * */ package com.google.cloud.dataflow.sdk.coders; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 75e17bed55264..5d87f2563fa52 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -35,9 +35,9 @@ *

Important: PubsubIO is experimental. It is not supported by the * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and is only supported in the * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a - * streaming early access program and who enable + * streaming early access program and who enable * {@link com.google.cloud.dataflow.sdk.options.StreamingOptions#setStreaming(boolean)}. - * + * *

You should expect this class to change significantly in future versions of the SDK * or be removed entirely. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java index 886255e271d23..b472b3f7c6200 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java @@ -19,7 +19,7 @@ * {@link com.google.cloud.dataflow.sdk.io.AvroIO}, * {@link com.google.cloud.dataflow.sdk.io.BigQueryIO}, and * {@link com.google.cloud.dataflow.sdk.io.TextIO}. - * + * *

The classes in this package provide {@code Read} transforms which create PCollections * from existing storage: *

{@code
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
index b388da4313ee7..0193ddaac1077 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
@@ -37,14 +37,14 @@ public interface DataflowPipelineWorkerPoolOptions {
   @Default.Integer(3)
   int getNumWorkers();
   void setNumWorkers(int value);
-  
+
   /**
    * Type of autoscaling algorithm to use. These types are experimental and subject to change.
    */
   public enum AutoscalingAlgorithmType {
     /** Use numWorkers machines. Do not autoscale the worker pool. */
     NONE("AUTOSCALING_ALGORITHM_NONE"),
-    
+
     /** Autoscale the workerpool size up to maxNumWorkers until the job completes. */
     BASIC("AUTOSCALING_ALGORITHM_BASIC");
 
@@ -73,9 +73,9 @@ public AutoscalingAlgorithmType create(PipelineOptions options) {
       return AutoscalingAlgorithmType.NONE;
     }
   }
-  
+
   /**
-   * Max number of workers to use when using workerpool autoscaling. 
+   * Max number of workers to use when using workerpool autoscaling.
    * This option is experimental and subject to change.
    */
   @Description("Max number of workers to use, when using autoscaling")
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
index 7dbaa5fb32d9f..34faf03acfb66 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
@@ -51,7 +51,7 @@
  *     with the service account name.
  *   
  * 
- * The default mechanism is to use the 
+ * The default mechanism is to use the
  * 
  * application default credentials falling back to gcloud. The other options can be
  * used by setting the corresponding properties.
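
As an illustration of the credential fallback described above, the sketch below builds GcpOptions with only a project flag, so the application default credentials / gcloud mechanism applies. The project id is hypothetical, and the exact GcpOptions accessors at this revision are assumed.

    import com.google.cloud.dataflow.sdk.options.GcpOptions;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

    public class GcpOptionsSketch {
      public static void main(String[] args) {
        // No credential flags are given, so the application default
        // credentials / gcloud fallback described above is used.
        GcpOptions options = PipelineOptionsFactory
            .fromArgs(new String[] {"--project=my-gcp-project"})  // hypothetical project id
            .as(GcpOptions.class);
        System.out.println(options.getProject());
      }
    }
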
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
index 543c9cac6c406..20685b7cf383f 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
@@ -62,7 +62,7 @@ public ExecutorService create(PipelineOptions options) {
       }
       /* The SDK requires an unbounded thread pool because a step may create X writers
        * each requiring their own thread to perform the writes otherwise a writer may
-       * block causing deadlock for the step because the writers buffer is full. 
+       * block causing deadlock for the step because the writers buffer is full.
        * Also, the MapTaskExecutor launches the steps in reverse order and completes
        * them in forward order thus requiring enough threads so that each step's writers
        * can be active.
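
The comment above argues for an unbounded pool: every blocked writer must be able to get its own thread so the step that would drain its buffer can still make progress. A plain JDK sketch of such a pool (essentially a cached thread pool) illustrates the idea; it is not the SDK's actual factory.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.SynchronousQueue;
    import java.util.concurrent.ThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;

    public class UnboundedPoolSketch {
      public static void main(String[] args) throws InterruptedException {
        // Core size 0, unbounded maximum, no work queue: every submitted writer
        // task gets a live thread instead of queueing behind one that may be
        // blocked on a full buffer.
        ExecutorService unbounded = new ThreadPoolExecutor(
            0, Integer.MAX_VALUE, 60L, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>());
        unbounded.execute(new Runnable() {
          @Override
          public void run() {
            System.out.println("writer running");
          }
        });
        unbounded.shutdown();
        unbounded.awaitTermination(10, TimeUnit.SECONDS);
      }
    }
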
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
index 9ff9280803fa6..a46ce313e3383 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
@@ -385,7 +385,7 @@ static synchronized  Registration validateWellForm
       Class iface, Set> validatedPipelineOptionsInterfaces) {
     Preconditions.checkArgument(iface.isInterface(), "Only interface types are supported.");
 
-    Set> combinedPipelineOptionsInterfaces = 
+    Set> combinedPipelineOptionsInterfaces =
         FluentIterable.from(validatedPipelineOptionsInterfaces).append(iface).toSet();
     // Validate that the view of all currently passed in options classes is well formed.
     if (!COMBINED_CACHE.containsKey(combinedPipelineOptionsInterfaces)) {
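
The well-formedness check above runs whenever an options interface is first proxied. Below is a minimal, illustrative sketch of a user-defined interface that passes it; the property name and default are hypothetical.

    import com.google.cloud.dataflow.sdk.options.Default;
    import com.google.cloud.dataflow.sdk.options.Description;
    import com.google.cloud.dataflow.sdk.options.PipelineOptions;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

    public class WellFormedOptionsSketch {
      /** Getter/setter pairs must match for the interface to be well formed. */
      public interface MyOptions extends PipelineOptions {
        @Description("Number of output shards")  // hypothetical property
        @Default.Integer(10)
        int getNumShards();
        void setNumShards(int value);
      }

      public static void main(String[] args) {
        // as() validates MyOptions (and its parent interfaces) before proxying it.
        MyOptions options = PipelineOptionsFactory.as(MyOptions.class);
        System.out.println("numShards=" + options.getNumShards());
      }
    }
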
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
index 046cfd7fda05e..9878056af64db 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
@@ -53,7 +53,7 @@ public static  T validate(Class klass, PipelineOpt
     }
     return options.as(klass);
   }
-  
+
   private static String getDescription(Method method) {
     Description description = method.getAnnotation(Description.class);
     return description == null ? "" : description.value();
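
PipelineOptionsValidator.validate is what enforces required properties before a run, as the DataflowPipelineRunner change later in this patch shows. A minimal sketch with a hypothetical required property:

    import com.google.cloud.dataflow.sdk.options.PipelineOptions;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
    import com.google.cloud.dataflow.sdk.options.Validation;

    public class ValidatorSketch {
      public interface RequiredOutputOptions extends PipelineOptions {
        @Validation.Required  // validation fails while this property is unset
        String getOutput();
        void setOutput(String value);
      }

      public static void main(String[] args) {
        RequiredOutputOptions options =
            PipelineOptionsFactory.as(RequiredOutputOptions.class);
        options.setOutput("gs://my-bucket/output");  // hypothetical location
        // Throws if any @Validation.Required property is still unset.
        RequiredOutputOptions validated =
            PipelineOptionsValidator.validate(RequiredOutputOptions.class, options);
        System.out.println(validated.getOutput());
      }
    }
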
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
index aefbe1dec294e..88e84f6142313 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
@@ -138,7 +138,7 @@ synchronized  T as(Class iface) {
     Preconditions.checkNotNull(iface);
     Preconditions.checkArgument(iface.isInterface());
     if (!interfaceToProxyCache.containsKey(iface)) {
-      Registration registration = 
+      Registration registration =
           PipelineOptionsFactory.validateWellFormed(iface, knownInterfaces);
       List propertyDescriptors = registration.getPropertyDescriptors();
       Class proxyClass = registration.getProxyClass();
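
The as() machinery above treats every options interface as a typed view over one shared set of properties, which is the behavior the ProxyInvocationHandlerTest changes later in this patch rely on. A minimal sketch of that round-tripping, with hypothetical FooOptions and BarOptions interfaces:

    import com.google.cloud.dataflow.sdk.options.PipelineOptions;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

    public class OptionsViewSketch {
      public interface FooOptions extends PipelineOptions {
        String getFoo();
        void setFoo(String value);
      }

      public interface BarOptions extends PipelineOptions {
        String getBar();
        void setBar(String value);
      }

      public static void main(String[] args) {
        FooOptions foo = PipelineOptionsFactory.as(FooOptions.class);
        foo.setFoo("hello");

        // as() re-views the same underlying properties through another interface;
        // values set through one view stay visible through the other.
        BarOptions bar = foo.as(BarOptions.class);
        bar.setBar("world");

        System.out.println(bar.as(FooOptions.class).getFoo());  // hello
        System.out.println(foo.as(BarOptions.class).getBar());  // world
      }
    }
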
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
index 725d845d5b9f2..6deca04b4888f 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
@@ -18,11 +18,11 @@
 
 /**
  * [Whitelisting Required] Options used to configure the streaming backend.
- * 
+ *
  * 

Important: Streaming support is experimental. It is only supported in the * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a - * streaming early access program. - * + * streaming early access program. + * *

You should expect this class to change significantly in future * versions of the SDK or be removed entirely. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java index 557e377676b71..0541b942a0c21 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java @@ -14,10 +14,10 @@ * the License. */ -/** - * Defines {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} for +/** + * Defines {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} for * configuring pipeline execution. - * + * *

{@link com.google.cloud.dataflow.sdk.options.PipelineOptions} encapsulates the various * parameters that describe how a pipeline should be run. {@code PipelineOptions} are created * using a {@link com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java index e27ac01476606..ad5f04ca68ca9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java @@ -14,19 +14,19 @@ * the License. */ -/** - * Provides a simple, powerful model for building both batch and +/** + * Provides a simple, powerful model for building both batch and * streaming parallel data processing - * {@link com.google.cloud.dataflow.sdk.Pipeline}s. - * - *

To use the Google Cloud Dataflow SDK, you build a - * {@link com.google.cloud.dataflow.sdk.Pipeline} which manages a graph of - * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s - * and the {@link com.google.cloud.dataflow.sdk.values.PCollection}s that + * {@link com.google.cloud.dataflow.sdk.Pipeline}s. + * + *

To use the Google Cloud Dataflow SDK, you build a + * {@link com.google.cloud.dataflow.sdk.Pipeline} which manages a graph of + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s + * and the {@link com.google.cloud.dataflow.sdk.values.PCollection}s that * the PTransforms consume and produce. * *

Each Pipeline has a - * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to specify + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to specify * where and how it should run after pipeline construction is complete. * */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index c13ef51c7c18a..1a0d873307375 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -96,7 +96,7 @@ public class DataflowPipelineRunner extends PipelineRunner * @return The newly created runner. */ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { - DataflowPipelineOptions dataflowOptions = + DataflowPipelineOptions dataflowOptions = PipelineOptionsValidator.validate(DataflowPipelineOptions.class, options); ArrayList missing = new ArrayList<>(); @@ -230,7 +230,7 @@ public DataflowPipelineJob run(Pipeline pipeline) { .execute(); } catch (GoogleJsonResponseException e) { throw new RuntimeException( - "Failed to create a workflow job: " + "Failed to create a workflow job: " + (e.getDetails() != null ? e.getDetails().getMessage() : e), e); } catch (IOException e) { throw new RuntimeException("Failed to create a workflow job", e); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java index c2fcc288cf3c4..751ca69e5572c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java @@ -14,7 +14,7 @@ * the License. */ -/** +/** * Implementation of the {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}. */ package com.google.cloud.dataflow.sdk.runners.dataflow; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java index c75fe2f8348e0..8d543a4041b87 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java @@ -15,19 +15,19 @@ */ /** - * Defines runners for executing Pipelines in different modes, including + * Defines runners for executing Pipelines in different modes, including * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}. - * + * *

{@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} executes a {@code Pipeline} - * locally, without contacting the Dataflow service. - * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} submits a + * locally, without contacting the Dataflow service. + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} submits a * {@code Pipeline} to the Dataflow service, which executes it on Dataflow-managed Compute Engine * instances. {@code DataflowPipelineRunner} returns * as soon as the {@code Pipeline} has been submitted. Use * {@link com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner} to have execution * updates printed to the console. - * + * *

The runner is specified as part {@link com.google.cloud.dataflow.sdk.options.PipelineOptions}. */ package com.google.cloud.dataflow.sdk.runners; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index fa17cf67390d0..f46a8d4a00039 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -104,7 +104,7 @@ static void processWork(DataflowWorker worker) throws IOException { static DataflowWorker createFromSystemProperties() { return create(PipelineOptionsFactory.createFromSystemProperties()); } - + static DataflowWorker create(DataflowWorkerHarnessOptions options) { MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_JOB_ID, options.getJobId()); MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORKER_ID, options.getWorkerId()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java index beea88747c1cf..2e152e794f80c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java @@ -199,7 +199,7 @@ public Position updateStopPosition(Progress proposedStopPosition) { // CloudPosition in a file-based Source. If stop position in // other types is proposed, the end position in iterator will // not be updated, and return null. - com.google.api.services.dataflow.model.ApproximateProgress stopPosition = + com.google.api.services.dataflow.model.ApproximateProgress stopPosition = sourceProgressToCloudProgress(proposedStopPosition); if (stopPosition == null) { LOG.warn( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java index 85805773c7060..a8f54b48668fd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -47,7 +47,7 @@ public class DataflowWorkerLoggingFormatter extends Formatter { public String format(LogRecord record) { String exception = formatException(record.getThrown()); return DATE_FORMATTER.print(record.getMillis()) - + " " + MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), + + " " + MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), record.getLevel().getName()) + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_JOB_ID), "unknown") + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORKER_ID), "unknown") diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java index 80ccf7084bcbf..6be47019e3c86 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -34,14 +34,14 @@ * the logging level and location by specifying the Java system 
properties * "dataflow.worker.logging.level" and "dataflow.worker.logging.location" respectively. * The default log level is INFO and the default location is a file named dataflow-worker.log - * within the systems temporary directory. + * within the systems temporary directory. */ public class DataflowWorkerLoggingInitializer { private static final String DEFAULT_LOGGING_LOCATION = "/tmp/dataflow-worker.log"; private static final String ROOT_LOGGER_NAME = ""; public static final String DATAFLOW_WORKER_LOGGING_LEVEL = "dataflow.worker.logging.level"; public static final String DATAFLOW_WORKER_LOGGING_LOCATION = "dataflow.worker.logging.location"; - public static final ImmutableBiMap LEVELS = + public static final ImmutableBiMap LEVELS = ImmutableBiMap.builder() .put(Level.SEVERE, "ERROR") .put(Level.WARNING, "WARNING") diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java index 615ed64743922..ffc9df2e43f26 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java @@ -14,7 +14,7 @@ * the License. */ -/** +/** * Implementation of the harness that runs on each Google Compute Engine instance to coordinate * execution of Pipeline code. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java index 6044365a664d9..ff003d5dbf9ef 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java @@ -125,7 +125,7 @@ private static String getAppName() { Optional stackTraceElement = findCallersStackTrace(); if (stackTraceElement.isPresent()) { String className = stackTraceElement.get().getClassName(); - return className.contains(".") + return className.contains(".") ? className.substring(className.lastIndexOf(".") + 1) : className; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java index 799c1ac98bc8c..9a410fb229f1f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java @@ -14,8 +14,8 @@ * the License. */ -/** +/** * Defines utilities for unit testing Dataflow pipelines. The tests for the {@code PTransform}s and - * examples included the Dataflow SDK provide examples of using these utilities. + * examples included the Dataflow SDK provide examples of using these utilities. */ package com.google.cloud.dataflow.sdk.testing; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 29336d859ced8..49251dfeca210 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -120,7 +120,7 @@ public void processElement(ProcessContext c) { * encoding each element using the input {@code PCollection}'s * {@code Coder}, then comparing the encoded bytes. Because of this, * the input coder must be deterministic. (See - * {@link com.google.cloud.dataflow.sdk.coders.Coder#isDeterministic()} for more detail). 
+ * {@link com.google.cloud.dataflow.sdk.coders.Coder#isDeterministic()} for more detail). * Performing the comparison in this manner admits efficient parallel evaluation. * *

By default, the {@code Coder} of the keys of the output diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java index ba907ac2cd734..25eafd34aac18 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java @@ -14,7 +14,7 @@ * the License. */ -/** +/** * Defines the {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey} transform * for joining multiple PCollections. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java index b72e90e780ac0..5b50fd37634b0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java @@ -14,12 +14,12 @@ * the License. */ -/** - * Defines {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s for transforming +/** + * Defines {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s for transforming * data in a pipeline. - * + * *

A {@link com.google.cloud.dataflow.sdk.transforms.PTransform} is an operation that takes an - * {@code Input} (some subtype of {@link com.google.cloud.dataflow.sdk.values.PInput}) + * {@code Input} (some subtype of {@link com.google.cloud.dataflow.sdk.values.PInput}) * and produces an * {@code Output} (some subtype of {@link com.google.cloud.dataflow.sdk.values.POutput}). * @@ -29,15 +29,15 @@ * conversion operations like {@link com.google.cloud.dataflow.sdk.transforms.ParDo}, * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}, * {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey}, - * {@link com.google.cloud.dataflow.sdk.transforms.Combine}, and + * {@link com.google.cloud.dataflow.sdk.transforms.Combine}, and * {@link com.google.cloud.dataflow.sdk.transforms.Count}, and outputting * PTransforms like - * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}. - * - *

New PTransforms can be created by composing existing PTransforms. + * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}. + * + *

New PTransforms can be created by composing existing PTransforms. * Most PTransforms in this package are composites, and users can also create composite PTransforms - * for their own application-specific logic. - * + * for their own application-specific logic. + * */ package com.google.cloud.dataflow.sdk.transforms; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java index f3b57a4508b0e..c83fd1da79d89 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java @@ -32,7 +32,7 @@ public class AppEngineEnvironment { * Purposely copied and left private from private code.google.common.util.concurrent.MoreExecutors#isAppEngine. - * + * * @return true if we are inside of AppEngine, false otherwise. */ static boolean isAppEngine() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index 419339a83415e..6d82eed7a7236 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -127,7 +127,7 @@ public static Credential getWorkerCredential(GcpOptions options) * "service_account_name" to use this mechanism. * * - * The default mechanism is to use the + * The default mechanism is to use the * * application default credentials falling back to gcloud. The other options can be * used by providing the corresponding properties. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java index c3edd2ac2c338..cc429955f5662 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java @@ -47,7 +47,7 @@ */ public class GcsUtil { /** - * This is a {@link DefaultValueFactory} able to create a {@link GcsUtil} using + * This is a {@link DefaultValueFactory} able to create a {@link GcsUtil} using * any transport flags specified on the {@link PipelineOptions}. */ public static class GcsUtilFactory implements DefaultValueFactory { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java index c9e3185edd8c3..46f610836f57d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java @@ -172,7 +172,7 @@ static List stageClasspathElementsToGcs( throw new RuntimeException("Could not stage classpath element: " + classpathElement, e); } } - + LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, " + "{} files cached", numUploaded, numCached); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java index ba6e927e0996b..232e0b4942a40 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java @@ -14,18 +14,18 @@ * the License. 
*/ -/** - * Defines {@link com.google.cloud.dataflow.sdk.values.PCollection} and other classes for +/** + * Defines {@link com.google.cloud.dataflow.sdk.values.PCollection} and other classes for * representing data in a {@link com.google.cloud.dataflow.sdk.Pipeline}. - * - *

A {@link com.google.cloud.dataflow.sdk.values.PCollection} is an immutable collection of - * values of type {@code T} and is the main representation for data. + * + *

A {@link com.google.cloud.dataflow.sdk.values.PCollection} is an immutable collection of + * values of type {@code T} and is the main representation for data. * A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is a tuple of PCollections - * used in cases where PTransforms take or return multiple PCollections. - * + * used in cases where PTransforms take or return multiple PCollections. + * *

A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is an immutable tuple of * heterogeneously-typed {@link com.google.cloud.dataflow.sdk.values.PCollection}s, "keyed" by - * {@link com.google.cloud.dataflow.sdk.values.TupleTag}s. + * {@link com.google.cloud.dataflow.sdk.values.TupleTag}s. * A PCollectionTuple can be used as the input or * output of a * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} taking @@ -33,10 +33,10 @@ * different types, for instance a * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} with side * outputs. - * + * *

A {@link com.google.cloud.dataflow.sdk.values.PCollectionView} is an immutable view of a * PCollection that can be accessed from a DoFn and other user Fns * as a side input. - * + * */ package com.google.cloud.dataflow.sdk.values; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java index f4d6f0499d44f..50ea20bf539c6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java @@ -81,7 +81,7 @@ public void testUserNameIsTooLong() { assertTrue(options.getJobName().length() <= 40); } - + @Test public void testUtf8UserNameAndApplicationNameIsNormalized() { resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java index ad584ea32aeb4..714647e35de88 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidatorTest.java @@ -34,7 +34,7 @@ public static interface Required extends PipelineOptions { public String getObject(); public void setObject(String value); } - + @Test public void testWhenRequiredOptionIsSet() { Required required = PipelineOptionsFactory.as(Required.class); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index b9b07e8626b1d..d33d42e4bf165 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -111,7 +111,7 @@ public void testGettingJLSDefaults() throws Exception { public static class TestOptionFactory implements DefaultValueFactory { @Override public String create(PipelineOptions options) { - return "testOptionFactory"; + return "testOptionFactory"; } } @@ -224,7 +224,7 @@ public void testInvokeWithUnknownMethod() throws Exception { expectedException.expectMessage("Unknown method [public abstract void com.google.cloud." 
+ "dataflow.sdk.options.ProxyInvocationHandlerTest$UnknownMethod.unknownMethod()] " + "invoked with args [null]."); - + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); handler.invoke(handler, UnknownMethod.class.getMethod("unknownMethod"), null); } @@ -292,7 +292,7 @@ public static interface MethodConflict extends Simple { public void testMethodConflictProvidesSameValue() throws Exception { ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); MethodConflict methodConflict = handler.as(MethodConflict.class); - + methodConflict.setString("conflictValue"); assertEquals("conflictValue", methodConflict.getString()); assertEquals("conflictValue", methodConflict.as(Simple.class).getString()); @@ -364,7 +364,7 @@ public static interface PartialMethodConflict extends Simple { @Override void setPrimitive(int value); } - + @Test public void testPartialMethodConflictProvidesSameValue() throws Exception { ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); @@ -404,13 +404,13 @@ public void testJsonConversionForSimpleTypes() throws Exception { assertEquals(5, options2.getInteger()); assertEquals("TestValue", options2.getString()); } - + @Test public void testJsonConversionOfAJsonConvertedType() throws Exception { SimpleTypes options = PipelineOptionsFactory.as(SimpleTypes.class); options.setString("TestValue"); options.setInteger(5); - SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, + SimpleTypes options2 = serializeDeserialize(SimpleTypes.class, serializeDeserialize(SimpleTypes.class, options)); assertEquals(5, options2.getInteger()); assertEquals("TestValue", options2.getString()); @@ -478,7 +478,7 @@ static InnerType of(double value) { @Override public boolean equals(Object obj) { return obj != null - && getClass().equals(obj.getClass()) + && getClass().equals(obj.getClass()) && Objects.equals(doubleField, ((InnerType) obj).doubleField); } } @@ -493,7 +493,7 @@ private static class ComplexType { @Override public boolean equals(Object obj) { return obj != null - && getClass().equals(obj.getClass()) + && getClass().equals(obj.getClass()) && Objects.equals(stringField, ((ComplexType) obj).stringField) && Objects.equals(intField, ((ComplexType) obj).intField) && Objects.equals(genericType, ((ComplexType) obj).genericType) @@ -531,7 +531,7 @@ private static interface IgnoredProperty extends PipelineOptions { public void testJsonConversionOfIgnoredProperty() throws Exception { IgnoredProperty options = PipelineOptionsFactory.as(IgnoredProperty.class); options.setValue("TestValue"); - + IgnoredProperty options2 = serializeDeserialize(IgnoredProperty.class, options); assertNull(options2.getValue()); } @@ -577,7 +577,7 @@ public void testJsonConversionOfIgnoredNotSerializableProperty() throws Exceptio IgnoredNotSerializableProperty options = PipelineOptionsFactory.as(IgnoredNotSerializableProperty.class); options.setValue(new NotSerializable("TestString")); - + IgnoredNotSerializableProperty options2 = serializeDeserialize(IgnoredNotSerializableProperty.class, options); assertNull(options2.getValue()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java index 71e51f430d88d..68a6b58a2d9bc 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java +++ 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java @@ -94,7 +94,7 @@ public void testWithOverrides() { assertTrue(isConsoleHandler(handlers.get(0), Level.WARNING)); assertTrue(isFileHandler(handlers.get(1), Level.WARNING)); } - + private boolean isConsoleHandler(Handler handler, Level level) { return handler instanceof ConsoleHandler && level.equals(handler.getLevel()) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java index 0c262e2f1cb04..e4f332db12e88 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java @@ -56,7 +56,7 @@ public void testThatFixedNumberOfAttemptsExits() throws Exception { assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(374L), lessThan(1126L))); assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); } - + @Test public void testThatResettingAllowsReuse() throws Exception { BackOff backOff = new AttemptBoundedExponentialBackOff(3, 500); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java index cae705cea5a46..c7a7c55f5b585 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java @@ -99,7 +99,7 @@ public void run() { // Release the last latch starting the chain reaction. countDownLatches[countDownLatches.length - 1].countDown(); executorService.shutdown(); - assertTrue("Expected tasks to complete", + assertTrue("Expected tasks to complete", executorService.awaitTermination(10, TimeUnit.SECONDS)); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index b3e29f8e5cf55..d1a69e7a10cf0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -295,7 +295,7 @@ private static Position makePosition(long offset) { private static ApproximateProgress makeApproximateProgress(long offset) { return makeApproximateProgress(makePosition(offset)); } - + private static ApproximateProgress makeApproximateProgress( com.google.api.services.dataflow.model.Position position) { return new ApproximateProgress().setPosition(position); From 7fb008fac423dfb570f697f52becfd7cce50b584 Mon Sep 17 00:00:00 2001 From: fjp Date: Mon, 15 Dec 2014 11:55:00 -0800 Subject: [PATCH 0014/1541] Markdown-ify README [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82162508 --- README | 19 ------------------- README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 19 deletions(-) delete mode 100644 README create mode 100644 README.md diff --git a/README b/README deleted file mode 100644 index 2da8ea5ba93c4..0000000000000 --- a/README +++ /dev/null @@ -1,19 +0,0 @@ -Greetings! Welcome to the (Alpha) Google Cloud Dataflow Java SDK. - -Dataflow provides a simple, powerful model for building both batch and -streaming parallel data processing Pipelines. 
-
-To use the Google Cloud Dataflow SDK, you build a Pipeline which manages a
-graph of PTransforms and PCollections that the PTransforms consume and produce.
-
-You then use a PipelineRunner to specify where and how the pipeline should
-execute. Currently there are two runners:
- 1. The DirectPipelineRunner runs the pipeline on your local machine.
- 2. The [Blocking]DataflowPipelineRunner runs the pipeline on the Dataflow
- Service using the Google Cloud Platform. The Dataflow Service is
- currently in the Alpha phase of development and access is limited to
- whitelisted users.
-
-For more about both the Dataflow SDK and the Dataflow Service, visit:
- http://cloud.google.com/dataflow
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000..12eba01f5e831
--- /dev/null
+++ b/README.md
@@ -0,0 +1,42 @@
+# Cloud Dataflow Java SDK (Alpha)
+
+[Google Cloud Dataflow](http://cloud.google.com/dataflow)
+provides a simple, powerful programming model for building both batch
+and streaming parallel data processing pipelines.
+
+## Overview
+
+The key concepts in this programming model are:
+
+* [PCollection](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java):
+represents a collection of data, which could be bounded or unbounded in size.
+* [PTransform](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java):
+represents a computation that transforms input PCollections into output PCollections.
+* [Pipeline](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java):
+manages a directed acyclic graph of PTransforms and PCollections, which is ready for execution.
+* [PipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java):
+specifies where and how the pipeline should execute.
+
+Currently there are three runners:
+
+ 1. The [DirectPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java)
+runs the pipeline on your local machine.
+ 2. The
+[DataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java)
+submits the pipeline to the Dataflow Service**\***, where it runs using managed
+resources in the [Google Cloud Platform](http://cloud.google.com).
+ 3. The
+[BlockingDataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java)
+submits the pipeline to the Dataflow Service**\*** via the DataflowPipelineRunner and then prints messages
+about the job status until execution is complete.
+ +**\***_The Dataflow Service is currently in the Alpha phase of development and access +is limited to whitelisted users._ + +## Getting Started + +## More Information + +* [Google Cloud Dataflow](http://cloud.google.com/dataflow) +* [Dataflow Concepts and Programming Model](https://cloud.google.com/dataflow/java-sdk/building-a-pipeline) +* [Javadoc](https://cloud.google.com/dataflow/java-sdk/JavaDoc/index) From 5586b61fe7c4bd2de8549e7c6c3ac060c2ac1951 Mon Sep 17 00:00:00 2001 From: robertwb Date: Mon, 15 Dec 2014 12:18:39 -0800 Subject: [PATCH 0015/1541] Serializables is a very inefficient encoding for AccumulatingCombineFn.Accumulators, should be avoided. Also allow AccumulatingCombineFn.Accumulators to be a static class. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82164661 --- .../sdk/transforms/ApproximateQuantiles.java | 5 +- .../dataflow/sdk/transforms/Combine.java | 12 ++++- .../cloud/dataflow/sdk/transforms/Mean.java | 50 ++++++++++++++++--- .../cloud/dataflow/sdk/transforms/Top.java | 6 +-- .../runners/worker/CombineValuesFnTest.java | 4 +- .../dataflow/sdk/transforms/CombineTest.java | 13 +++-- 6 files changed, 67 insertions(+), 23 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index b2c93c4d701cd..a2f0094cbb463 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -367,9 +367,8 @@ public Coder getAccumulatorCoder( * estimated. */ class QuantileState - extends AccumulatingCombineFn - .QuantileState, List> - .Accumulator { + implements AccumulatingCombineFn.Accumulator + .QuantileState, List> { private T min; private T max; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index f455d5763ac1a..f27308b739589 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -355,6 +355,10 @@ public VO apply(Iterable inputs) { * for {@code VI} values and the enclosing {@code Pipeline}'s * {@code CoderRegistry} to try to infer the Coder for {@code VA} * values. + * + *

This is the Coder used to send data through a communication-intensive + * shuffle step, so a compact and efficient representation may have + * significant performance benefits. */ public Coder getAccumulatorCoder( CoderRegistry registry, Coder inputCoder) { @@ -476,14 +480,14 @@ public Coder getDefaultOutputCoder( * @param type of output values */ public abstract static class AccumulatingCombineFn - .Accumulator, VO> + , VO> extends CombineFn { /** * The type of mutable accumulator values used by this * {@code AccumulatingCombineFn}. */ - public abstract class Accumulator implements Serializable { + public abstract static interface Accumulator { /** * Adds the given input value to this accumulator, modifying * this accumulator. @@ -675,6 +679,10 @@ public VO apply(K key, Iterable inputs) { * used for {@code K} keys and input {@code VI} values and the * enclosing {@code Pipeline}'s {@code CoderRegistry} to try to * infer the Coder for {@code VA} values. + * + *

This is the Coder used to send data through a communication-intensive + * shuffle step, so a compact and efficient representation may have + * significant performance benefits. */ public Coder getAccumulatorCoder( CoderRegistry registry, Coder keyCoder, Coder inputCoder) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java index b198a0da25041..c82f4ab45d5ef 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java @@ -16,9 +16,16 @@ package com.google.cloud.dataflow.sdk.transforms; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.coders.CoderRegistry; -import com.google.cloud.dataflow.sdk.coders.SerializableCoder; +import com.google.cloud.dataflow.sdk.coders.CustomCoder; +import com.google.cloud.dataflow.sdk.coders.DoubleCoder; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; /** * {@code PTransform}s for computing the arithmetic mean @@ -102,11 +109,20 @@ public MeanFn() {} * Accumulator helper class for MeanFn. */ class CountSum - extends Combine.AccumulatingCombineFn.Accumulator { + implements Combine.AccumulatingCombineFn.Accumulator { long count = 0; double sum = 0.0; + public CountSum() { + this(0, 0); + } + + public CountSum(long count, double sum) { + this.count = count; + this.sum = sum; + } + @Override public void addInput(N element) { count++; @@ -130,14 +146,36 @@ public CountSum createAccumulator() { return new CountSum(); } + private static final Coder LONG_CODER = BigEndianLongCoder.of(); + private static final Coder DOUBLE_CODER = DoubleCoder.of(); + @SuppressWarnings("unchecked") @Override public Coder getAccumulatorCoder( CoderRegistry registry, Coder inputCoder) { - // The casts are needed because CountSum.class is a - // Class, but we need a - // Class.CountSum>. - return SerializableCoder.of((Class) (Class) CountSum.class); + return new CustomCoder () { + @Override + public void encode(CountSum value, OutputStream outStream, Coder.Context context) + throws CoderException, IOException { + Coder.Context nestedContext = context.nested(); + LONG_CODER.encode(value.count, outStream, nestedContext); + DOUBLE_CODER.encode(value.sum, outStream, nestedContext); + } + + @Override + public CountSum decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + Coder.Context nestedContext = context.nested(); + return new CountSum( + LONG_CODER.decode(inStream, nestedContext), + DOUBLE_CODER.decode(inStream, nestedContext)); + } + + @Override + public boolean isDeterministic() { + return true; + } + }; } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index 0f8a3f0565850..c4fb15bd23232 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -337,11 +337,7 @@ public & Serializable> TopCombineFn( this.compareFn = compareFn; } - class Heap - // TODO: Why do I have to fully qualify the - // Accumulator class here? 
- extends AccumulatingCombineFn.Heap, List> - .Accumulator { + class Heap implements AccumulatingCombineFn.Accumulator.Heap, List> { // Exactly one of these should be set. private List asList; // ordered largest first diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java index 160b5456d6193..4833ff19a3109 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java @@ -63,8 +63,8 @@ public class CombineValuesFnTest { public static class MeanInts extends Combine.AccumulatingCombineFn { - class CountSum extends - Combine.AccumulatingCombineFn.Accumulator { + class CountSum implements + Combine.AccumulatingCombineFn.Accumulator { long count; double sum; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 646977441e03b..c0d8e21b15e67 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -58,6 +58,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.Serializable; import java.util.Arrays; import java.util.HashSet; import java.util.List; @@ -365,8 +366,8 @@ public static class MeanInts extends private static final Coder LONG_CODER = BigEndianLongCoder.of(); private static final Coder DOUBLE_CODER = DoubleCoder.of(); - class CountSum extends - Combine.AccumulatingCombineFn.Accumulator { + class CountSum implements + Combine.AccumulatingCombineFn.Accumulator { long count = 0; double sum = 0.0; @@ -450,9 +451,9 @@ public static class TestCounter extends Integer, TestCounter.Counter, Iterable> { /** An accumulator that observes its merges and outputs */ - public class Counter extends - Combine.AccumulatingCombineFn< - Integer, Counter, Iterable>.Accumulator { + public class Counter implements + Combine.AccumulatingCombineFn.Accumulator>, + Serializable { public long sum = 0; public long inputs = 0; @@ -522,6 +523,8 @@ public Counter createAccumulator() { @Override public Coder getAccumulatorCoder( CoderRegistry registry, Coder inputCoder) { + // This is a *very* inefficient encoding to send over the wire, but suffices + // for tests. return SerializableCoder.of(Counter.class); } } From 0d4061a0416fb13532540c704a4e3909d26927d3 Mon Sep 17 00:00:00 2001 From: peihe Date: Mon, 15 Dec 2014 12:45:44 -0800 Subject: [PATCH 0016/1541] KeyedState: Add ParDo test that runs on service and triggers PARTITION_KEYS reshardings, and covers more cases on DirectPipelineRunner. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82166757 --- .../dataflow/sdk/transforms/ParDoTest.java | 79 ++++++++++++++++++- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index fdd557d08a7d4..a4fed2ada20b6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -35,10 +35,13 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionTuple; import com.google.cloud.dataflow.sdk.values.PCollectionView; @@ -266,7 +269,25 @@ public void finishBundle(Context c) { } } - static class TestUnexpectedKeyedStateDoFn extends DoFn { + /** + * Output the keys which have appeared at least three times. + */ + static class TestKeyedStateCountAtLeastThreeDoFn + extends DoFn, String> implements DoFn.RequiresKeyedState{ + @Override + public void processElement(ProcessContext c) throws IOException { + String key = c.element().getKey(); + CodedTupleTag tag = CodedTupleTag.of(key, BigEndianLongCoder.of()); + Long result = c.keyedState().lookup(tag); + long count = result == null ? 0 : result; + c.keyedState().store(tag, ++count); + if (count == 3) { + c.output(key); + } + } + } + + static class TestUnexpectedKeyedStateDoFn extends DoFn, String> { @Override public void processElement(ProcessContext c) { // Will fail since this DoFn doesn't implement RequiresKeyedState. @@ -274,6 +295,15 @@ public void processElement(ProcessContext c) { } } + static class TestKeyedStateDoFnWithNonKvInput + extends DoFn implements DoFn.RequiresKeyedState { + @Override + public void processElement(ProcessContext c) { + // Will fail since this DoFn's input isn't KV. 
+ c.keyedState(); + } + } + private static class StrangelyNamedDoer extends DoFn { @Override public void processElement(ProcessContext c) { @@ -646,13 +676,37 @@ public void testParDoWithErrorInFinishBatch() { } } + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testParDoKeyedState() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList( + "A", "A", "B", "C", "B", "A", "D", "D", "D", "D"); + + PCollection output = + p.apply(Create.of(inputs)) + .apply(ParDo.named("ToKv") + .of(new DoFn>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of(c.element(), 1)); + } + })) + .apply(ParDo.of(new TestKeyedStateCountAtLeastThreeDoFn())); + + DataflowAssert.that(output).containsInAnyOrder("A", "D"); + p.run(); + } + @Test public void testParDoWithUnexpectedKeyedState() { Pipeline p = TestPipeline.create(); - List inputs = Arrays.asList(3, -42, 666); + List> inputs = Arrays.asList( + KV.of("a", 1)); - PCollection input = createInts(p, inputs); + PCollection> input = p.apply(Create.of(inputs)); input .apply(ParDo.of(new TestUnexpectedKeyedStateDoFn())); @@ -666,6 +720,25 @@ public void testParDoWithUnexpectedKeyedState() { } } + @Test + public void testParDoKeyedStateDoFnWithNonKvInput() { + Pipeline p = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollection input = createInts(p, inputs); + + input + .apply(ParDo.of(new TestKeyedStateDoFnWithNonKvInput())); + try { + p.run(); + fail("should have failed"); + } catch (RuntimeException exn) { + assertThat(exn.toString(), + containsString("Keyed state is only available")); + } + } + @Test public void testParDoName() { Pipeline p = TestPipeline.create(); From c14dbdefd0418b01481d532d6bf8531726d5a13b Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 15 Dec 2014 13:07:18 -0800 Subject: [PATCH 0017/1541] Bundle the SDK within the examples jar. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82168727 --- examples/pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index fcb52fcdbf8d9..ad847af861d8b 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -108,10 +108,10 @@ 2.4.0 true - ${project.artifactId}-bundled-${project.version} + ${project.artifactId}-bundled-${project.version} - - *;scope=compile|runtime;artifactId=!google-cloud-dataflow-java-sdk-all;inline=true + + *;scope=compile|runtime;inline=true From 344a78dd0bf274150c09f0b6b3df0dfdd08455d0 Mon Sep 17 00:00:00 2001 From: earhart Date: Mon, 15 Dec 2014 13:27:01 -0800 Subject: [PATCH 0018/1541] Fixing up a few more warnings. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82170636 --- .../cloud/dataflow/sdk/coders/AvroCoder.java | 3 +-- .../dataflow/sdk/coders/ByteArrayCoder.java | 1 + .../dataflow/sdk/coders/CollectionCoder.java | 6 ++++++ .../dataflow/sdk/coders/IterableCoder.java | 6 ++++++ .../sdk/coders/IterableLikeCoder.java | 12 +++++++++-- .../cloud/dataflow/sdk/coders/ListCoder.java | 6 ++++++ .../cloud/dataflow/sdk/coders/MapCoder.java | 3 +-- .../sdk/coders/SerializableCoder.java | 1 + .../cloud/dataflow/sdk/coders/SetCoder.java | 1 + .../sdk/coders/TableRowJsonCoder.java | 1 + .../sdk/coders/TextualIntegerCoder.java | 1 + .../cloud/dataflow/sdk/coders/URICoder.java | 1 + .../cloud/dataflow/sdk/coders/VoidCoder.java | 1 + .../cloud/dataflow/sdk/io/DatastoreIO.java | 3 +++ .../sdk/options/PipelineOptionsFactory.java | 20 ++++++++++++------- .../sdk/options/ProxyInvocationHandler.java | 4 +++- .../sdk/runners/worker/SinkFactory.java | 1 + .../dataflow/sdk/testing/DataflowAssert.java | 10 ++++++++++ .../sdk/transforms/ApproximateUnique.java | 3 +++ .../dataflow/sdk/transforms/GroupByKey.java | 6 ++++++ .../dataflow/sdk/transforms/Partition.java | 1 + .../transforms/windowing/FixedWindows.java | 1 + .../sdk/util/GroupAlsoByWindowsDoFn.java | 1 + .../dataflow/sdk/util/UserCodeException.java | 1 + 24 files changed, 80 insertions(+), 14 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java index 9532da725ac97..ca1f3c856e6fa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -80,9 +80,8 @@ * * @param the type of elements handled by this coder */ +@SuppressWarnings("serial") public class AvroCoder extends StandardCoder { - private static final long serialVersionUID = 0; - /** * Returns an {@code AvroCoder} instance for the provided element type. * @param the element type diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java index c750d932dd066..19d97f3b7e6b0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java @@ -32,6 +32,7 @@ * If in a nested context, prefixes the encoded array with a VarInt encoding * of the length. */ +@SuppressWarnings("serial") public class ByteArrayCoder extends AtomicCoder { @JsonCreator public static ByteArrayCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java index 546695dfefe80..c75f645d344ef 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java @@ -30,6 +30,7 @@ * * @param the type of the elements of the Collections being transcoded */ +@SuppressWarnings("serial") public class CollectionCoder extends IterableLikeCoder> { public static CollectionCoder of(Coder elemCoder) { @@ -39,6 +40,11 @@ public static CollectionCoder of(Coder elemCoder) { ///////////////////////////////////////////////////////////////////////////// // Internal operations below here. 
+ @Override + protected final Collection decodeToIterable(List decodedElements) { + return decodedElements; + } + @JsonCreator public static CollectionCoder of( @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java index 801dd2042cfdd..79d6173742a88 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java @@ -32,6 +32,7 @@ * * @param the type of the elements of the Iterables being transcoded */ +@SuppressWarnings("serial") public class IterableCoder extends IterableLikeCoder> { public static IterableCoder of(Coder elemCoder) { @@ -41,6 +42,11 @@ public static IterableCoder of(Coder elemCoder) { ///////////////////////////////////////////////////////////////////////////// // Internal operations below here. + @Override + protected final Iterable decodeToIterable(List decodedElements) { + return decodedElements; + } + @JsonCreator public static IterableCoder of( @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index e6ecdbe26bb97..128d23bf21a2c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -42,6 +42,14 @@ public abstract class IterableLikeCoder> public Coder getElemCoder() { return elemCoder; } + /** + * Builds an instance of the coder's associated {@code Iterable} from a list + * of decoded elements. If {@code IT} is a supertype of {@code List}, the + * derived class implementation is permitted to return {@code decodedElements} + * directly. + */ + protected abstract IT decodeToIterable(List decodedElements); + ///////////////////////////////////////////////////////////////////////////// // Internal operations below here. @@ -106,7 +114,7 @@ public IT decode(InputStream inStream, Context context) for (int i = 0; i < size; i++) { elements.add(elemCoder.decode(dataInStream, nestedContext)); } - return (IT) elements; + return decodeToIterable(elements); } else { // We don't know the size a priori. Check if we're done with // each element. @@ -114,7 +122,7 @@ public IT decode(InputStream inStream, Context context) while (dataInStream.readBoolean()) { elements.add(elemCoder.decode(dataInStream, nestedContext)); } - return (IT) elements; + return decodeToIterable(elements); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java index ab9d8147aa1f1..f6f04b60d34ff 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java @@ -29,6 +29,7 @@ * * @param the type of the elements of the Lists being transcoded */ +@SuppressWarnings("serial") public class ListCoder extends IterableLikeCoder> { public static ListCoder of(Coder elemCoder) { @@ -38,6 +39,11 @@ public static ListCoder of(Coder elemCoder) { ///////////////////////////////////////////////////////////////////////////// // Internal operations below here. 
+ @Override + protected final List decodeToIterable(List decodedElements) { + return decodedElements; + } + @JsonCreator public static ListCoder of( @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java index 244419da6ac0e..1e12ef347e70f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java @@ -40,9 +40,8 @@ * @param the type of the keys of the KVs being transcoded * @param the type of the values of the KVs being transcoded */ +@SuppressWarnings("serial") public class MapCoder extends MapCoderBase> { - private static final long serialVersionUID = 0; - /** * Produces a MapCoder with the given keyCoder and valueCoder. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java index c078e6629a2b7..8e58b4d96e291 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java @@ -45,6 +45,7 @@ * * @param the type of elements handled by this coder */ +@SuppressWarnings("serial") public class SerializableCoder extends AtomicCoder { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java index 1a234c7b40ed8..8b5fca7638a5c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java @@ -38,6 +38,7 @@ * * @param the type of the elements of the set */ +@SuppressWarnings("serial") public class SetCoder extends StandardCoder> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java index e49dfbb9c01c8..f21aaa513eb2f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java @@ -29,6 +29,7 @@ /** * A TableRowJsonCoder encodes BigQuery TableRow objects. */ +@SuppressWarnings("serial") public class TableRowJsonCoder extends AtomicCoder { @JsonCreator diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java index 93d080b7f01cd..bd01ecbcca7b7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java @@ -25,6 +25,7 @@ /** * A TextualIntegerCoder encodes Integers as text. 
*/ +@SuppressWarnings("serial") public class TextualIntegerCoder extends AtomicCoder { @JsonCreator public static TextualIntegerCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java index ed5ae45c53e77..eedcddf787e10 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java @@ -28,6 +28,7 @@ * A {@code URICoder} encodes/decodes {@link URI}s by conversion to/from {@link String}, delegating * encoding/decoding of the string to {@link StringUtf8Coder}. */ +@SuppressWarnings("serial") public class URICoder extends AtomicCoder { @JsonCreator diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java index fc9a1e0958b24..24d5e061cfc73 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java @@ -24,6 +24,7 @@ /** * A VoidCoder encodes Voids. Uses zero bytes per Void. */ +@SuppressWarnings("serial") public class VoidCoder extends AtomicCoder { @JsonCreator public static VoidCoder of() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index 9c7fc0a1c5b33..e7b05bc8a520b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -154,6 +154,7 @@ public static Bound withHost(String host) { * A PTransform that reads from a Datastore query and returns a bounded * {@code PCollection}. */ + @SuppressWarnings("serial") public static class Bound extends PTransform> { String host; String datasetId; @@ -268,6 +269,7 @@ public static Bound withHost(String host) { * A PTransform that writes a bounded {@code PCollection} * to a Datastore. */ + @SuppressWarnings("serial") public static class Bound extends PTransform, PDone> { String host; String datasetId; @@ -352,6 +354,7 @@ public void evaluate( * A DoFn that performs query request to Datastore and converts * each QueryOptions into Entities. */ + @SuppressWarnings("serial") private static class ReadEntitiesFn extends DoFn { @Override public void processElement(ProcessContext c) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index a46ce313e3383..dae5ca14340f0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -385,19 +385,22 @@ static synchronized Registration validateWellForm Class iface, Set> validatedPipelineOptionsInterfaces) { Preconditions.checkArgument(iface.isInterface(), "Only interface types are supported."); + @SuppressWarnings("unchecked") Set> combinedPipelineOptionsInterfaces = FluentIterable.from(validatedPipelineOptionsInterfaces).append(iface).toSet(); // Validate that the view of all currently passed in options classes is well formed. 
if (!COMBINED_CACHE.containsKey(combinedPipelineOptionsInterfaces)) { - Class allProxyClass = Proxy.getProxyClass(PipelineOptionsFactory.class.getClassLoader(), - combinedPipelineOptionsInterfaces.toArray(EMPTY_CLASS_ARRAY)); + @SuppressWarnings("unchecked") + Class allProxyClass = + (Class) Proxy.getProxyClass(PipelineOptionsFactory.class.getClassLoader(), + combinedPipelineOptionsInterfaces.toArray(EMPTY_CLASS_ARRAY)); try { List propertyDescriptors = getPropertyDescriptors(allProxyClass); validateClass(iface, validatedPipelineOptionsInterfaces, allProxyClass, propertyDescriptors); COMBINED_CACHE.put(combinedPipelineOptionsInterfaces, - new Registration((Class) allProxyClass, propertyDescriptors)); + new Registration(allProxyClass, propertyDescriptors)); } catch (IntrospectionException e) { throw Throwables.propagate(e); } @@ -405,20 +408,22 @@ static synchronized Registration validateWellForm // Validate that the local view of the class is well formed. if (!INTERFACE_CACHE.containsKey(iface)) { - @SuppressWarnings("rawtypes") - Class proxyClass = Proxy.getProxyClass( + @SuppressWarnings({"rawtypes", "unchecked"}) + Class proxyClass = (Class) Proxy.getProxyClass( PipelineOptionsFactory.class.getClassLoader(), new Class[] {iface}); try { List propertyDescriptors = getPropertyDescriptors(proxyClass); validateClass(iface, validatedPipelineOptionsInterfaces, proxyClass, propertyDescriptors); INTERFACE_CACHE.put(iface, - new Registration((Class) proxyClass, propertyDescriptors)); + new Registration(proxyClass, propertyDescriptors)); } catch (IntrospectionException e) { throw Throwables.propagate(e); } } - return (Registration) INTERFACE_CACHE.get(iface); + @SuppressWarnings("unchecked") + Registration result = (Registration) INTERFACE_CACHE.get(iface); + return result; } public static Set> getRegisteredOptions() { @@ -822,6 +827,7 @@ private static Map parseObjects( Class klass, ListMultimap options) { Map propertyNamesToGetters = Maps.newHashMap(); PipelineOptionsFactory.validateWellFormed(klass, getRegisteredOptions()); + @SuppressWarnings("unchecked") Iterable propertyDescriptors = PipelineOptionsFactory.getPropertyDescriptors( FluentIterable.from(getRegisteredOptions()).append(klass).toSet()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java index 88e84f6142313..cbe9c422b325b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java @@ -105,7 +105,9 @@ public Object invoke(Object proxy, Method method, Object[] args) { } else if (args == null && "hashCode".equals(method.getName())) { return hashCode(); } else if (args != null && "as".equals(method.getName()) && args[0] instanceof Class) { - return as((Class) args[0]); + @SuppressWarnings("unchecked") + Class clazz = (Class) args[0]; + return as(clazz); } String methodName = method.getName(); synchronized (this) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java index df2d5ac754281..590e7f8151347 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java @@ -63,6 +63,7 @@ private SinkFactory() {} * @throws Exception if the sink could not be 
decoded and * constructed */ + @SuppressWarnings("serial") public static Sink create( PipelineOptions options, com.google.api.services.dataflow.model.Sink cloudSink, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java index d4fe32ffd86f3..d0ce9ff62b3f1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java @@ -98,6 +98,7 @@ public static IterableAssert thatIterable( /** * An assertion about the contents of a {@link PCollectionView<, ?>} */ + @SuppressWarnings("serial") public static class IterableAssert implements Serializable { private final PCollectionView, ?> actualResults; @@ -175,6 +176,7 @@ public IterableAssert containsInOrder(Collection expectedElements) { * operation using a {@code Matcher} operation that takes an array * of elements. */ + @SuppressWarnings("serial") static class AssertThatIterable extends AssertThat, T[]> { AssertThatIterable(T[] expected, String matcherClassName, @@ -188,6 +190,7 @@ static class AssertThatIterable extends AssertThat, T[]> { * SerializableFunction that verifies that an Iterable contains * expected items in any order. */ + @SuppressWarnings("serial") static class AssertContainsInAnyOrder extends AssertThatIterable { AssertContainsInAnyOrder(T... expected) { super(expected, @@ -204,6 +207,7 @@ static class AssertContainsInAnyOrder extends AssertThatIterable { * SerializableFunction that verifies that an Iterable contains * expected items in the provided order. */ + @SuppressWarnings("serial") static class AssertContainsInOrder extends AssertThatIterable { AssertContainsInOrder(T... expected) { super(expected, @@ -230,6 +234,7 @@ public static SingletonAssert thatSingleton(PCollection futureResult) /** * An assertion about a single value. */ + @SuppressWarnings("serial") public static class SingletonAssert implements Serializable { private final PCollectionView actualResult; @@ -274,6 +279,7 @@ public SingletonAssert is(T expectedValue) { * SerializableFunction that performs an {@code Assert.assertThat()} * operation using a {@code Matcher} operation that takes a single element. */ + @SuppressWarnings("serial") static class AssertThatValue extends AssertThat { AssertThatValue(T expected, String matcherClassName, @@ -287,6 +293,7 @@ static class AssertThatValue extends AssertThat { * SerializableFunction that verifies that a value is equal to an * expected value. */ + @SuppressWarnings("serial") public static class AssertIs extends AssertThatValue { AssertIs(T expected) { super(expected, "org.hamcrest.core.IsEqual", "equalTo"); @@ -308,6 +315,7 @@ private DataflowAssert() {} * produce a Matcher to be used to check an {@code Actual} value * against. */ + @SuppressWarnings("serial") public static class AssertThat implements SerializableFunction { final Expected expected; @@ -353,6 +361,7 @@ public Void apply(Actual in) { * SerializableFunction that performs an {@code Assert.assertThat()} * operation using a {@code Matcher} operation that takes a single element. */ + @SuppressWarnings("serial") static class AssertThatValue extends AssertThat { AssertThatValue(T expected, String matcherClassName, @@ -366,6 +375,7 @@ static class AssertThatValue extends AssertThat { * SerializableFunction that verifies that a value is equal to an * expected value. 
*/ + @SuppressWarnings("serial") public static class AssertIs extends AssertThatValue { public AssertIs(T expected) { super(expected, "org.hamcrest.core.IsEqual", "equalTo"); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java index 9308a010a2a9e..39612397de172 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java @@ -156,6 +156,7 @@ public static PerKey perKey(double maximumEstimationError) { * * @param the type of the elements in the input {@code PCollection} */ + @SuppressWarnings("serial") static class Globally extends PTransform, PCollection> { /** @@ -212,6 +213,7 @@ protected String getKindString() { * {@code PCollection}s * @param the type of the values in the input {@code PCollection} */ + @SuppressWarnings("serial") static class PerKey extends PTransform>, PCollection>> { @@ -281,6 +283,7 @@ protected String getKindString() { * * @param the type of the values being combined */ + @SuppressWarnings("serial") public static class ApproximateUniqueCombineFn extends CombineFn { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 72e475f31002a..e8acdbad5be39 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -235,6 +235,7 @@ public GroupAlsoByWindow(WindowingFn windowingFn) { } @Override + @SuppressWarnings("unchecked") public PCollection>> apply( PCollection>>> input) { @SuppressWarnings("unchecked") @@ -371,6 +372,11 @@ boolean sortsValues() { ///////////////////////////////////////////////////////////////////////////// static { + registerWithDirectPipelineRunner(); + } + + @SuppressWarnings("rawtypes") + private static void registerWithDirectPipelineRunner() { DirectPipelineRunner.registerDefaultTransformEvaluator( GroupByKeyOnly.class, new DirectPipelineRunner.TransformEvaluator() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java index a6444b2774f4c..7e5cc00f62a9f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java @@ -61,6 +61,7 @@ * @param the type of the elements of the input and output * {@code PCollection}s */ +@SuppressWarnings("serial") public class Partition extends PTransform, PCollectionList> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java index 91c591c901900..d381a80627b73 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java @@ -31,6 +31,7 @@ * Window.by(FixedWindows.of(Duration.standardMinutes(10)))); * }

*/ +@SuppressWarnings("serial") public class FixedWindows extends PartitioningWindowingFn { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 62ae4875f9651..56bba7d083feb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -47,6 +47,7 @@ * @param input value element type * @param window type */ +@SuppressWarnings("serial") public class GroupAlsoByWindowsDoFn extends DoFn>>, KV>> { // TODO: Add back RequiresKeyed state once that is supported. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java index a0bfed1626f92..840261d591492 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java @@ -29,6 +29,7 @@ * reached. */ public class UserCodeException extends RuntimeException { + private static final long serialVersionUID = 0; private static final Logger LOG = LoggerFactory.getLogger(UserCodeException.class); public UserCodeException(Throwable t) { From f2cd3ceb8bff483d160d9949935b5b2e51b67f11 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 15 Dec 2014 14:16:57 -0800 Subject: [PATCH 0019/1541] Make checkstyle violations errors. Fix existing violations. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82175161 --- checkstyle.xml | 31 ++++++----- .../examples/TopWikipediaSessions.java | 2 +- .../cloud/dataflow/examples/WordCount.java | 4 +- .../cloud/dataflow/examples/TfIdfTest.java | 4 +- .../cloud/dataflow/sdk/coders/AvroCoder.java | 2 +- .../cloud/dataflow/sdk/io/DatastoreIO.java | 10 ++-- .../cloud/dataflow/sdk/io/PubsubIO.java | 21 ++++---- .../sdk/options/DataflowPipelineOptions.java | 2 +- .../dataflow/sdk/options/GcpOptions.java | 2 +- .../dataflow/sdk/options/GcsOptions.java | 2 +- .../sdk/options/PipelineOptionsFactory.java | 53 ++++++++++--------- .../runners/DataflowPipelineTranslator.java | 8 +-- .../sdk/runners/worker/CombineValuesFn.java | 19 ++++--- .../worker/DataflowWorkProgressUpdater.java | 2 +- .../runners/worker/GroupingShuffleSource.java | 2 +- .../dataflow/sdk/testing/DataflowAssert.java | 2 +- .../transforms/windowing/GlobalWindow.java | 4 +- .../cloud/dataflow/sdk/util/GcsUtil.java | 10 ++-- .../sdk/util/GroupAlsoByWindowsDoFn.java | 2 +- .../worker/BatchingShuffleEntryReader.java | 2 +- .../dataflow/sdk/io/DatastoreIOTest.java | 2 +- .../options/PipelineOptionsFactoryTest.java | 8 +-- .../sdk/options/PipelineOptionsTest.java | 2 +- .../options/ProxyInvocationHandlerTest.java | 8 +-- .../worker/GroupingShuffleSourceTest.java | 14 ++--- .../worker/PartitioningShuffleSourceTest.java | 10 ++-- .../sdk/runners/worker/ShuffleSinkTest.java | 16 +++--- .../worker/UngroupedShuffleSourceTest.java | 10 ++-- .../transforms/ApproximateQuantilesTest.java | 7 ++- .../dataflow/sdk/transforms/CombineTest.java | 4 +- .../sdk/transforms/PartitionTest.java | 8 ++- .../dataflow/sdk/transforms/TopTest.java | 9 ++-- .../cloud/dataflow/sdk/values/KVTest.java | 18 +++---- .../cloud/dataflow/sdk/values/PDoneTest.java | 9 ++-- 34 files changed, 158 insertions(+), 151 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index 702558ac34acb..c6c07ef9f1c98 100644 --- 
a/checkstyle.xml +++ b/checkstyle.xml @@ -132,7 +132,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + @@ -147,7 +147,10 @@ page at http://checkstyle.sourceforge.net/config.html --> - + + + - + @@ -186,7 +189,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + @@ -198,7 +201,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + @@ -209,32 +212,32 @@ page at http://checkstyle.sourceforge.net/config.html --> - + - + - + - + - + @@ -262,7 +265,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + @@ -283,12 +286,12 @@ page at http://checkstyle.sourceforge.net/config.html --> --> - + - + @@ -376,7 +379,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java index a64e2fc98f49a..f58871be308d2 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TopWikipediaSessions.java @@ -73,7 +73,7 @@ public class TopWikipediaSessions { private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json"; /** - * Extracts user and timestamp from a TableRow representing a Wikipedia edit + * Extracts user and timestamp from a TableRow representing a Wikipedia edit. */ static class ExtractUserAndTimestamp extends DoFn { @Override diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 948b83e032fab..6b1623005785d 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -138,7 +138,9 @@ public static interface Options extends PipelineOptions { String getOutput(); void setOutput(String value); - /** Returns gs://${STAGING_LOCATION}/"counts.txt" */ + /** + * Returns gs://${STAGING_LOCATION}/"counts.txt" as the default destination. + */ public static class OutputFactory implements DefaultValueFactory { @Override public String create(PipelineOptions options) { diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java index 341fd80c25b2b..990458f5e059c 100644 --- a/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java @@ -34,12 +34,12 @@ import java.util.Arrays; /** - * Tests of TfIdf + * Tests of {@link TfIdf}. */ @RunWith(JUnit4.class) public class TfIdfTest { - /** Test that the example runs */ + /** Test that the example runs. */ @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testTfIdf() throws Exception { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java index ca1f3c856e6fa..db0000cd25b60 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -58,7 +58,7 @@ * null unless annotated by * * org.apache.avro.reflect.Nullable or a - * + * * org.apache.avro.reflect.Union containing null. *

* To use, specify the {@code Coder} type on a PCollection: diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index e7b05bc8a520b..1b66b868c97ed 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -93,7 +93,7 @@ *

 {@code
  * // Read a query from Datastore
  * PipelineOptions options =
- *     CliPipelineOptionsFactory.create(PipelineOptions.class, args);
+ *     PipelineOptionsFactory.fromArgs(args).create();
  * Pipeline p = Pipeline.create(options);
  * PCollection entities =
  *     p.apply(DatastoreIO.Read
@@ -233,12 +233,12 @@ public PCollection apply(PBegin input) {
 
   ///////////////////// Write Class /////////////////////////////////
   /**
-   * A PTransform that writes a {@code PCollection} containing
+   * A {@link PTransform} that writes a {@code PCollection} containing
    * entities to a Datastore kind.
    *
-   * Current version only supports Write operation running on
-   * DirectPipelineRunner.  If Write is used on DataflowPipelineRunner,
-   * it throws UnsupportedOperationException and won't continue on the
+   * 

Current version only supports Write operation running on + * {@link DirectPipelineRunner}. If Write is used on {@link DataflowPipelineRunner}, + * it throws {@link UnsupportedOperationException} and won't continue on the * operation. * */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 5d87f2563fa52..f5b57f82fb5dc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -140,9 +140,11 @@ public static Bound named(String name) { /** * Creates and returns a PubsubIO.Read PTransform for reading from * a Pubsub topic with the specified publisher topic. Format for - * Cloud Pubsub topic names should be of the form /topics//, - * where is the name of the publishing project. - * The component must comply with the below requirements. + * Cloud Pubsub topic names should be of the form + * {@code /topics//}, where {@code } is the name of + * the publishing project. The {@code } component must comply with + * the below requirements. + * *

    *
  • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods * ('.').
  • @@ -161,9 +163,10 @@ public static Bound topic(String topic) { * a specific Pubsub subscription. Mutually exclusive with * PubsubIO.Read.topic(). * Cloud Pubsub subscription names should be of the form - * /subscriptions//<, - * where is the name of the project the subscription belongs to. - * The component must comply with the below requirements. + * {@code /subscriptions//<}, + * where {@code } is the name of the project the subscription belongs to. + * The {@code } component must comply with the below requirements. + * *
      *
    • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods * ('.').
    • @@ -186,7 +189,7 @@ public static class Bound extends PTransform> { /** The Pubsub topic to read from. */ String topic; - /** The Pubsub subscription to read from */ + /** The Pubsub subscription to read from. */ String subscription; Bound() {} @@ -268,8 +271,8 @@ public static Bound named(String name) { } /** The topic to publish to. - * Cloud Pubsub topic names should be /topics//, - * where is the name of the publishing project. + * Cloud Pubsub topic names should be {@code /topics//}, + * where {@code } is the name of the publishing project. */ public static Bound topic(String topic) { return new Bound().topic(topic); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index 686d72e20c2e6..d30f7dc4d96b3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -112,7 +112,7 @@ public String create(PipelineOptions options) { } } - /** Alternative Dataflow client */ + /** Alternative Dataflow client. */ @JsonIgnore @Default.InstanceFactory(DataflowClientFactory.class) Dataflow getDataflowClient(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 34faf03acfb66..959a4fc77f3fb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -126,7 +126,7 @@ public String create(PipelineOptions options) { String getCredentialId(); void setCredentialId(String value); - /** Alternative Google Cloud Platform Credential */ + /** Alternative Google Cloud Platform Credential. */ @JsonIgnore @Description("Google Cloud Platform user credentials.") @Default.InstanceFactory(GcpUserCredentialsFactory.class) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java index 20685b7cf383f..39614242f7f7c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java @@ -33,7 +33,7 @@ */ public interface GcsOptions extends ApplicationNameOptions, GcpOptions, PipelineOptions { - /** Alternative GcsUtil instance */ + /** Alternative GcsUtil instance. */ @JsonIgnore @Default.InstanceFactory(GcsUtil.GcsUtilFactory.class) GcsUtil getGcsUtil(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index dae5ca14340f0..176ff8c96e509 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -100,14 +100,14 @@ public static PipelineOptions create() { } /** - * Creates and returns an object which implements @{code }. + * Creates and returns an object which implements {@code }. * This sets the {@link ApplicationNameOptions#getAppName() "appName"} to the calling * {@link Class#getSimpleName() classes simple name}. - *

      - * Note that @{code } must be composable with every registered interface with this factory. + * + *

      Note that {@code } must be composable with every registered interface with this factory. * See {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for more details. * - * @return An object which implements @{code }. + * @return An object which implements {@code }. */ public static T as(Class klass) { return new Builder(getAppName(3)).as(klass); @@ -124,13 +124,13 @@ public static T as(Class klass) { * --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3]) * --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3]) *

- * Properties are able to bound to {@link String} and Java primitives @{code boolean}, - * @{code byte}, @{code short}, @{code int}, @{code long}, @{code float}, @{code double} and + * Properties are able to bound to {@link String} and Java primitives {@code boolean}, + * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, {@code double} and * their primitive wrapper classes. *

- * List style properties are able to be bound to @{code boolean[]}, @{code char[]}, - * @{code short[]}, @{code int[]}, @{code long[]}, @{code float[]}, @{code double[]}, - * @{code String[]} and @{code List}. + * List style properties are able to be bound to {@code boolean[]}, {@code char[]}, + * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, + * {@code String[]} and {@code List}. */ public static Builder fromArgs(String[] args) { return new Builder(getAppName(3)).fromArgs(args); @@ -174,13 +174,13 @@ private Builder(String defaultAppName, String[] args, boolean validation) { * --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3]) * --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3]) * - * Properties are able to bound to {@link String} and Java primitives @{code boolean}, - * @{code byte}, @{code short}, @{code int}, @{code long}, @{code float}, @{code double} and + * Properties are able to bound to {@link String} and Java primitives {@code boolean}, + * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, {@code double} and * their primitive wrapper classes. *

- * List style properties are able to be bound to @{code boolean[]}, @{code char[]}, - * @{code short[]}, @{code int[]}, @{code long[]}, @{code float[]}, @{code double[]}, - * @{code String[]} and @{code List}. + * List style properties are able to be bound to {@code boolean[]}, {@code char[]}, + * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, + * {@code String[]} and {@code List}. */ public Builder fromArgs(String[] args) { Preconditions.checkNotNull(args, "Arguments should not be null."); @@ -208,13 +208,13 @@ public PipelineOptions create() { } /** - * Creates and returns an object which implements @{code } using the values configured on + * Creates and returns an object which implements {@code } using the values configured on * this builder during construction. *

* Note that {@code } must be composable with every registered interface with this factory. * See {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for more details. * - * @return An object which implements @{code }. + * @return An object which implements {@code }. */ public T as(Class klass) { Map initialOptions = Maps.newHashMap(); @@ -296,7 +296,7 @@ Class getProxyClass() { BlockingDataflowPipelineRunner.class) .build(); - /** Methods which are ignored when validating the proxy class */ + /** Methods which are ignored when validating the proxy class. */ private static final Set IGNORED_METHODS; /** The set of options which have been registered and visible to the user. */ @@ -779,8 +779,9 @@ public boolean apply(Method input) { /** * Splits string arguments based upon expected pattern of --argName=value. - *

- * Example GNU style command line arguments: + * + *

Example GNU style command line arguments: + * *

    *   --project=MyProject (simple property, will set the "project" property to "MyProject")
    *   --readOnly=true (for boolean properties, will set the "readOnly" property to "true")
@@ -788,12 +789,14 @@ public boolean apply(Method input) {
    *   --x=1 --x=2 --x=3 (list style property, will set the "x" property to [1, 2, 3])
    *   --x=1,2,3 (shorthand list style property, will set the "x" property to [1, 2, 3])
    * 
- * Properties are able to bound to {@link String} and Java primitives boolean, byte, - * short, int, long, float, double and their primitive wrapper classes. - *

- * List style properties are able to be bound to boolean[], char[], short[], - * int[], long[], float[], double[], String[] and List. - *

+ * + *

Properties are able to bound to {@link String} and Java primitives {@code boolean}, + * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, {@code double} + * and their primitive wrapper classes. + * + *

List style properties are able to be bound to {@code boolean[]}, {@code char[]}, + * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, + * {@code String[]}, and {@code List}. */ private static ListMultimap parseCommandLine(String[] args) { ImmutableListMultimap.Builder builder = ImmutableListMultimap.builder(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 823b9d44a1f6e..52305ccd54966 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -134,7 +134,7 @@ private DataflowPipelineTranslator(DataflowPipelineOptions options) { } /** - * Translates a Pipeline into a Job + * Translates a {@link Pipeline} into a {@code Job}. */ public Job translate(Pipeline pipeline, List packages) { Translator translator = new Translator(pipeline); @@ -174,7 +174,7 @@ TransformTranslator getTransformTranslator(Class transformClass) { } /** - * An translator of a PTransform. + * A translator of a {@link PTransform}. */ public interface TransformTranslator { public void translate(PT transform, @@ -183,8 +183,8 @@ public void translate(PT transform, /** * The interface provided to registered callbacks for interacting - * with the DataflowPipelineRunner, including reading and writing the - * values of PCollections and side inputs ({@link PCollectionViews}). + * with the {@link DataflowPipelineRunner}, including reading and writing the + * values of {@link PCollection}s and side inputs ({@link PCollectionView}s). */ public interface TranslationContext { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 62a371d05271f..761aaa950dba4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -49,11 +49,10 @@ public class CombineValuesFn extends NormalParDoFn { * phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees * fit. The CombinerPhase dictates which DoFn is actually running in * the worker. - * - * TODO: These strings are part of the service definition, and - * should be added into the definition of the ParDoInstruction, - * but the protiary definitions don't allow for enums yet. */ + // TODO: These strings are part of the service definition, and + // should be added into the definition of the ParDoInstruction, + // but the protiary definitions don't allow for enums yet. public static class CombinePhase { public static final String ALL = "all"; public static final String ADD = "add"; @@ -149,8 +148,8 @@ public void processElement(ProcessContext c) { } } - /** - * ADD phase: KV> -> KV + /* + * ADD phase: KV> -> KV. */ private static class AddInputsDoFn extends DoFn>, KV>{ @@ -174,8 +173,8 @@ public void processElement(ProcessContext c) { } } - /** - * MERGE phase: KV> -> KV + /* + * MERGE phase: KV> -> KV. */ private static class MergeAccumulatorsDoFn extends DoFn>, KV>{ @@ -196,8 +195,8 @@ public void processElement(ProcessContext c) { } } - /** - * EXTRACT phase: KV> -> KV + /* + * EXTRACT phase: KV> -> KV. 
*/ private static class ExtractOutputDoFn extends DoFn, KV>{ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java index f2d41cfcbc45d..f2569b1488f6e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -45,7 +45,7 @@ public class DataflowWorkProgressUpdater extends WorkProgressUpdater { private static final Logger LOG = LoggerFactory.getLogger(DataflowWorkProgressUpdater.class); - /** The Dataflow Worker WorkItem client */ + /** The Dataflow Worker WorkItem client. */ private final DataflowWorker.WorkUnitClient workUnitClient; /** The WorkItem for which work progress updates are sent. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java index 2d168879a21b7..ac2bd5c3a78fe 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java @@ -158,7 +158,7 @@ private final class GroupingShuffleSourceIterator */ private ByteArrayShufflePosition stopPosition = null; - /** The next group to be consumed, if available */ + /** The next group to be consumed, if available. */ private KeyGroupedShuffleEntries nextGroup = null; public GroupingShuffleSourceIterator(ShuffleEntryReader reader) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java index d0ce9ff62b3f1..3debc9ff4f034 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java @@ -96,7 +96,7 @@ public static IterableAssert thatIterable( } /** - * An assertion about the contents of a {@link PCollectionView<, ?>} + * An assertion about the contents of a {@link PCollectionView<, ?>}. */ @SuppressWarnings("serial") public static class IterableAssert implements Serializable { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java index 5d0773d598a52..507b1cc860783 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -51,7 +51,7 @@ public Coder windowCoder() { * The default window into which all data is placed. */ public static class Window extends BoundedWindow { - public static Window INSTANCE = new Window(); + public static final Window INSTANCE = new Window(); @Override public Instant maxTimestamp() { @@ -64,7 +64,7 @@ private Window() {} * {@link Coder} for encoding and decoding {@code Window}s. 
*/ public static class Coder extends AtomicCoder { - public static Coder INSTANCE = new Coder(); + public static final Coder INSTANCE = new Coder(); @Override public void encode(Window window, OutputStream outStream, Context context) {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java index cc429955f5662..bcb387f29a6ed 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java @@ -42,8 +42,6 @@ /** * Provides operations on GCS. - * - * TODO: re-implement as a FileSystemProvider? */ public class GcsUtil { /** @@ -92,7 +90,7 @@ public GcsUtil create(PipelineOptions options) { ///////////////////////////////////////////////////////////////////////////// - /** Client for the GCS API */ + /** Client for the GCS API. */ private final Storage storage; // Helper delegate for turning IOExceptions from API calls into higher-level semantics. @@ -110,7 +108,7 @@ private GcsUtil(Storage storageClient, ExecutorService executorService) { * Expands a pattern into matched paths. The input path may contain * globs (in the last component only!), which are expanded in the result. * - * TODO: add support for full path matching. + *

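As a rough sketch of the intended behavior (bucket and object names are made up; this assumes a GcsUtil instance is already available and that expand returns the matching GcsPaths):

    // A glob in the last component, e.g. gs://my-bucket/logs/part-*.txt,
    // expands to every object whose name matches that pattern.
    List<GcsPath> matches =
        gcsUtil.expand(GcsPath.fromComponents("my-bucket", "logs/part-*.txt"));
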
TODO: add support for full path matching. */ public List expand(GcsPath path) throws IOException { if (!GCS_READ_PATTERN.matcher(path.getObject()).matches()) { @@ -187,7 +185,7 @@ public long fileSize(GcsPath path) throws IOException { /** * Opens an object in GCS. * - * Returns a SeekableByteChannel which provides access to data in the bucket. + *

Returns a SeekableByteChannel which provides access to data in the bucket. * * @param path the GCS filename to read from * @return a SeekableByteChannel which can read the object data @@ -202,7 +200,7 @@ public SeekableByteChannel open(GcsPath path) /** * Creates an object in GCS. * - * Returns a WritableByteChannel which can be used to write data to the + *

Returns a WritableByteChannel which can be used to write data to the * object. * * @param path the GCS file to write to diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 56bba7d083feb..f4bd28039686f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -344,7 +344,7 @@ public boolean hasMoreWindows() { } /** - * Returns the timestamp of the next window + * Returns the timestamp of the next window. */ public Instant nextTimestamp() { return windows.peek().maxTimestamp(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java index 2a596c0d86f86..47cfa3646dec6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/BatchingShuffleEntryReader.java @@ -36,7 +36,7 @@ public final class BatchingShuffleEntryReader implements ShuffleEntryReader { private final ShuffleBatchReader batchReader; /** - * Constructs a {@link BatchingShuffleEntryReader} + * Constructs a {@link BatchingShuffleEntryReader}. * * @param batchReader supplies the underlying * {@link ShuffleBatchReader} to read batches of entries from diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java index e026c58102dae..fba8ec20b3fd0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java @@ -67,7 +67,7 @@ public void setUp() { } /** - * Test for reading one entity from kind "food" + * Test for reading one entity from kind "food". */ @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index ca1e9502bf97d..206a13f70d171 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -235,7 +235,7 @@ public void testPropertyIsSetOnRegisteredPipelineOptionNotPartOfOriginalInterfac assertEquals("testProject", options.as(GcpOptions.class).getProject()); } - /** A test interface containing all the primitives */ + /** A test interface containing all the primitives. */ public static interface Primitives extends PipelineOptions { boolean getBoolean(); void setBoolean(boolean value); @@ -286,7 +286,7 @@ public void testBooleanShorthandArgument() { assertTrue(options.getBoolean()); } - /** A test interface containing all supported objects */ + /** A test interface containing all supported objects. */ public static interface Objects extends PipelineOptions { Boolean getBoolean(); void setBoolean(Boolean value); @@ -345,7 +345,7 @@ public void testMissingArgument() { assertNull(options.getString()); } - /** A test interface containing all supported array return types */ + /** A test interface containing all supported array return types. 
*/ public static interface Arrays extends PipelineOptions { boolean[] getBoolean(); void setBoolean(boolean[] value); @@ -429,7 +429,7 @@ public void testOutOfOrderArrays() { assertArrayEquals(new char[] {'d', 'e', 'f'}, options.getChar()); } - /** A test interface containing all supported List return types */ + /** A test interface containing all supported List return types. */ public static interface Lists extends PipelineOptions { List getString(); void setString(List value); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java index 9db6a6b754221..1a11039504a6d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsTest.java @@ -28,7 +28,7 @@ /** Unit tests for {@link PipelineOptions}. */ @RunWith(JUnit4.class) public class PipelineOptionsTest { - /** Interface used for testing that {@link PipelineOptions#as(Class)} functions */ + /** Interface used for testing that {@link PipelineOptions#as(Class)} functions. */ public static interface TestOptions extends PipelineOptions { } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index d33d42e4bf165..0b2dc0a9adfba 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -387,7 +387,7 @@ public void testJsonConversionForDefault() throws Exception { assertNotNull(serializeDeserialize(PipelineOptions.class, options)); } - /** Test interface for JSON conversion of simple types */ + /** Test interface for JSON conversion of simple types. */ private static interface SimpleTypes extends PipelineOptions { int getInteger(); void setInteger(int value); @@ -440,7 +440,7 @@ public void testJsonConversionForOverriddenSerializedValues() throws Exception { assertEquals("TestValue", options3.getString()); } - /** Test interface for JSON conversion of container types */ + /** Test interface for JSON conversion of container types. */ private static interface ContainerTypes extends PipelineOptions { List getList(); void setList(List values); @@ -465,7 +465,7 @@ public void testJsonConversionForContainerTypes() throws Exception { assertEquals(set, options2.getSet()); } - /** Test interface for conversion of inner types */ + /** Test interface for conversion of inner types. */ private static class InnerType { public double doubleField; @@ -483,7 +483,7 @@ && getClass().equals(obj.getClass()) } } - /** Test interface for conversion of generics and inner types */ + /** Test interface for conversion of generics and inner types. 
*/ private static class ComplexType { public String stringField; public Integer intField; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java index b41bd1b2e2914..eacb1ef7661b5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java @@ -64,12 +64,12 @@ */ @RunWith(JUnit4.class) public class GroupingShuffleSourceTest { - static final List>> NO_KVS = Collections.emptyList(); + private static final List>> NO_KVS = Collections.emptyList(); - static final Instant timestamp = new Instant(123000); - static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + private static final Instant timestamp = new Instant(123000); + private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - static final List>> KVS = Arrays.asList( + private static final List>> KVS = Arrays.asList( KV.of(1, Arrays.asList("in 1a", "in 1b")), KV.of(2, Arrays.asList("in 2a", "in 2b")), KV.of(3, Arrays.asList("in 3")), @@ -77,7 +77,7 @@ public class GroupingShuffleSourceTest { KV.of(5, Arrays.asList("in 5"))); /** How many of the values with each key are to be read. */ - enum ValuesToRead { + private enum ValuesToRead { /** Don't even ask for the values iterator. */ SKIP_VALUES, /** Get the iterator, but don't read any values. */ @@ -88,7 +88,7 @@ enum ValuesToRead { READ_ALL_VALUES } - void runTestReadShuffleSource(List>> input, + private void runTestReadShuffleSource(List>> input, ValuesToRead valuesToRead) throws Exception { Coder> elemCoder = @@ -319,7 +319,7 @@ public void testReadFromShuffleSourceAndFailToUpdateStopPosition() int i = 0; for (; iter.hasNext(); ++i) { - KV> elem = iter.next().getValue(); + iter.next().getValue(); // ignored if (i == 0) { // First record byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java index be8c972c5944a..2b88456011dc4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java @@ -47,12 +47,12 @@ */ @RunWith(JUnit4.class) public class PartitioningShuffleSourceTest { - static final List>> NO_KVS = Collections.emptyList(); + private static final List>> NO_KVS = Collections.emptyList(); - static final Instant timestamp = new Instant(123000); - static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + private static final Instant timestamp = new Instant(123000); + private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - static final List>> KVS = Arrays.asList( + private static final List>> KVS = Arrays.asList( WindowedValue.of(KV.of(1, "in 1a"), timestamp, Lists.newArrayList(window)), WindowedValue.of(KV.of(1, "in 1b"), timestamp, Lists.newArrayList(window)), WindowedValue.of(KV.of(2, "in 2a"), timestamp, Lists.newArrayList(window)), @@ -64,7 +64,7 @@ public class PartitioningShuffleSourceTest { WindowedValue.of(KV.of(4, "in 4d"), timestamp, 
Lists.newArrayList(window)), WindowedValue.of(KV.of(5, "in 5"), timestamp, Lists.newArrayList(window))); - void runTestReadShuffleSource(List>> expected) + private void runTestReadShuffleSource(List>> expected) throws Exception { Coder>> elemCoder = WindowedValue.getFullCoder( KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java index 3e390b8966afa..b2aa533ce53f2 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java @@ -45,13 +45,13 @@ import java.util.List; /** - * Tests for ShuffleSink. + * Tests for {@link ShuffleSink}. */ @RunWith(JUnit4.class) public class ShuffleSinkTest { - static final List> NO_KVS = Collections.emptyList(); + private static final List> NO_KVS = Collections.emptyList(); - static final List> KVS = Arrays.asList( + private static final List> KVS = Arrays.asList( KV.of(1, "in 1a"), KV.of(1, "in 1b"), KV.of(2, "in 2a"), @@ -63,10 +63,10 @@ public class ShuffleSinkTest { KV.of(4, "in 4d"), KV.of(5, "in 5")); - static final List>> NO_SORTING_KVS = + private static final List>> NO_SORTING_KVS = Collections.emptyList(); - static final List>> SORTING_KVS = + private static final List>> SORTING_KVS = Arrays.asList( KV.of(1, KV.of("in 1a", 3)), KV.of(1, KV.of("in 1b", 9)), @@ -79,10 +79,10 @@ public class ShuffleSinkTest { KV.of(4, KV.of("in 4d", 1)), KV.of(5, KV.of("in 5", 666))); - static final Instant timestamp = new Instant(123000); - static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + private static final Instant timestamp = new Instant(123000); + private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - void runTestWriteUngroupingShuffleSink(List expected) + private void runTestWriteUngroupingShuffleSink(List expected) throws Exception { Coder> windowedValueCoder = WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindow().windowCoder()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java index 3a360d8d24add..bf9a15c05b343 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java @@ -43,20 +43,20 @@ */ @RunWith(JUnit4.class) public class UngroupedShuffleSourceTest { - static final Instant timestamp = new Instant(123000); - static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); + private static final Instant timestamp = new Instant(123000); + private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - byte[] asShuffleKey(long seqNum) throws Exception { + private byte[] asShuffleKey(long seqNum) throws Exception { return CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum); } - byte[] asShuffleValue(Integer value) throws Exception { + private byte[] asShuffleValue(Integer value) throws Exception { return CoderUtils.encodeToByteArray( WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder()), WindowedValue.of(value, timestamp, 
Lists.newArrayList(window))); } - void runTestReadShuffleSource(List expected) throws Exception { + private void runTestReadShuffleSource(List expected) throws Exception { UngroupedShuffleSource> shuffleSource = new UngroupedShuffleSource<>( PipelineOptionsFactory.create(), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java index 406fb0730d023..808ef4a23b6a8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java @@ -25,7 +25,6 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.runners.DirectPipeline; -import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.ApproximateQuantiles.ApproximateQuantilesCombineFn; @@ -45,7 +44,7 @@ import java.util.List; /** - * Tests for ApproximateQuantiles + * Tests for {@link ApproximateQuantiles}. */ @RunWith(JUnit4.class) @SuppressWarnings("serial") @@ -74,7 +73,7 @@ public void testQuantilesGlobally() { PCollection> quantiles = input.apply(ApproximateQuantiles.globally(5)); - DirectPipelineRunner.EvaluationResults results = p.run(); + p.run(); DataflowAssert.that(quantiles) .containsInAnyOrder(Arrays.asList(0, 25, 50, 75, 100)); @@ -89,7 +88,7 @@ public void testQuantilesGobally_comparable() { input.apply( ApproximateQuantiles.globally(5, new DescendingIntComparator())); - DirectPipelineRunner.EvaluationResults results = p.run(); + p.run(); DataflowAssert.that(quantiles) .containsInAnyOrder(Arrays.asList(100, 75, 50, 25, 0)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index c0d8e21b15e67..baf8a24756cc7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -406,7 +406,7 @@ public Coder getAccumulatorCoder( } /** - * A Coder for CountSum + * A {@link Coder} for {@link CountSum}. */ public class CountSumCoder extends CustomCoder { @Override @@ -450,7 +450,7 @@ public static class TestCounter extends Combine.AccumulatingCombineFn< Integer, TestCounter.Counter, Iterable> { - /** An accumulator that observes its merges and outputs */ + /** An accumulator that observes its merges and outputs. */ public class Counter implements Combine.AccumulatingCombineFn.Accumulator>, Serializable { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java index c129b8e6b912e..a4de8c80b7fb1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionTest.java @@ -39,7 +39,7 @@ import java.util.List; /** - * Tests for Partition + * Tests for {@link Partition}. 
*/ @RunWith(JUnit4.class) @SuppressWarnings("serial") @@ -93,8 +93,7 @@ public void testOutOfBoundsPartitions() { PCollection input = createInts(p, Arrays.asList(-1)); - PCollectionList outputs = - input.apply(Partition.of(5, new IdentityFn())); + input.apply(Partition.of(5, new IdentityFn())); try { p.run(); @@ -111,8 +110,7 @@ public void testZeroNumPartitions() { PCollection input = createInts(p, Arrays.asList(591)); try { - PCollectionList outputs = - input.apply(Partition.of(0, new IdentityFn())); + input.apply(Partition.of(0, new IdentityFn())); fail("should have failed"); } catch (IllegalArgumentException exn) { assertThat(exn.toString(), containsString("numPartitions must be > 0")); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java index 561e17b871e7e..fced952d90670 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java @@ -42,7 +42,7 @@ import java.util.Comparator; import java.util.List; -/** Tests for Top */ +/** Tests for Top. */ @RunWith(JUnit4.class) @SuppressWarnings("serial") public class TopTest { @@ -175,15 +175,14 @@ public void testTopZero() { @Test public void testPerKeySerializabilityRequirement() { DirectPipeline p = DirectPipeline.createForTest(); - PCollection input = - p.apply(Create.of(Arrays.asList(COLLECTION))) + p.apply(Create.of(Arrays.asList(COLLECTION))) .setCoder(StringUtf8Coder.of()); - PCollection>> top1 = createInputTable(p) + createInputTable(p) .apply(Top.perKey(1, new IntegerComparator())); - PCollection>> top2 = createInputTable(p) + createInputTable(p) .apply(Top.perKey(1, new IntegerComparator2())); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java index dae544fb033af..6dc77eba1475e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/KVTest.java @@ -29,7 +29,7 @@ */ @RunWith(JUnit4.class) public class KVTest { - static final Integer testValues[] = + private static final Integer TEST_VALUES[] = {null, Integer.MIN_VALUE, -1, 0, 1, Integer.MAX_VALUE}; // Wrapper around Integer.compareTo() to support null values. 
@@ -44,10 +44,10 @@ private int compareInt(Integer a, Integer b) { @Test public void testOrderByKey() { Comparator> orderByKey = new KV.OrderByKey<>(); - for (Integer key1 : testValues) { - for (Integer val1 : testValues) { - for (Integer key2 : testValues) { - for (Integer val2 : testValues) { + for (Integer key1 : TEST_VALUES) { + for (Integer val1 : TEST_VALUES) { + for (Integer key2 : TEST_VALUES) { + for (Integer val2 : TEST_VALUES) { assertEquals(compareInt(key1, key2), orderByKey.compare(KV.of(key1, val1), KV.of(key2, val2))); } @@ -59,10 +59,10 @@ public void testOrderByKey() { @Test public void testOrderByValue() { Comparator> orderByValue = new KV.OrderByValue<>(); - for (Integer key1 : testValues) { - for (Integer val1 : testValues) { - for (Integer key2 : testValues) { - for (Integer val2 : testValues) { + for (Integer key1 : TEST_VALUES) { + for (Integer val1 : TEST_VALUES) { + for (Integer key2 : TEST_VALUES) { + for (Integer val2 : TEST_VALUES) { assertEquals(compareInt(val1, val2), orderByValue.compare(KV.of(key1, val1), KV.of(key2, val2))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java index f41fef6d840bf..5d75100f58f5e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/PDoneTest.java @@ -24,8 +24,10 @@ import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.PTransform; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -72,9 +74,10 @@ public PDone apply(PBegin begin) { // TODO: This test doesn't work, because we can't handle composite // transforms that contain no nested transforms. - // @Test - // @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) - public void DISABLED_testEmptyTransform() { + @Ignore + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testEmptyTransform() { Pipeline p = TestPipeline.create(); p.begin().apply(new EmptyTransform()); From dfa53aab308e14a1fdf7f572b70a90821d922dcc Mon Sep 17 00:00:00 2001 From: relax Date: Mon, 15 Dec 2014 14:47:08 -0800 Subject: [PATCH 0020/1541] Revert change to set the sdk major_version to 0. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82178016 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 1a0d873307375..a2574774d3315 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -196,7 +196,9 @@ public DataflowPipelineJob run(Pipeline pipeline) { // Requirements about the service. Map environmentVersion = new HashMap<>(); - environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION); + // TODO: Specify the environment major version. 
+ // environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, + // ENVIRONMENT_MAJOR_VERSION); newJob.getEnvironment().setVersion(environmentVersion); // Default jobType is DATA_PARALLEL which is for java batch. String jobType = "DATA_PARALLEL"; From 622c8587675b6c197ab2f183d1172d56c1dffa45 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 15 Dec 2014 15:51:49 -0800 Subject: [PATCH 0021/1541] Canonicalize temp folder path before globbing in IOFactoryTest. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82183712 --- .../java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java index fbf2f70b22355..89ca01ef724d2 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java @@ -52,7 +52,7 @@ public void testLocalFileIO() throws Exception { tmpFolder.newFile("barf").createNewFile(); FileIOChannelFactory factory = new FileIOChannelFactory(); - Collection paths = factory.match(tmpFolder.getRoot() + "/f*"); + Collection paths = factory.match(tmpFolder.getRoot().getCanonicalPath() + "/f*"); Assert.assertEquals(2, paths.size()); Assert.assertTrue(paths.contains(foo1.getCanonicalPath())); From 904ffe9367f1fc3fb1d6083949f5cf5a6d1693ae Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 15 Dec 2014 16:32:20 -0800 Subject: [PATCH 0022/1541] Dataflow launch: provide build and run instructions in README.md on GitHub. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82187443 --- README.md | 75 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 12eba01f5e831..086c0aae78a14 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,11 @@ The key concepts in this programming model are: * [PCollection](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java): represents a collection of data, which could be bounded or unbounded in size. * [PTransform](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java): -represents a computation that transform input PCollections into output PCollections. +represents a computation that transform input PCollections into output +PCollections. * [Pipeline](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java): -manages a directed acyclic graph of PTransforms and PCollections, which is ready for excution. +manages a directed acyclic graph of PTransforms and PCollections, which is ready +for execution. * [PipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): specifies where and how the pipeline should execute. @@ -23,18 +25,77 @@ Currently there are three runners: runs the pipeline on your local machine. 2. 
The [DataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) -submits the pipeline to the Dataflow Service**\***, where it runs using managed +submits the pipeline to the Dataflow Service, where it runs using managed resources in the [Google Cloud Platform](http://cloud.google.com). 3. The [BlockingDataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java) -submits the pipeline to the Dataflow Service**\*** via the DataflowPipelineRunner and then prints messages -about the job status until execution is complete. +submits the pipeline to the Dataflow Service via the DataflowPipelineRunner and +then prints messages about the job status until execution is complete. -**\***_The Dataflow Service is currently in the Alpha phase of development and access -is limited to whitelisted users._ +_The Dataflow Service is currently in the Alpha phase of development and +access is limited to whitelisted users._ ## Getting Started +This repository consists of two modules: + +* [Java SDK](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk) +module provides a set of basic Java APIs to program against. +* [Examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples) +module provides a few samples to get started. We recommend starting with the +WordCount example. + +The following command will build both modules and install them in your local +Maven repository: + + mvn clean install + +You can speed up the build and install process by using the following options: + + 1. To skip execution of the unit tests, run: + + mvn install -DskipTests + + 2. While iterating on a specific module, use the following command to compile + and reinstall it. For example, to reinstall the 'examples' module, run: + + mvn install -pl examples + + Be careful, however, as this command will use the most recently installed SDK + from the local repository (or Maven Central) even if you have changed it + locally. + + 3. To run Maven using multiple threads, run: + + mvn -T 4 install + +After building and installing, the following command will execute the WordCount +example using the DirectPipelineRunner on your local machine: + + mvn exec:java -pl examples \ + -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ + -Dexec.args="--input= --output=" + +If you have been whitelisted for Alpha access to the Dataflow Service and +followed the [developer setup](https://cloud.google.com/dataflow/java-sdk/getting-started#DeveloperSetup) +steps, you can use the BlockingDataflowPipelineRunner to run the same program in +the Google Cloud Platform (GCP): + + mvn exec:java -pl examples \ + -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ + -Dexec.args="--project= --stagingLocation= --runner=BlockingDataflowPipelineRunner" + +Google Cloud Storage (GCS) location should be entered in the form of +gs://bucket/path/to/staging/directory. Google Cloud Platform (GCP) project +refers to its name (not number), which has been whitelisted for Cloud Dataflow. +Refer [here](https://cloud.google.com/) for instructions to get started with +Google Cloud Platform. + +Other examples can be run similarly by replacing the WordCount class name with +BigQueryTornadoes, DatastoreWordCount, TfIdf, TopWikipediaSessions, etc. 
and +adjusting runtime options under Dexec.args parameter, as specified in the +example itself. + ## More Information * [Google Cloud Dataflow](http://cloud.google.com/dataflow) From 4d91458aaf85239f25621131d0e25374dbf2bb3f Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 15 Dec 2014 16:44:33 -0800 Subject: [PATCH 0023/1541] Fix FileIOChannelFactory to allow specifying filename alone (that is, without the directory information) for inputs and outputs. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82188346 --- .../google/cloud/dataflow/sdk/util/FileIOChannelFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java index 71f66ed2f6db5..1bb7566a526b5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java @@ -47,13 +47,13 @@ public class FileIOChannelFactory implements IOChannelFactory { public Collection match(String spec) throws IOException { File file = new File(spec); - File parent = file.getParentFile(); + File parent = file.getAbsoluteFile().getParentFile(); if (!parent.exists()) { throw new IOException("Unable to find parent directory of " + spec); } final PathMatcher matcher = - FileSystems.getDefault().getPathMatcher("glob:" + spec); + FileSystems.getDefault().getPathMatcher("glob:" + file.getAbsolutePath()); File[] files = parent.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { From 43a04eb35a25bdbd5a269005cb1533afa3373d32 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 16 Dec 2014 09:35:44 -0800 Subject: [PATCH 0024/1541] Dataflow launch: enable Embed-Transitive option when bundling. The default behavior of the bundle plugin is to embed only direct dependencies, i.e., it does not embed transitive dependencies. Setting this option enables transitive dependency embedding. So far, this option has been enabled on SDK's module, but not on other modules, such as examples. Now, this option is enabled across all modules. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82240765 --- examples/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/pom.xml b/examples/pom.xml index ad847af861d8b..9ff72266c6d25 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -111,6 +111,7 @@ ${project.artifactId}-bundled-${project.version} + true *;scope=compile|runtime;inline=true From 170351e2313aa0713cedfc6598483723b8c6a57a Mon Sep 17 00:00:00 2001 From: fjp Date: Tue, 16 Dec 2014 09:57:25 -0800 Subject: [PATCH 0025/1541] Update documentation links. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82242478 --- .../java/com/google/cloud/dataflow/examples/WordCount.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/ParDo.java | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 6b1623005785d..bdd5fb6208fdf 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -37,7 +37,7 @@ /** * An example that counts words in Shakespeare. 
For a detailed walkthrough of this * example see: - * https://developers.google.com/cloud-dataflow/java-sdk/wordcount-example + * https://cloud.google.com/dataflow/java-sdk/wordcount-example * *

Concepts: Reading/writing text files; counting a PCollection; user-defined PTransforms * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index fad1235de0b90..8c9c43fb4e51f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -434,9 +434,8 @@ * Dataflow service's optimizer to "flatten out" all the compositions * into highly optimized stages. * - * @see Using ParDo + * @see Parallel + * Processing with ParDo */ public class ParDo { From 9e90654cd39af8d2b91290055b5b5066d07c6d94 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 16 Dec 2014 09:59:31 -0800 Subject: [PATCH 0026/1541] Dataflow launch: update pom.xml for certain modules. In the context of bringing in dependencies from com.google.apis group, move exclusion of Guava transitive dependency to the first instance of the dependency in the POM file. Notes: * Artifacts from com.google.apis, version 1.19 in particular, brings in an old version of Guava, which is not compatible with the SDK content. * We need to exclude this transitive dependency to ensure build works. * This seems to happen when the exclusion in set on the first instance of a dependency from com.google.apis. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82242666 --- examples/pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 9ff72266c6d25..eef3d4003c35c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -151,12 +151,6 @@ com.google.apis google-api-services-storage v1-rev11-1.19.0 - - - - com.google.apis - google-api-services-bigquery - v2-rev167-1.19.0 @@ -167,6 +161,12 @@ + + com.google.apis + google-api-services-bigquery + v2-rev167-1.19.0 + + com.google.http-client google-http-client-jackson2 From 6ad521097f50d6390ba99489d0dffb87c9991715 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 16 Dec 2014 10:37:48 -0800 Subject: [PATCH 0027/1541] Dataflow launch: update pom.xml for certain modules. In the context of bringing in dependencies from com.google.apis group, add exclusion of Guava transitive dependency to all com.google.apis dependencies. Notes: * Artifacts from com.google.apis, version 1.19 in particular, brings in an old version of Guava, which is not compatible with the SDK content. * We need to exclude this transitive dependency to ensure build works. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82246218 --- examples/pom.xml | 16 ++++++++++++++ sdk/pom.xml | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/examples/pom.xml b/examples/pom.xml index eef3d4003c35c..6604a90e531ef 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -165,12 +165,28 @@ com.google.apis google-api-services-bigquery v2-rev167-1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.http-client google-http-client-jackson2 1.19.0 + + + + com.google.guava + guava-jdk5 + + diff --git a/sdk/pom.xml b/sdk/pom.xml index 93a8f277a8370..921fa8675ac80 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -203,42 +203,98 @@ com.google.apis google-api-services-bigquery v2-rev167-1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.apis google-api-services-compute v1-rev34-1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.apis google-api-services-pubsub v1beta1-rev9-1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.apis google-api-services-storage v1-rev11-1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.http-client google-http-client-jackson2 1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.oauth-client google-oauth-client-java6 1.19.0 + + + + com.google.guava + guava-jdk5 + + com.google.apis google-api-services-datastore-protobuf v1beta2-rev1-2.1.0 + + + + com.google.guava + guava-jdk5 + + From 83b44d548641a098c017273b2bb79534b2b1525a Mon Sep 17 00:00:00 2001 From: Frances Perry Date: Wed, 17 Dec 2014 16:52:45 -0800 Subject: [PATCH 0028/1541] Update README.md --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 086c0aae78a14..0aec8a0a251b4 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,18 @@ provides a simple, powerful programming model for building both batch and streaming parallel data processing pipelines. +## Status + +The Cloud Dataflow SDK is used to access the Google Cloud Dataflow service, +which is currently in Alpha and restricted to whitelisted users. + +The SDK is publicly available and can be used for local execution by anyone. +Note, however, that the SDK is also an Alpha release and may change +significantly over time. The SDK is built to be extensible and support +additional execution environments ("runners") beyond local execution and the +Google Cloud Dataflow service. As the product matures, we look forward to +working with you to improve Cloud Dataflow. + ## Overview The key concepts in this programming model are: From 9161131f4397f077ce2967e1280067214aaea954 Mon Sep 17 00:00:00 2001 From: Sean O'Keefe Date: Fri, 19 Dec 2014 08:56:32 -0800 Subject: [PATCH 0029/1541] Updating links to canonicals --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0aec8a0a251b4..f88d9622d2c92 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Cloud Dataflow Java SDK (Alpha) -[Google Cloud Dataflow](http://cloud.google.com/dataflow) +[Google Cloud Dataflow](https://cloud.google.com/dataflow/) provides a simple, powerful programming model for building both batch and streaming parallel data processing pipelines. 
@@ -97,11 +97,10 @@ the Google Cloud Platform (GCP): -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ -Dexec.args="--project= --stagingLocation= --runner=BlockingDataflowPipelineRunner" -Google Cloud Storage (GCS) location should be entered in the form of +[Google Cloud Storage](https://cloud.google.com/storage/) (GCS) location should be entered in the form of gs://bucket/path/to/staging/directory. Google Cloud Platform (GCP) project refers to its name (not number), which has been whitelisted for Cloud Dataflow. -Refer [here](https://cloud.google.com/) for instructions to get started with -Google Cloud Platform. +Refer to [Google Cloud Platform](https://cloud.google.com/) for instructions on getting started. Other examples can be run similarly by replacing the WordCount class name with BigQueryTornadoes, DatastoreWordCount, TfIdf, TopWikipediaSessions, etc. and @@ -110,6 +109,6 @@ example itself. ## More Information -* [Google Cloud Dataflow](http://cloud.google.com/dataflow) +* [Google Cloud Dataflow](https://cloud.google.com/dataflow/) * [Dataflow Concepts and Programming Model](https://cloud.google.com/dataflow/java-sdk/building-a-pipeline) * [Javadoc](https://cloud.google.com/dataflow/java-sdk/JavaDoc/index) From 60035b4e1f2a31485298f936d9715fcff95bc4ea Mon Sep 17 00:00:00 2001 From: fjp Date: Tue, 16 Dec 2014 12:18:22 -0800 Subject: [PATCH 0030/1541] Add a missing package-info.java for windowing. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82255231 --- .../transforms/windowing/package-info.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java new file mode 100644 index 0000000000000..cab217d3e9698 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Defines the {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} transform + * for dividing the elements in a PCollection into windows. + * + *

{@code Window} logically divides up or groups the elements of a + * {@link com.google.cloud.dataflow.sdk.values.PCollection} into finite windows according to a + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn}. + * The output of {@code Window} contains the same elements as input, but they + * have been logically assigned to windows. The next + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}s, including one + * within composite transforms, will group by the combination of keys and + * windows. + * + *

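As a sketch of the idea (assuming an existing PCollection<KV<String, Long>> named input, and fixed one-minute windows via FixedWindows; transform names are as of this SDK version):

    // Assign each element to a one-minute fixed window.
    PCollection<KV<String, Long>> windowed = input.apply(
        Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardMinutes(1))));

    // A subsequent GroupByKey now groups by key and window together,
    // so each output group covers a single one-minute window.
    PCollection<KV<String, Iterable<Long>>> grouped =
        windowed.apply(GroupByKey.<String, Long>create());
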
Windowing a {@code PCollection} allows chunks of it to be processed + * individually, before the entire {@code PCollection} is available. This is + * especially important for {@code PCollection}s with unbounded size, since the full + * {@code PCollection} is never available at once. + * + *

For {@code PCollection}s with a bounded size, by default, all data is implicitly in a + * single window, and this replicates conventional batch mode. However, windowing can still be a + * convenient way to express time-sliced algorithms over bounded {@code PCollection}s. + */ +package com.google.cloud.dataflow.sdk.transforms.windowing; From aab6b6d2c0b27b2a925d0dd6cd420bf56f41f795 Mon Sep 17 00:00:00 2001 From: peihe Date: Thu, 18 Dec 2014 11:58:18 -0800 Subject: [PATCH 0031/1541] Maven: turn off trimStackTrace. Before the fix, only top level Exceptions are shown, e.t.c. InvocationTargetException and the root Exception is trimmed. Currently, it shows the full stack trace. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82451245 --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index fd5b04376e43e..39eec27b528b2 100644 --- a/pom.xml +++ b/pom.xml @@ -169,6 +169,7 @@ ${dataflowProjectName} false + false From 96b0834a952803e1b09b7429464041d7b0ded4c7 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 19 Dec 2014 11:24:19 -0800 Subject: [PATCH 0032/1541] Fix typos in README.md. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82529793 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0aec8a0a251b4..18f78fc9f99d6 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ The key concepts in this programming model are: * [PCollection](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java): represents a collection of data, which could be bounded or unbounded in size. * [PTransform](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java): -represents a computation that transform input PCollections into output +represents a computation that transforms input PCollections into output PCollections. * [Pipeline](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java): manages a directed acyclic graph of PTransforms and PCollections, which is ready @@ -31,7 +31,7 @@ for execution. * [PipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): specifies where and how the pipeline should execute. -Currently there are three runners: +Currently there are three PipelineRunners: 1. The [DirectPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java) runs the pipeline on your local machine. @@ -105,7 +105,7 @@ Google Cloud Platform. Other examples can be run similarly by replacing the WordCount class name with BigQueryTornadoes, DatastoreWordCount, TfIdf, TopWikipediaSessions, etc. and -adjusting runtime options under Dexec.args parameter, as specified in the +adjusting runtime options under the Dexec.args parameter, as specified in the example itself. ## More Information From df872084cd83178569ed60f58a4a9c18a8b24195 Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 22 Dec 2014 10:51:49 -0800 Subject: [PATCH 0033/1541] Dataflow open-sourcing: Add file explaining contribution rules. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82665181 --- CONTRIBUTING.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000000..26147ec273b71 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,26 @@ +Want to contribute? Great! First, read this page (including the small print at +the end). + +### Before you contribute +Before we can use your code, you must sign the +[Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual?csw=1) +(CLA), which you can do online. The CLA is necessary mainly because you own the +copyright to your changes, even after your contribution becomes part of our +codebase, so we need your permission to use and distribute your code. We also +need to be sure of various other things. For instance that you'll tell us if you +know that your code infringes on other people's patents. You don't have to sign +the CLA until after you've submitted your code for review and a member has +approved it, but you must do it before we can put your code into our codebase. + +Before you start working on a larger contribution, we recommend to get in touch +with us first through the issue tracker with your idea so that we can help out +and possibly guide you. Coordinating up front makes it much easier to avoid +frustration later on. + +### Code reviews +All submissions, including submissions by project members, require review. We +use Github pull requests for this purpose. + +### The small print +Contributions made by corporations are covered by a different agreement than +the one above, the Software Grant and Corporate Contributor License Agreement. From 3db16e53bb153c04fba55423f7ebe90ffa5d737f Mon Sep 17 00:00:00 2001 From: earhart Date: Fri, 19 Dec 2014 13:39:17 -0800 Subject: [PATCH 0034/1541] In GroupAlsoByWindowsDoFnTest, match values in arbitrary order. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82539405 --- .../cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index d482d2c4d345a..d01fe1008fa39 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -100,7 +100,7 @@ public class GroupAlsoByWindowsDoFnTest { WindowedValue>> item0 = result.get(0); assertEquals("k", item0.getValue().getKey()); - assertThat(item0.getValue().getValue(), Matchers.contains("v1", "v2")); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v1", "v2")); assertEquals(new Instant(9), item0.getTimestamp()); assertThat(item0.getWindows(), Matchers.contains(window(0, 10))); @@ -146,7 +146,7 @@ public class GroupAlsoByWindowsDoFnTest { WindowedValue>> item1 = result.get(1); assertEquals("k", item1.getValue().getKey()); - assertThat(item1.getValue().getValue(), Matchers.contains("v1", "v2")); + assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder("v1", "v2")); assertEquals(new Instant(19), item1.getTimestamp()); assertThat(item1.getWindows(), Matchers.contains(window(0, 20))); @@ -189,7 +189,7 @@ public class GroupAlsoByWindowsDoFnTest { WindowedValue>> item0 = result.get(0); assertEquals("k", item0.getValue().getKey()); - assertThat(item0.getValue().getValue(), Matchers.contains("v1", "v2")); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v1", "v2")); assertEquals(new Instant(14), item0.getTimestamp()); assertThat(item0.getWindows(), Matchers.contains(window(0, 15))); From 45b78eaa6abfc7fcd0d173e111b60b66c811dbe4 Mon Sep 17 00:00:00 2001 From: Kelly Westbrooks Date: Fri, 9 Jan 2015 10:57:36 -0800 Subject: [PATCH 0035/1541] Generalize type signature of KV.of --- .../main/java/com/google/cloud/dataflow/sdk/values/KV.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java index febb1302bd8c1..81b6806a1125f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -35,8 +35,8 @@ public class KV implements Serializable { private static final long serialVersionUID = 0; /** Returns a KV with the given key and value. */ - public static KV of(K key, V value) { - return new KV<>(key, value); + public static KV of(SK key, SV value) { + return new KV(key, value); } /** Returns the key of this KV. */ From b2870737bf3ffaed94eaa39aa5eebed8c1bf5a3c Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 17 Dec 2014 11:14:46 -0800 Subject: [PATCH 0036/1541] Cleanup: clarify fake exceptions and error messages in Tests, it helps to find the real tests failure when people go through logs. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82346418 --- .../sdk/runners/worker/DataflowWorkerTest.java | 3 ++- .../cloud/dataflow/sdk/testing/ExpectedLogsTest.java | 11 +++-------- .../cloud/dataflow/sdk/util/PackageUtilTest.java | 6 +++--- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java index 2d51fb2838954..037946c29a917 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java @@ -63,7 +63,8 @@ public void testWhenNoWorkThatWeReturnFalse() throws Exception { @Test public void testWhenProcessingWorkUnitFailsWeReportStatus() throws Exception { DataflowWorker worker = new DataflowWorker(mockWorkUnitClient, options); - when(mockWorkUnitClient.getWorkItem()).thenReturn(new WorkItem().setId(1L)).thenReturn(null); + when(mockWorkUnitClient.getWorkItem()).thenReturn( + new WorkItem().setId(1L).setJobId("Expected to fail the job")).thenReturn(null); assertFalse(worker.getAndPerformWork()); verify(mockWorkUnitClient).reportWorkItemStatus(argThat(cloudWorkHasErrors())); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java index 4d9cd0e76639a..bcd96cefffe45 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java @@ -54,7 +54,6 @@ public void testWhenExpectationIsMatchedFully() throws Throwable { expectedLogs.after(); } - @Test public void testWhenExpectationIsMatchedPartially() throws Throwable { String expected = generateRandomString(); @@ -69,7 +68,7 @@ public void testWhenExpectationIsMatchedWithExceptionBeingLogged() throws Throwa String expected = generateRandomString(); expectedLogs.before(); expectedLogs.expectError(expected); - LOG.error(expected, new IOException()); + LOG.error(expected, new IOException("Fake Exception")); expectedLogs.after(); } @@ -90,13 +89,9 @@ public void testLogCaptureOccursAtLowestLogLevel() throws Throwable { expectedLogs.after(); } - // Generates random strings of 10 characters. + // Generates a random fake error message. 
private static String generateRandomString() { Random random = new Random(); - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < 10; i++) { - builder.append('a' + (char) random.nextInt(26)); - } - return builder.toString(); + return "Fake error message: " + random.nextInt(); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java index 7d923c2fcdb7f..e49782f6b221b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java @@ -244,7 +244,7 @@ public void testPackageUploadFailsWhenIOExceptionThrown() throws Exception { GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); when(mockGcsUtil.create(any(GcsPath.class), anyString())) - .thenThrow(new IOException("Upload error")); + .thenThrow(new IOException("Fake Exception: Upload error")); try { PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, @@ -264,8 +264,8 @@ public void testPackageUploadEventuallySucceeds() throws Exception { GcsPath gcsStaging = GcsPath.fromComponents("somebucket", "base/path"); when(mockGcsUtil.fileSize(any(GcsPath.class))).thenReturn(-1L); when(mockGcsUtil.create(any(GcsPath.class), anyString())) - .thenThrow(new IOException("410 Gone")) // First attempt fails - .thenReturn(pipe.sink()); // second attempt succeeds + .thenThrow(new IOException("Fake Exception: 410 Gone")) // First attempt fails + .thenReturn(pipe.sink()); // second attempt succeeds try { PackageUtil.stageClasspathElementsToGcs(mockGcsUtil, From 5cf3ed13d19335831034628a4b852d2b21e85630 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 17 Dec 2014 11:43:31 -0800 Subject: [PATCH 0037/1541] Add support for enum defaults on PipelineOptions using the enum name. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82349417 --- .../DataflowPipelineWorkerPoolOptions.java | 22 ++----------------- .../cloud/dataflow/sdk/options/Default.java | 10 +++++++++ .../sdk/options/ProxyInvocationHandler.java | 3 +++ .../options/ProxyInvocationHandlerTest.java | 19 ++++++++++++++++ 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index 0193ddaac1077..da0a6e588605f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -61,19 +61,10 @@ public String getAlgorithm() { } @Description("(experimental) The autoscaling algorithm to use for the workerpool.") - @Default.InstanceFactory(AutoscalingAlgorithmTypeFactory.class) + @Default.Enum("NONE") AutoscalingAlgorithmType getAutoscalingAlgorithm(); void setAutoscalingAlgorithm(AutoscalingAlgorithmType value); - /** Returns the default NONE AutoscalingAlgorithmType. */ - public static class AutoscalingAlgorithmTypeFactory implements - DefaultValueFactory { - @Override - public AutoscalingAlgorithmType create(PipelineOptions options) { - return AutoscalingAlgorithmType.NONE; - } - } - /** * Max number of workers to use when using workerpool autoscaling. * This option is experimental and subject to change. 
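A minimal sketch, not part of the patch above, of how a user-defined options interface might pick up the new @Default.Enum annotation; the WorkerLogOptions interface, the LogLevel enum, and the option name are hypothetical, and only the annotation usage mirrors the change to DataflowPipelineWorkerPoolOptions.

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

/** Hypothetical options interface using an enum-name-based default. */
public interface WorkerLogOptions extends PipelineOptions {
  /** Hypothetical enum; any enum type returned by the getter works the same way. */
  enum LogLevel { DEBUG, INFO, WARN }

  @Description("Log level for worker harness logging.")
  @Default.Enum("INFO") // must equal LogLevel.INFO.name()
  LogLevel getWorkerLogLevel();
  void setWorkerLogLevel(LogLevel value);
}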
@@ -120,19 +111,10 @@ public String getApiServiceName() { } @Description("Type of API for handling cluster management,i.e. resizing, healthchecking, etc.") - @Default.InstanceFactory(ClusterManagerApiTypeFactory.class) + @Default.Enum("COMPUTE_ENGINE") ClusterManagerApiType getClusterManagerApi(); void setClusterManagerApi(ClusterManagerApiType value); - /** Returns the default COMPUTE_ENGINE ClusterManagerApiType. */ - public static class ClusterManagerApiTypeFactory implements - DefaultValueFactory { - @Override - public ClusterManagerApiType create(PipelineOptions options) { - return ClusterManagerApiType.COMPUTE_ENGINE; - } - } - /** * Machine type to create worker VMs as. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java index 321fe744ca49b..c295bde5eaf71 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java @@ -116,6 +116,16 @@ double value(); } + /** + * This represents that the default of the option is the specified enum. + * The value should equal the enum's {@link java.lang.Enum#name() name}. + */ + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface Enum { + java.lang.String value(); + } + /** * Value must be of type {@link DefaultValueFactory} and have a default constructor. * Value is instantiated and then used as a type factory to generate the default. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java index cbe9c422b325b..39ab988182370 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java @@ -252,6 +252,9 @@ private Object getDefault(PipelineOptions proxy, Method method) { return ((Default.String) annotation).value(); } else if (annotation instanceof Default.String) { return ((Default.String) annotation).value(); + } else if (annotation instanceof Default.Enum) { + return Enum.valueOf((Class) method.getReturnType(), + ((Default.Enum) annotation).value()); } else if (annotation instanceof Default.InstanceFactory) { return InstanceBuilder.ofType(((Default.InstanceFactory) annotation).value()) .build() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index 0b2dc0a9adfba..01085322f26e4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -115,6 +115,21 @@ public String create(PipelineOptions options) { } } + /** A test enum for testing {@link Default.Enum @Default.Enum}. */ + public enum EnumType { + MyEnum("MyTestEnum"); + + private final String value; + private EnumType(String value) { + this.value = value; + } + + @Override + public String toString() { + return value; + } + } + /** A test interface containing all the {@link Default} annotations. 
*/ public static interface DefaultAnnotations extends PipelineOptions { @Default.Boolean(true) @@ -147,6 +162,9 @@ public static interface DefaultAnnotations extends PipelineOptions { @Default.Class(DefaultAnnotations.class) Class getClassOption(); void setClassOption(Class value); + @Default.Enum("MyEnum") + EnumType getEnum(); + void setEnum(EnumType value); @Default.InstanceFactory(TestOptionFactory.class) String getComplex(); void setComplex(String value); @@ -166,6 +184,7 @@ public void testAnnotationDefaults() throws Exception { assertEquals(9d, proxy.getDouble(), 0d); assertEquals("testString", proxy.getString()); assertEquals(DefaultAnnotations.class, proxy.getClassOption()); + assertEquals(EnumType.MyEnum, proxy.getEnum()); assertEquals("testOptionFactory", proxy.getComplex()); } From c4c96bb0e84ed7aa9c0c39c25a2e6b45b81a6dbb Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 17 Dec 2014 12:19:43 -0800 Subject: [PATCH 0038/1541] Testing: add CombineTest cases for windowing. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82352893 --- .../dataflow/sdk/transforms/CombineTest.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index baf8a24756cc7..4307619d3f93d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -183,6 +183,33 @@ private void runTestAccumulatingCombine(KV[] table, p.run(); } + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testWindowedCombine() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.timestamped(Arrays.asList(TABLE), + Arrays.asList(0L, 1L, 6L, 7L, 8L))) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into(FixedWindows.of(Duration.millis(2)))); + + PCollection sum = input + .apply(Values.create()) + .apply(Combine.globally(new SumInts())); + + PCollection> sumPerKey = input + .apply(Combine.perKey(new SumInts())); + + DataflowAssert.that(sum).containsInAnyOrder(2, 5, 13); + DataflowAssert.that(sumPerKey).containsInAnyOrder( + KV.of("a", 2), + KV.of("a", 4), + KV.of("b", 1), + KV.of("b", 13)); + p.run(); + } + @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testWindowedCombineEmpty() { @@ -510,6 +537,7 @@ public boolean equals(Object otherObj) { return false; } + @Override public String toString() { return sum + ":" + inputs + ":" + merges + ":" + outputs; } From 73251c1265ef61afb0fb1f67f3ef23a446549217 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 17 Dec 2014 13:02:04 -0800 Subject: [PATCH 0039/1541] Add support for using ServiceLoader to register PipelineOptions and PipelineRunners with the SDK. 
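A minimal sketch, assuming the registrar interfaces land as shown below in this patch, of how a third-party module could make its own options discoverable by the SDK at load time; MyCustomOptions and MyOptionsRegistrar are hypothetical names, and @AutoService generates the required META-INF/services entry at compile time.

import com.google.auto.service.AutoService;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar;
import com.google.common.collect.ImmutableList;

/** Hypothetical user-defined options interface. */
interface MyCustomOptions extends PipelineOptions {
  String getMyFlag();
  void setMyFlag(String value);
}

/** Hypothetical registrar that exposes MyCustomOptions via ServiceLoader. */
@AutoService(PipelineOptionsRegistrar.class)
public class MyOptionsRegistrar implements PipelineOptionsRegistrar {
  @Override
  public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
    return ImmutableList.<Class<? extends PipelineOptions>>of(MyCustomOptions.class);
  }
}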
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82356725 --- sdk/pom.xml | 9 +++ .../sdk/options/PipelineOptionsFactory.java | 65 ++++++++++------- .../sdk/options/PipelineOptionsRegistrar.java | 36 ++++++++++ .../runners/DataflowPipelineRegistrar.java | 58 +++++++++++++++ .../sdk/runners/DirectPipelineRegistrar.java | 53 ++++++++++++++ .../sdk/runners/PipelineRunnerRegistrar.java | 37 ++++++++++ .../options/PipelineOptionsFactoryTest.java | 14 +++- .../DataflowPipelineRegistrarTest.java | 72 +++++++++++++++++++ .../runners/DirectPipelineRegistrarTest.java | 69 ++++++++++++++++++ 9 files changed, 388 insertions(+), 25 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrarTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrarTest.java diff --git a/sdk/pom.xml b/sdk/pom.xml index 921fa8675ac80..1e9633695d10d 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -144,6 +144,7 @@ ${project.artifactId}-bundled-${project.version} + META-INF=target/classes/META-INF !${dataflow}.sdk.runners.worker.*, !${dataflow}.sdk.streaming.*, @@ -346,6 +347,14 @@ 2.4 + + + com.google.auto.service + auto-service + 1.0-rc2 + true + + org.hamcrest diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 176ff8c96e509..250baa62af277 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -16,13 +16,9 @@ package com.google.cloud.dataflow.sdk.options; -import com.google.cloud.dataflow.sdk.PipelineResult; -import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; -import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; -import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.runners.PipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar; import com.google.cloud.dataflow.sdk.runners.worker.DataflowWorkerHarness; -import com.google.cloud.dataflow.sdk.testing.TestDataflowPipelineOptions; import com.google.common.base.Equivalence; import com.google.common.base.Function; import com.google.common.base.Preconditions; @@ -63,6 +59,7 @@ import java.util.List; import java.util.Map; import java.util.Queue; +import java.util.ServiceLoader; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -283,18 +280,7 @@ Class getProxyClass() { @SuppressWarnings("rawtypes") private static final Class[] EMPTY_CLASS_ARRAY = new Class[0]; private static final ObjectMapper MAPPER = new ObjectMapper(); - - // TODO: Add dynamic registration of pipeline runners. 
- private static final Map>> - SUPPORTED_PIPELINE_RUNNERS = - ImmutableMap.>>builder() - .put(DirectPipelineRunner.class.getSimpleName(), - DirectPipelineRunner.class) - .put(DataflowPipelineRunner.class.getSimpleName(), - DataflowPipelineRunner.class) - .put(BlockingDataflowPipelineRunner.class.getSimpleName(), - BlockingDataflowPipelineRunner.class) - .build(); + private static final Map>> SUPPORTED_PIPELINE_RUNNERS; /** Methods which are ignored when validating the proxy class. */ private static final Set IGNORED_METHODS; @@ -327,12 +313,31 @@ Class getProxyClass() { throw new ExceptionInInitializerError(e); } - // TODO Add support for dynamically loading and registering the options interfaces. + // Store the list of all available pipeline runners. + ImmutableMap.Builder>> builder = + new ImmutableMap.Builder<>(); + Set pipelineRunnerRegistrars = + Sets.newTreeSet(ObjectsClassComparator.INSTANCE); + pipelineRunnerRegistrars.addAll( + Lists.newArrayList(ServiceLoader.load(PipelineRunnerRegistrar.class))); + for (PipelineRunnerRegistrar registrar : pipelineRunnerRegistrars) { + for (Class> klass : registrar.getPipelineRunners()) { + builder.put(klass.getSimpleName(), klass); + } + } + SUPPORTED_PIPELINE_RUNNERS = builder.build(); + + // Load and register the list of all classes that extend PipelineOptions. register(PipelineOptions.class); - register(DirectPipelineOptions.class); - register(DataflowPipelineOptions.class); - register(BlockingDataflowPipelineOptions.class); - register(TestDataflowPipelineOptions.class); + Set pipelineOptionsRegistrars = + Sets.newTreeSet(ObjectsClassComparator.INSTANCE); + pipelineOptionsRegistrars.addAll( + Lists.newArrayList(ServiceLoader.load(PipelineOptionsRegistrar.class))); + for (PipelineOptionsRegistrar registrar : pipelineOptionsRegistrars) { + for (Class klass : registrar.getPipelineOptions()) { + register(klass); + } + } } /** @@ -430,12 +435,15 @@ public static Set> getRegisteredOptions() { return Collections.unmodifiableSet(REGISTERED_OPTIONS); } + static Map>> getRegisteredRunners() { + return SUPPORTED_PIPELINE_RUNNERS; + } + static List getPropertyDescriptors( Set> interfaces) { return COMBINED_CACHE.get(interfaces).getPropertyDescriptors(); } - /** * Creates a set of {@link DataflowWorkerHarnessOptions} based of a set of known system * properties. This is meant to only be used from the {@link DataflowWorkerHarness} as a method to @@ -701,6 +709,15 @@ private static void validateClass(Class iface, iface.getName()); } + /** A {@link Comparator} which uses the classes canonical name to compare them. */ + private static class ObjectsClassComparator implements Comparator { + static final ObjectsClassComparator INSTANCE = new ObjectsClassComparator(); + @Override + public int compare(Object o1, Object o2) { + return o1.getClass().getCanonicalName().compareTo(o2.getClass().getCanonicalName()); + } + } + /** A {@link Comparator} which uses the generic method signature to sort them. 
*/ private static class MethodComparator implements Comparator { static final MethodComparator INSTANCE = new MethodComparator(); @@ -850,7 +867,7 @@ private static Map parseObjects( String runner = Iterables.getOnlyElement(entry.getValue()); Preconditions.checkArgument(SUPPORTED_PIPELINE_RUNNERS.containsKey(runner), "Unknown 'runner' specified %s, supported pipeline runners %s", - runner, SUPPORTED_PIPELINE_RUNNERS.keySet()); + runner, Sets.newTreeSet(SUPPORTED_PIPELINE_RUNNERS.keySet())); convertedOptions.put("runner", SUPPORTED_PIPELINE_RUNNERS.get(runner)); } else if (method.getReturnType().isArray() || Collection.class.isAssignableFrom(method.getReturnType())) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java new file mode 100644 index 0000000000000..4235ec7dc056b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.auto.service.AutoService; + +import java.util.ServiceLoader; + +/** + * {@link PipelineOptions} creators have the ability to automatically have their + * {@link PipelineOptions} registered with this SDK by creating a {@link ServiceLoader} entry + * and a concrete implementation of this interface. + *

+ * Note that automatic registration of any {@PipelineOptions} requires users + * conform to the limitations discussed on {@link PipelineOptionsFactory#register(Class)}. + *

+ * It is optional but recommended to use one of the many build time tools such as + * {@link AutoService} to generate the necessary META-INF files automatically. + */ +public interface PipelineOptionsRegistrar { + Iterable> getPipelineOptions(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java new file mode 100644 index 0000000000000..8e8f3c5017f7e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.auto.service.AutoService; +import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; +import com.google.common.collect.ImmutableList; + +/** + * Contains the {@link PipelineOptionsRegistrar} and {@link PipelineRunnerRegistrar} for + * the {@link DataflowPipeline}. + */ +public class DataflowPipelineRegistrar { + private DataflowPipelineRegistrar() { } + + /** + * Register the {@link DataflowPipelineOptions} and {@link BlockingDataflowPipelineOptions}. + */ + @AutoService(PipelineOptionsRegistrar.class) + public static class Options implements PipelineOptionsRegistrar { + @Override + public Iterable> getPipelineOptions() { + return ImmutableList.>of( + DataflowPipelineOptions.class, + BlockingDataflowPipelineOptions.class); + } + } + + /** + * Register the {@link DataflowPipelineRunner} and {@link BlockingDataflowPipelineRunner}. + */ + @AutoService(PipelineRunnerRegistrar.class) + public static class Runner implements PipelineRunnerRegistrar { + @Override + public Iterable>> getPipelineRunners() { + return ImmutableList.>>of( + DataflowPipelineRunner.class, + BlockingDataflowPipelineRunner.class); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java new file mode 100644 index 0000000000000..03be8546f2846 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.auto.service.AutoService; +import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; +import com.google.common.collect.ImmutableList; + +/** + * Contains the {@link PipelineOptionsRegistrar} and {@link PipelineRunnerRegistrar} for + * the {@link DirectPipeline}. + */ +public class DirectPipelineRegistrar { + private DirectPipelineRegistrar() { } + + /** + * Register the {@link DirectPipelineOptions}. + */ + @AutoService(PipelineRunnerRegistrar.class) + public static class Runner implements PipelineRunnerRegistrar { + @Override + public Iterable>> getPipelineRunners() { + return ImmutableList.>>of(DirectPipelineRunner.class); + } + } + + /** + * Register the {@link DirectPipelineRunner}. + */ + @AutoService(PipelineOptionsRegistrar.class) + public static class Options implements PipelineOptionsRegistrar { + @Override + public Iterable> getPipelineOptions() { + return ImmutableList.>of(DirectPipelineOptions.class); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java new file mode 100644 index 0000000000000..aea6b5151747c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import com.google.auto.service.AutoService; + +import java.util.ServiceLoader; + +/** + * {@link PipelineRunner} creators have the ability to automatically have their + * {@link PipelineRunner} registered with this SDK by creating a {@link ServiceLoader} entry + * and a concrete implementation of this interface. + *

+ * Note that automatic registration of any {@PipelineOptions} requires users + * conform to the limit that each {@link PipelineRunner}'s {@link Class#getSimpleName() simple name} + * must be unique. + *

+ * It is optional but recommended to use one of the many build time tools such as + * {@link AutoService} to generate the necessary META-INF files automatically. + */ +public interface PipelineRunnerRegistrar { + public Iterable>> getPipelineRunners(); +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index 206a13f70d171..872f31dae3554 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -44,6 +45,17 @@ public class PipelineOptionsFactoryTest { @Rule public ExpectedException expectedException = ExpectedException.none(); @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + @Test + public void testAutomaticRegistrationOfPipelineOptions() { + assertTrue(PipelineOptionsFactory.getRegisteredOptions().contains(DirectPipelineOptions.class)); + } + + @Test + public void testAutomaticRegistrationOfRunners() { + assertEquals(DirectPipelineRunner.class, + PipelineOptionsFactory.getRegisteredRunners().get("DirectPipelineRunner")); + } + @Test public void testCreationFromSystemProperties() { System.getProperties().putAll(ImmutableMap @@ -493,7 +505,7 @@ public void testSettingRunner() { public void testSettingUnknownRunner() { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("Unknown 'runner' specified UnknownRunner, supported pipeline " - + "runners [DirectPipelineRunner, DataflowPipelineRunner, BlockingDataflowPipelineRunner]"); + + "runners [BlockingDataflowPipelineRunner, DataflowPipelineRunner, DirectPipelineRunner]"); String[] args = new String[] {"--runner=UnknownRunner"}; PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrarTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrarTest.java new file mode 100644 index 0000000000000..13c20d41d6805 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrarTest.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ServiceLoader; + +/** Tests for {@link DataflowPipelineRegistrar}. */ +@RunWith(JUnit4.class) +public class DataflowPipelineRegistrarTest { + @Test + public void testCorrectOptionsAreReturned() { + assertEquals(ImmutableList.of(DataflowPipelineOptions.class, + BlockingDataflowPipelineOptions.class), + new DataflowPipelineRegistrar.Options().getPipelineOptions()); + } + + @Test + public void testCorrectRunnersAreReturned() { + assertEquals(ImmutableList.of(DataflowPipelineRunner.class, + BlockingDataflowPipelineRunner.class), + new DataflowPipelineRegistrar.Runner().getPipelineRunners()); + } + + @Test + public void testServiceLoaderForOptions() { + for (PipelineOptionsRegistrar registrar : + Lists.newArrayList(ServiceLoader.load(PipelineOptionsRegistrar.class).iterator())) { + if (registrar instanceof DataflowPipelineRegistrar.Options) { + return; + } + } + fail("Expected to find " + DataflowPipelineRegistrar.Options.class); + } + + @Test + public void testServiceLoaderForRunner() { + for (PipelineRunnerRegistrar registrar : + Lists.newArrayList(ServiceLoader.load(PipelineRunnerRegistrar.class).iterator())) { + if (registrar instanceof DataflowPipelineRegistrar.Runner) { + return; + } + } + fail("Expected to find " + DataflowPipelineRegistrar.Runner.class); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrarTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrarTest.java new file mode 100644 index 0000000000000..6f59ff641d1ac --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrarTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ServiceLoader; + +/** Tests for {@link DirectPipelineRegistrar}. 
*/ +@RunWith(JUnit4.class) +public class DirectPipelineRegistrarTest { + @Test + public void testCorrectOptionsAreReturned() { + assertEquals(ImmutableList.of(DirectPipelineOptions.class), + new DirectPipelineRegistrar.Options().getPipelineOptions()); + } + + @Test + public void testCorrectRunnersAreReturned() { + assertEquals(ImmutableList.of(DirectPipelineRunner.class), + new DirectPipelineRegistrar.Runner().getPipelineRunners()); + } + + @Test + public void testServiceLoaderForOptions() { + for (PipelineOptionsRegistrar registrar : + Lists.newArrayList(ServiceLoader.load(PipelineOptionsRegistrar.class).iterator())) { + if (registrar instanceof DirectPipelineRegistrar.Options) { + return; + } + } + fail("Expected to find " + DirectPipelineRegistrar.Options.class); + } + + @Test + public void testServiceLoaderForRunner() { + for (PipelineRunnerRegistrar registrar : + Lists.newArrayList(ServiceLoader.load(PipelineRunnerRegistrar.class).iterator())) { + if (registrar instanceof DirectPipelineRegistrar.Runner) { + return; + } + } + fail("Expected to find " + DirectPipelineRegistrar.Runner.class); + } +} From 7c9b182ff58b3c1d390fc4dfbd62b24ed97a31ea Mon Sep 17 00:00:00 2001 From: chernyak Date: Wed, 17 Dec 2014 14:18:13 -0800 Subject: [PATCH 0040/1541] Insert reshard for RequiresKeyedState for DirectPipelineRunner [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82364001 --- .../google/cloud/dataflow/sdk/transforms/ParDo.java | 12 +++++++++++- .../cloud/dataflow/sdk/transforms/ParDoTest.java | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 8c9c43fb4e51f..4ddca9ef7dd12 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.StringUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionTuple; import com.google.cloud.dataflow.sdk.values.PCollectionView; @@ -1044,7 +1045,16 @@ private static DoFnRunner evaluateHelper( for (DirectPipelineRunner.ValueWithMetadata elem : context.getPCollectionValuesWithMetadata(input)) { - executionContext.setKey(elem.getKey()); + if (doFn instanceof DoFn.RequiresKeyedState) { + // If the DoFn needs keyed state, set the implicit keys to the keys in the input elements. 
+ if (!(elem.getValue() instanceof KV)) { + throw new IllegalStateException( + name + " marked as 'RequiresKeyedState' but input elements were not of type KV."); + } + executionContext.setKey(((KV) elem.getValue()).getKey()); + } else { + executionContext.setKey(elem.getKey()); + } fnRunner.processElement((WindowedValue) elem.getWindowedValue()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index a4fed2ada20b6..7034d1c53647c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -735,7 +735,7 @@ public void testParDoKeyedStateDoFnWithNonKvInput() { fail("should have failed"); } catch (RuntimeException exn) { assertThat(exn.toString(), - containsString("Keyed state is only available")); + containsString("'RequiresKeyedState' but input elements were not of type KV")); } } From d255b28d7a50f09f5cae09b516d8317456b9adab Mon Sep 17 00:00:00 2001 From: tudorm Date: Wed, 17 Dec 2014 22:43:59 -0800 Subject: [PATCH 0041/1541] Do not advance the iteration in getProgress() and updateStopPosition(); the two are called from a different thread than the read loop, and race with the ValuesIterator when using the same non-thread safe shuffle client. To this end, the GroupingShuffleSourceIterator maintains a monotonically increasing promisePosition and guarantees not to stop before reaching it. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82399182 --- .../runners/worker/GroupingShuffleSource.java | 72 +++++++------------ .../worker/GroupingShuffleSourceTest.java | 38 ++++++---- 2 files changed, 49 insertions(+), 61 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java index ac2bd5c3a78fe..19ce35800c6ee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java @@ -20,7 +20,6 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; -import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; import com.google.api.client.util.Preconditions; @@ -43,7 +42,6 @@ import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.values.KV; -import org.joda.time.Duration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,15 +156,27 @@ private final class GroupingShuffleSourceIterator */ private ByteArrayShufflePosition stopPosition = null; + /** + * Position that this @GroupingShuffleSourceIterator is guaranteed + * not to stop before reaching (inclusive); @promisedPosition can + * only increase monotonically and is updated when advancing to a + * new group of records (either in the most recent call to next() + * or when peeked at in hasNext()). + */ + private ByteArrayShufflePosition promisedPosition = null; + /** The next group to be consumed, if available. 
*/ private KeyGroupedShuffleEntries nextGroup = null; public GroupingShuffleSourceIterator(ShuffleEntryReader reader) { + promisedPosition = ByteArrayShufflePosition.fromBase64( + startShufflePosition); + if (promisedPosition == null) { + promisedPosition = new ByteArrayShufflePosition(new byte[0]); + } stopPosition = ByteArrayShufflePosition.fromBase64(stopShufflePosition); - this.groups = - new GroupingShuffleEntryIterator(reader.read( - ByteArrayShufflePosition.fromBase64(startShufflePosition), - stopPosition)) { + this.groups = new GroupingShuffleEntryIterator(reader.read( + promisedPosition, stopPosition)) { @Override protected void notifyElementRead(long byteSize) { GroupingShuffleSource.this.notifyElementRead(byteSize); @@ -177,26 +187,18 @@ protected void notifyElementRead(long byteSize) { private void advanceIfNecessary() { if (nextGroup == null && groups.hasNext()) { nextGroup = groups.next(); + promisedPosition = ByteArrayShufflePosition.of(nextGroup.position); } } @Override public boolean hasNext() throws IOException { - return hasNextInternal(); - } - - /** - * Returns false if the next group does not exist (i.e., no more - * records available) or the group is beyond @stopPosition. - */ - private boolean hasNextInternal() { advanceIfNecessary(); if (nextGroup == null) { return false; } - ByteArrayShufflePosition current = - ByteArrayShufflePosition.of(nextGroup.position); - return stopPosition == null || current.compareTo(stopPosition) < 0; + return stopPosition == null + || promisedPosition.compareTo(stopPosition) < 0; } @Override @@ -223,27 +225,11 @@ public WindowedValue>> next() throws IOException { */ @Override public Progress getProgress() { - com.google.api.services.dataflow.model.Position currentPosition = + com.google.api.services.dataflow.model.Position position = new com.google.api.services.dataflow.model.Position(); ApproximateProgress progress = new ApproximateProgress(); - if (hasNextInternal()) { - ByteArrayShufflePosition current = - ByteArrayShufflePosition.of(nextGroup.position); - currentPosition.setShufflePosition(current.encodeBase64()); - } else { - if (stopPosition != null) { - currentPosition.setShufflePosition(stopPosition.encodeBase64()); - } else { - // The original stop position described the end of the - // shuffle-position-space (or infinity) and all records have - // been consumed. 
- progress.setPercentComplete((float) 1.0); - progress.setRemainingTime(toCloudDuration(Duration.ZERO)); - return cloudProgressToSourceProgress(progress); - } - } - - progress.setPosition(currentPosition); + position.setShufflePosition(promisedPosition.encodeBase64()); + progress.setPosition(position); return cloudProgressToSourceProgress(progress); } @@ -272,18 +258,10 @@ public Position updateStopPosition(Progress proposedStopPosition) { ByteArrayShufflePosition newStopPosition = ByteArrayShufflePosition.fromBase64(stopCloudPosition.getShufflePosition()); - if (!hasNextInternal()) { - LOG.warn("Cannot update stop position to " - + stopCloudPosition.getShufflePosition() - + " since all input was consumed."); - return null; - } - ByteArrayShufflePosition current = - ByteArrayShufflePosition.of(nextGroup.position); - if (newStopPosition.compareTo(current) <= 0) { + if (newStopPosition.compareTo(promisedPosition) <= 0) { LOG.warn("Proposed stop position: " - + stopCloudPosition.getShufflePosition() + " <= current position: " - + current.encodeBase64()); + + stopCloudPosition.getShufflePosition() + " <= promised position: " + + promisedPosition.encodeBase64()); return null; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java index eacb1ef7661b5..df75774d9ccd6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java @@ -20,7 +20,6 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; -import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.api.services.dataflow.model.Position; @@ -43,7 +42,6 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Lists; -import org.joda.time.Duration; import org.joda.time.Instant; import org.junit.Assert; import org.junit.Test; @@ -265,14 +263,29 @@ public void testReadFromEmptyShuffleSourceAndUpdateStopPosition() try (Source.SourceIterator>>> iter = shuffleSource.iterator(shuffleReader)) { + + // Can update the stop position, the source range spans all interval Position proposedStopPosition = new Position(); String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); proposedStopPosition.setShufflePosition(stop); - // Cannot update stop position since all input was consumed. 
+ Assert.assertEquals( + stop, + sourcePositionToCloudPosition( + iter.updateStopPosition( + cloudProgressToSourceProgress( + createApproximateProgress(proposedStopPosition)))) + .getShufflePosition()); + + + // Cannot update stop position to a position >= the current stop position + stop = encodeBase64URLSafeString(fabricatePosition(1, null)); + proposedStopPosition.setShufflePosition(stop); + Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); - } + cloudProgressToSourceProgress( + createApproximateProgress(proposedStopPosition)))); + } } @Test @@ -430,10 +443,6 @@ public void testReadFromShuffleSourceAndUpdateStopPosition() Assert.assertEquals(j, 1); ++i; } - - ApproximateProgress progress = - sourceProgressToCloudProgress(iter.getProgress()); - Assert.assertEquals(stop, progress.getPosition().getShufflePosition()); } Assert.assertEquals(i, kNumRecords); } @@ -484,11 +493,12 @@ public void testGetApproximateProgress() throws Exception { } Assert.assertFalse(sourceIterator.hasNext()); - ApproximateProgress finalProgress = - sourceProgressToCloudProgress(sourceIterator.getProgress()); - Assert.assertEquals(1.0, - (float) finalProgress.getPercentComplete(), 0.000000001); - Assert.assertEquals(Duration.ZERO, fromCloudDuration(finalProgress.getRemainingTime())); + // Cannot update stop position since all input was consumed. + Position proposedStopPosition = new Position(); + String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); + proposedStopPosition.setShufflePosition(stop); + Assert.assertEquals(null, sourceIterator.updateStopPosition( + cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); } } From e8c361484b5c6793550f894c97b7a6bb2806408e Mon Sep 17 00:00:00 2001 From: klk Date: Thu, 18 Dec 2014 08:51:33 -0800 Subject: [PATCH 0042/1541] Infer coders lazily, when requested via getCoder(). This makes impossible a current problem where coder inference fails before a user has a change to set the coder explicitly. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82434275 --- .../dataflow/sdk/values/TypedPValue.java | 11 ++-- .../dataflow/sdk/transforms/ParDoTest.java | 52 ++++++++++++++++++- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java index 95b9b45f53770..2bde2121a9fe2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java @@ -41,10 +41,7 @@ public abstract class TypedPValue extends PValueBase implements PValue { */ public Coder getCoder() { if (coder == null) { - throw new IllegalStateException( - "coder for " + this + " not set, and couldn't be inferred; " - + "either register a default Coder for its element type, " - + "or use setCoder() to specify one explicitly"); + inferCoderOrFail(); } return coder; } @@ -131,15 +128,13 @@ public TypedPValue setTypeTokenInternal(TypeToken typeToken) { return this; } - /** * If the coder is not explicitly set, this sets the coder for * this {@code TypedPValue} to the best coder that can be inferred * based upon the known {@code TypeToken}. By default, this is null, * but can and should be improved by subclasses. 
*/ - @Override - public void finishSpecifyingOutput() { + private void inferCoderOrFail() { if (coder == null) { TypeToken token = getTypeToken(); CoderRegistry registry = getProducingTransformInternal() @@ -161,7 +156,7 @@ public void finishSpecifyingOutput() { + "or use setCoder() to specify one explicitly. " + "If a default coder is registered, it may not be found " + "due to type erasure; again, use setCoder() to specify " - + "a Coder explicitly"); + + "a Coder explicitly."); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 7034d1c53647c..7baf0daa951e0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -27,6 +27,7 @@ import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -974,9 +975,15 @@ public void testSideOutputUnregisteredExplicitCoder() { PCollectionTuple outputTuple = input.apply(ParDo.of(new SideOutputDummyFn(sideTag)) .withOutputTags(mainTag, TupleTagList.of(sideTag))); - outputTuple.get(sideTag) - .setCoder(new TestDummyCoder()); + assertNull(pipeline.getCoderRegistry().getDefaultCoder(TestDummy.class)); + + outputTuple.get(sideTag).setCoder(new TestDummyCoder()); + outputTuple.get(sideTag).apply(View.asSingleton()); + + assertEquals(new TestDummyCoder(), outputTuple.get(sideTag).getCoder()); + outputTuple.get(sideTag).finishSpecifyingOutput(); // Check for crashes + assertEquals(new TestDummyCoder(), outputTuple.get(sideTag).getCoder()); // Check for corruption pipeline.run(); } @@ -997,6 +1004,47 @@ public void testMainOutputUnregisteredExplicitCoder() { pipeline.run(); } + @Test + public void testMainOutputApplySideOutputNoCoder() { + // Regression test: applying a transform to the main output + // should not cause a crash based on lack of a coder for the + // side output. + + Pipeline pipeline = TestPipeline.create(); + final TupleTag mainOutputTag = new TupleTag(); + final TupleTag sideOutputTag = new TupleTag(); + PCollectionTuple tuple = pipeline + .apply(Create.of(new TestDummy())) + .setCoder(TestDummyCoder.of()) + .apply(ParDo + .withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)) + .of( + new DoFn() { + @Override public void processElement(ProcessContext context) { + TestDummy element = context.element(); + context.output(element); + context.sideOutput(sideOutputTag, element); + } + }) + ); + + // Before fix, tuple.get(mainOutputTag).apply(...) would indirectly trigger + // tuple.get(sideOutputTag).finishSpecifyingOutput() which would crash + // on a missing coder. + PCollection foo = tuple + .get(mainOutputTag) + .setCoder(TestDummyCoder.of()) + .apply(ParDo.of(new DoFn() { + public void processElement(ProcessContext context) { + context.output(1); + } + })); + + tuple.get(sideOutputTag).setCoder(TestDummyCoder.of()); + + pipeline.run(); + } + @Test public void testParDoOutputWithTimestamp() { Pipeline p = TestPipeline.create(); From f94256324259d5af106bd8281cfea039213ef072 Mon Sep 17 00:00:00 2001 From: jlewi Date: Thu, 18 Dec 2014 16:34:46 -0800 Subject: [PATCH 0043/1541] Add a teardown policy flag to the SDK. 
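A minimal sketch, not part of this patch, of how the teardown policy added here might be set programmatically for a debugging run; the class name is hypothetical and the snippet assumes the option getter/setter and enum introduced below.

import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions.TeardownPolicy;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

/** Hypothetical example: keep worker VMs alive after the job for inspection. */
public class KeepWorkersForDebugging {
  public static void main(String[] args) {
    DataflowPipelineWorkerPoolOptions options = PipelineOptionsFactory.fromArgs(args)
        .create()
        .as(DataflowPipelineWorkerPoolOptions.class);
    options.setTeardownPolicy(TeardownPolicy.TEARDOWN_NEVER); // VMs are not torn down
    // ... build and run the pipeline with these options ...
  }
}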
This is very useful for troubleshooting because it ensures VM's stay alive for manual inspection. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82474620 --- .../DataflowPipelineWorkerPoolOptions.java | 27 +++++++++++++++++++ .../runners/DataflowPipelineTranslator.java | 3 +++ 2 files changed, 30 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index da0a6e588605f..a9be13a16df4a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -129,6 +129,33 @@ public String getApiServiceName() { String getMachineType(); void setMachineType(String value); + /** + * The policy for tearing down the workers spun up by the service. + */ + public enum TeardownPolicy { + TEARDOWN_ALWAYS("TEARDOWN_ALWAYS"), + TEARDOWN_NEVER("TEARDOWN_NEVER"); + + private final String teardownPolicy; + + private TeardownPolicy(String teardownPolicy) { + this.teardownPolicy = teardownPolicy; + } + + public String getTeardownPolicyName() { + return this.teardownPolicy; + } + } + + /** + * Teardown policy for the VMs. + * + *

By default this is left unset and the service sets the default policy. + */ + @Description("The teardown policy for the VMs.") + TeardownPolicy getTeardownPolicy(); + void setTeardownPolicy(TeardownPolicy value); + /** * List of local files to make available to workers. *

diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 52305ccd54966..2fe5d1d8d147e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -348,6 +348,9 @@ public Job translate(List packages) { WorkerPool workerPool = new WorkerPool(); workerPool.setKind(HARNESS_WORKER_POOL); + if (options.getTeardownPolicy() != null) { + workerPool.setTeardownPolicy(options.getTeardownPolicy().getTeardownPolicyName()); + } // Pass the URL and endpoint to use to the worker pool. WorkerSettings workerSettings = new WorkerSettings(); From 623b6b781f95de7b24972c5c22efaca79e5a0f89 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Fri, 19 Dec 2014 10:42:41 -0800 Subject: [PATCH 0044/1541] * Raise logging level of non-retryable HTTP request URLs to warning. * RetryHttpRequestInitializer is configurable with a set of HTTP codes to skip logging. * In case of GCS, skip logging for 404 because it's a normal situation during file staging, and if it's not normal in some other use case, the caller should be able to just handle the exception. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82526469 --- .../sdk/util/RetryHttpRequestInitializer.java | 23 ++++++++++++------- .../cloud/dataflow/sdk/util/Transport.java | 8 ++++++- .../util/RetryHttpRequestInitializerTest.java | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index c673b10204bc1..f5e660fa156bc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -32,6 +32,8 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Set; @@ -52,7 +54,7 @@ public class RetryHttpRequestInitializer implements HttpRequestInitializer { /** * Http response codes that should be silently ignored. 
*/ - private static final Set IGNORED_RESPONSE_CODES = new HashSet<>( + private static final Set DEFAULT_IGNORED_RESPONSE_CODES = new HashSet<>( Arrays.asList(307 /* Redirect, handled by Apiary client */, 308 /* Resume Incomplete, handled by Apiary client */)); @@ -74,7 +76,7 @@ public boolean handleIOException(HttpRequest request, boolean supportsRetry) if (willRetry) { LOG.debug("Request failed with IOException, will retry: {}", request.getUrl()); } else { - LOG.debug("Request failed with IOException, will NOT retry: {}", request.getUrl()); + LOG.warn("Request failed with IOException, will NOT retry: {}", request.getUrl()); } return willRetry; } @@ -83,9 +85,11 @@ public boolean handleIOException(HttpRequest request, boolean supportsRetry) private static class LoggingHttpBackoffUnsuccessfulResponseHandler implements HttpUnsuccessfulResponseHandler { private final HttpBackOffUnsuccessfulResponseHandler handler; + private final Set ignoredResponseCodes; public LoggingHttpBackoffUnsuccessfulResponseHandler(BackOff backoff, - Sleeper sleeper) { + Sleeper sleeper, Set ignoredResponseCodes) { + this.ignoredResponseCodes = ignoredResponseCodes; handler = new HttpBackOffUnsuccessfulResponseHandler(backoff); handler.setSleeper(sleeper); handler.setBackOffRequired( @@ -107,8 +111,8 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, LOG.debug("Request failed with code {} will retry: {}", response.getStatusCode(), request.getUrl()); - } else if (!IGNORED_RESPONSE_CODES.contains(response.getStatusCode())) { - LOG.debug("Request failed with code {}, will NOT retry: {}", + } else if (!ignoredResponseCodes.contains(response.getStatusCode())) { + LOG.warn("Request failed with code {}, will NOT retry: {}", response.getStatusCode(), request.getUrl()); } @@ -122,19 +126,22 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, private final Sleeper sleeper; // used for testing + private Set ignoredResponseCodes = new HashSet<>(DEFAULT_IGNORED_RESPONSE_CODES); + /** * @param chained a downstream HttpRequestInitializer, which will also be * applied to HttpRequest initialization. May be null. */ public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) { - this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT); + this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT, Collections.emptyList()); } public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, - NanoClock nanoClock, Sleeper sleeper) { + NanoClock nanoClock, Sleeper sleeper, Collection ignoredResponseCodes) { this.chained = chained; this.nanoClock = nanoClock; this.sleeper = sleeper; + this.ignoredResponseCodes = new HashSet<>(ignoredResponseCodes); } @Override @@ -155,7 +162,7 @@ public void initialize(HttpRequest request) throws IOException { new LoggingHttpBackoffUnsuccessfulResponseHandler( new ExponentialBackOff.Builder().setNanoClock(nanoClock) .setMultiplier(2).build(), - sleeper)); + sleeper, ignoredResponseCodes)); // Retry immediately on IOExceptions. 
LoggingHttpBackOffIOExceptionHandler loggingBackoffHandler = diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java index e27f7fcc4f885..d1accb2ac191f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java @@ -20,6 +20,8 @@ import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; +import com.google.api.client.util.NanoClock; +import com.google.api.client.util.Sleeper; import com.google.api.services.bigquery.Bigquery; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.pubsub.Pubsub; @@ -34,6 +36,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.security.GeneralSecurityException; +import java.util.Arrays; /** * Helpers for cloud communication. @@ -135,7 +138,10 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options public static Storage.Builder newStorageClient(GcsOptions options) { return new Storage.Builder(getTransport(), getJsonFactory(), - new RetryHttpRequestInitializer(options.getGcpCredential())) + new RetryHttpRequestInitializer( + // Do not log the code 404. Code up the stack will deal with 404's if needed, and + // logging it by default clutters the output during file staging. + options.getGcpCredential(), NanoClock.SYSTEM, Sleeper.DEFAULT, Arrays.asList(404))) .setApplicationName(options.getAppName()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java index 45924560630ba..09998a36a2fe6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java @@ -53,6 +53,7 @@ import java.io.IOException; import java.security.PrivateKey; +import java.util.Arrays; /** * Tests for RetryHttpRequestInitializer. 
@@ -99,7 +100,7 @@ protected LowLevelHttpRequest buildRequest(String method, String url) mockCredential, new MockNanoClock(), new Sleeper() { @Override public void sleep(long millis) throws InterruptedException {} - }); + }, Arrays.asList(418 /* I'm a teapot */)); storage = new Storage.Builder(lowLevelTransport, jsonFactory, initializer) .setApplicationName("test").build(); } From c0ea51817e673b47d06653dbcc5c27563bc0d667 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 19 Dec 2014 11:10:22 -0800 Subject: [PATCH 0045/1541] Add DoFn.Context.sideOutputWithTimestamp [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82528619 --- .../cloud/dataflow/sdk/transforms/DoFn.java | 24 +++++++++++++- .../dataflow/sdk/transforms/RateLimiting.java | 7 +++++ .../cloud/dataflow/sdk/util/DoFnContext.java | 5 +++ .../dataflow/sdk/util/DoFnProcessContext.java | 18 +++++++---- .../dataflow/sdk/transforms/ParDoTest.java | 31 +++++++++++++++++++ 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index c18a646e4278a..d6057d5b29c65 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -128,7 +128,29 @@ public abstract class Context { */ public abstract void sideOutput(TupleTag tag, T output); - // TODO: add sideOutputWithTimestamp[AndWindows] + /** + * Adds the given element to the specified side output {@code PCollection}, + * with the given timestamp. + * + *

If invoked from {@link DoFn#processElement}, the timestamp + * must not be older than the input element's timestamp minus + * {@link DoFn#getAllowedTimestampSkew}. The output element will + * be in the same windows as the input element. + * + *

Is is illegal to invoke this from {@link #startBundle} or + * {@link #finishBundle} unless the input {@code PCollection} is + * windowed by the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + * If this is the case, the output element's timestamp will be + * the given timestamp and its window will be the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + * + * @throws IllegalArgumentException if the number of outputs exceeds + * the limit of 1,000 outputs per DoFn + * @see ParDo#withOutputTags + */ + public abstract void sideOutputWithTimestamp( + TupleTag tag, T output, Instant timestamp); /** * Returns an aggregator with aggregation logic specified by the CombineFn diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java index fd32303613bcb..d5f177ac83c8e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java @@ -296,6 +296,13 @@ public void sideOutput(TupleTag tag, T output) { } } + @Override + public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + synchronized (RateLimitingDoFn.this) { + context.sideOutputWithTimestamp(tag, output, timestamp); + } + } + @Override public Aggregator createAggregator( String name, Combine.CombineFn combiner) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 08d53a7d464b6..722a0227d6ca4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -174,6 +174,11 @@ public void sideOutput(TupleTag tag, T output) { Arrays.asList(GlobalWindow.Window.INSTANCE)); } + @Override + public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + sideOutputWindowedValue(tag, output, timestamp, Arrays.asList(GlobalWindow.Window.INSTANCE)); + } + private String generateInternalAggregatorName(String userName) { return "user-" + stepContext.getStepName() + "-" + userName; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java index d393e6f0b8b66..b6faaa4e8631a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java @@ -88,12 +88,7 @@ public void output(O output) { @Override public void outputWithTimestamp(O output, Instant timestamp) { - Instant originalTimestamp = windowedValue.getTimestamp(); - - if (originalTimestamp != null) { - Preconditions.checkArgument( - !timestamp.isBefore(originalTimestamp.minus(fn.getAllowedTimestampSkew()))); - } + checkTimestamp(timestamp); context.outputWindowedValue(output, timestamp, windowedValue.getWindows()); } @@ -112,6 +107,12 @@ public void sideOutput(TupleTag tag, T output) { windowedValue.getWindows()); } + @Override + public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + checkTimestamp(timestamp); + context.sideOutputWindowedValue(tag, output, timestamp, windowedValue.getWindows()); + } + @Override public Aggregator createAggregator( String name, Combine.CombineFn combiner) { @@ -133,4 +134,9 @@ public Instant timestamp() { public 
Collection windows() { return windowedValue.getWindows(); } + + private void checkTimestamp(Instant timestamp) { + Preconditions.checkArgument( + !timestamp.isBefore(windowedValue.getTimestamp().minus(fn.getAllowedTimestampSkew()))); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 7baf0daa951e0..163426b1c4c85 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -1066,6 +1066,37 @@ public void testParDoOutputWithTimestamp() { p.run(); } + @Test + public void testParDoSideOutputWithTimestamp() { + Pipeline p = TestPipeline.create(); + + PCollection input = + createInts(p, Arrays.asList(3, 42, 6)).setOrdered(true); + + final TupleTag mainTag = new TupleTag(){}; + final TupleTag sideTag = new TupleTag(){}; + + PCollection output = + input + .apply(ParDo.withOutputTags(mainTag, TupleTagList.of(sideTag)).of( + new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.sideOutputWithTimestamp( + sideTag, c.element(), new Instant(c.element().longValue())); + } + })).get(sideTag) + .apply(ParDo.of(new TestShiftTimestampDoFn(Duration.ZERO, Duration.ZERO))) + .apply(ParDo.of(new TestFormatTimestampDoFn())); + + DataflowAssert.that(output).containsInAnyOrder( + "processing: 3, timestamp: 3", + "processing: 42, timestamp: 42", + "processing: 6, timestamp: 6"); + + p.run(); + } + @Test public void testParDoShiftTimestamp() { Pipeline p = TestPipeline.create(); From df1e1afcae2536a1ea9d0b5b4cef51b5ba98d498 Mon Sep 17 00:00:00 2001 From: malo Date: Fri, 19 Dec 2014 13:13:59 -0800 Subject: [PATCH 0046/1541] Set the sdk major_version to 0. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82537631 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index a2574774d3315..1a0d873307375 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -196,9 +196,7 @@ public DataflowPipelineJob run(Pipeline pipeline) { // Requirements about the service. Map environmentVersion = new HashMap<>(); - // TODO: Specify the environment major version. - // environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, - // ENVIRONMENT_MAJOR_VERSION); + environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION); newJob.getEnvironment().setVersion(environmentVersion); // Default jobType is DATA_PARALLEL which is for java batch. String jobType = "DATA_PARALLEL"; From 3be5a2ab0c81f13c4d086ce3058b65272d070f07 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Fri, 19 Dec 2014 13:48:27 -0800 Subject: [PATCH 0047/1541] Avoids creating a thread in ReadOperation if progress estimation is not enabled or is continuous. 
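(Aside before the details of patch 0047: patch 0045 above only shows the SDK-side plumbing for sideOutputWithTimestamp, so here is a minimal, hypothetical DoFn illustrating how user code might call it. The tag name, record format, and parsing are invented for illustration and are not part of the SDK.)

    import com.google.cloud.dataflow.sdk.transforms.DoFn;
    import com.google.cloud.dataflow.sdk.values.TupleTag;
    import org.joda.time.Instant;

    class StampSideOutputFn extends DoFn<String, String> {
      // Hypothetical side output tag for re-stamped records.
      static final TupleTag<String> stampedTag = new TupleTag<String>() {};

      @Override
      public void processElement(ProcessContext c) {
        // Assume each element looks like "epochMillis,payload".
        String[] parts = c.element().split(",", 2);
        Instant eventTime = new Instant(Long.parseLong(parts[0]));
        // Emit the payload to the side output with an explicit timestamp; per the
        // javadoc above, it must not precede the input element's timestamp minus
        // getAllowedTimestampSkew().
        c.sideOutputWithTimestamp(stampedTag, parts[1], eventTime);
      }
    }

Applied through ParDo.withOutputTags(mainTag, TupleTagList.of(stampedTag)) and read back with .get(stampedTag), much as the new testParDoSideOutputWithTimestamp case above does.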
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82540119 --- .../sdk/util/common/worker/ReadOperation.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index 1930e0e61aaad..1d4e5ec9bef5b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -134,20 +134,21 @@ protected void runReadLoop() throws Exception { } // TODO: Consider using the ExecutorService from PipelineOptions instead. - Thread updateRequester = new Thread() { - @Override - public void run() { - while (true) { - isProgressUpdateRequested.set(true); - try { - Thread.sleep(progressUpdatePeriodMs); - } catch (InterruptedException e) { - break; + Thread updateRequester = null; + if (progressUpdatePeriodMs != 0) { + updateRequester = new Thread() { + @Override + public void run() { + while (true) { + isProgressUpdateRequested.set(true); + try { + Thread.sleep(progressUpdatePeriodMs); + } catch (InterruptedException e) { + break; + } } } - } - }; - if (progressUpdatePeriodMs != 0) { + }; updateRequester.start(); } From 69b6df51a8e532dc233bc840a4ddd55acc584d82 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 19 Dec 2014 14:04:45 -0800 Subject: [PATCH 0048/1541] Cleanup: tests UngroupedShuffleSource, GroupingShuffleSource APIs with corresponding ShuffleSinks, and avoids tests implementation details about data encoding into shuffle. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82541343 --- .../worker/GroupingShuffleSourceTest.java | 97 ++++++++++++------- .../worker/UngroupedShuffleSourceTest.java | 49 ++++++---- 2 files changed, 93 insertions(+), 53 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java index df75774d9ccd6..21ecb0af04f2c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java @@ -25,7 +25,6 @@ import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.coders.InstantCoder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -37,6 +36,7 @@ import com.google.cloud.dataflow.sdk.util.common.Reiterable; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.util.common.worker.Source.SourceIterator; import com.google.cloud.dataflow.sdk.values.KV; @@ -89,52 +89,81 @@ private enum ValuesToRead { private void runTestReadShuffleSource(List>> input, ValuesToRead valuesToRead) throws Exception { - Coder> elemCoder = - WindowedValue.getFullCoder(StringUtf8Coder.of(), 
IntervalWindow.getCoder()); + Coder>> sinkElemCoder = + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), + StringUtf8Coder.of()), + IntervalWindow.getCoder()); + + Coder>>> sourceElemCoder = + WindowedValue.getFullCoder( + KvCoder.of( + BigEndianIntegerCoder.of(), + IterableCoder.of(StringUtf8Coder.of())), + IntervalWindow.getCoder()); + + // Write to shuffle with GROUP_KEYS ShuffleSink. + ShuffleSink> shuffleSink = new ShuffleSink<>( + PipelineOptionsFactory.create(), + null, ShuffleSink.ShuffleKind.GROUP_KEYS, + sinkElemCoder); + + TestShuffleWriter shuffleWriter = new TestShuffleWriter(); + + int kvCount = 0; + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter>> shuffleSinkWriter = + shuffleSink.writer(shuffleWriter)) { + for (KV> kvs : input) { + Integer key = kvs.getKey(); + for (String value : kvs.getValue()) { + ++kvCount; + actualSizes.add(shuffleSinkWriter.add( + WindowedValue.of(KV.of(key, value), + timestamp, + Lists.newArrayList(window)))); + } + } + } + List records = shuffleWriter.getRecords(); + Assert.assertEquals(kvCount, records.size()); + Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + + // Read from shuffle with GroupingShuffleSource. BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource> shuffleSource = + GroupingShuffleSource shuffleSource = new GroupingShuffleSource<>( PipelineOptionsFactory.create(), null, null, null, - WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of( - WindowedValue.getFullCoder(StringUtf8Coder.of(), - IntervalWindow.getCoder()))), - IntervalWindow.getCoder()), + sourceElemCoder, context); ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(shuffleSource); TestShuffleReader shuffleReader = new TestShuffleReader(); List expectedSizes = new ArrayList<>(); - for (KV> kvs : input) { - Integer key = kvs.getKey(); - byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), key); - - for (String value : kvs.getValue()) { - byte[] valueByte = CoderUtils.encodeToByteArray( - elemCoder, WindowedValue.of(value, timestamp, Lists.newArrayList(window))); - byte[] skey = CoderUtils.encodeToByteArray(InstantCoder.of(), timestamp); - ShuffleEntry shuffleEntry = new ShuffleEntry(keyByte, skey, valueByte); - shuffleReader.addEntry(shuffleEntry); - expectedSizes.add(shuffleEntry.length()); - } + for (ShuffleEntry record : records) { + expectedSizes.add(record.length()); + shuffleReader.addEntry(record); } - List>>> actual = new ArrayList<>(); - try (SourceIterator>>>> iter = + List>> actual = new ArrayList<>(); + try (SourceIterator>>> iter = shuffleSource.iterator(shuffleReader)) { - Iterable> prevValuesIterable = null; - Iterator> prevValuesIterator = null; + Iterable prevValuesIterable = null; + Iterator prevValuesIterator = null; while (iter.hasNext()) { Assert.assertTrue(iter.hasNext()); Assert.assertTrue(iter.hasNext()); - KV>> elem = iter.next().getValue(); + WindowedValue>> windowedValue = iter.next(); + // Verify value is in an empty windows. 
+ Assert.assertEquals(Long.MIN_VALUE, windowedValue.getTimestamp().getMillis()); + Assert.assertEquals(0, windowedValue.getWindows().size()); + + KV> elem = windowedValue.getValue(); Integer key = elem.getKey(); - List> values = new ArrayList<>(); + List values = new ArrayList<>(); if (valuesToRead.ordinal() > ValuesToRead.SKIP_VALUES.ordinal()) { if (prevValuesIterable != null) { prevValuesIterable.iterator(); // Verifies that this does not throw. @@ -143,8 +172,8 @@ private void runTestReadShuffleSource(List>> input, prevValuesIterator.hasNext(); // Verifies that this does not throw. } - Iterable> valuesIterable = elem.getValue(); - Iterator> valuesIterator = valuesIterable.iterator(); + Iterable valuesIterable = elem.getValue(); + Iterator valuesIterator = valuesIterable.iterator(); if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) { while (valuesIterator.hasNext()) { @@ -187,13 +216,13 @@ private void runTestReadShuffleSource(List>> input, } } - List>>> expected = new ArrayList<>(); + List>> expected = new ArrayList<>(); for (KV> kvs : input) { Integer key = kvs.getKey(); - List> values = new ArrayList<>(); + List values = new ArrayList<>(); if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) { for (String value : kvs.getValue()) { - values.add(WindowedValue.of(value, timestamp, Lists.newArrayList(window))); + values.add(value); if (valuesToRead == ValuesToRead.READ_ONE_VALUE) { break; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java index bf9a15c05b343..75d9803ed7eed 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java @@ -18,13 +18,13 @@ import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; -import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; -import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.common.collect.Lists; @@ -46,35 +46,44 @@ public class UngroupedShuffleSourceTest { private static final Instant timestamp = new Instant(123000); private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - private byte[] asShuffleKey(long seqNum) throws Exception { - return CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum); - } + void runTestReadShuffleSource(List expected) throws Exception { + Coder> elemCoder = + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder()); - private byte[] asShuffleValue(Integer value) throws Exception { - return CoderUtils.encodeToByteArray( - WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder()), - WindowedValue.of(value, timestamp, Lists.newArrayList(window))); - } + // Write to shuffle with UNGROUPED 
ShuffleSink. + ShuffleSink shuffleSink = new ShuffleSink<>( + PipelineOptionsFactory.create(), + null, ShuffleSink.ShuffleKind.UNGROUPED, + elemCoder); - private void runTestReadShuffleSource(List expected) throws Exception { + TestShuffleWriter shuffleWriter = new TestShuffleWriter(); + + List actualSizes = new ArrayList<>(); + try (Sink.SinkWriter> shuffleSinkWriter = + shuffleSink.writer(shuffleWriter)) { + for (Integer value : expected) { + actualSizes.add(shuffleSinkWriter.add( + WindowedValue.of(value, timestamp, Lists.newArrayList(window)))); + } + } + List records = shuffleWriter.getRecords(); + Assert.assertEquals(expected.size(), records.size()); + Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + + // Read from shuffle with UngroupedShuffleSource. UngroupedShuffleSource> shuffleSource = new UngroupedShuffleSource<>( PipelineOptionsFactory.create(), null, null, null, - WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder())); + elemCoder); ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(shuffleSource); TestShuffleReader shuffleReader = new TestShuffleReader(); List expectedSizes = new ArrayList<>(); - long seqNum = 0; - for (Integer value : expected) { - byte[] shuffleKey = asShuffleKey(seqNum++); - byte[] shuffleValue = asShuffleValue(value); - shuffleReader.addEntry(shuffleKey, shuffleValue); - - ShuffleEntry record = new ShuffleEntry(shuffleKey, null, shuffleValue); + for (ShuffleEntry record : records) { expectedSizes.add(record.length()); + shuffleReader.addEntry(record); } List actual = new ArrayList<>(); @@ -84,6 +93,8 @@ private void runTestReadShuffleSource(List expected) throws Exception { Assert.assertTrue(iter.hasNext()); Assert.assertTrue(iter.hasNext()); WindowedValue elem = iter.next(); + Assert.assertEquals(timestamp, elem.getTimestamp()); + Assert.assertEquals(Lists.newArrayList(window), elem.getWindows()); actual.add(elem.getValue()); } Assert.assertFalse(iter.hasNext()); From 6aaf644fca4c6f7eb4b40c2adc396050539a3ab5 Mon Sep 17 00:00:00 2001 From: earhart Date: Fri, 19 Dec 2014 15:33:34 -0800 Subject: [PATCH 0049/1541] Quieting a few more warnings. 
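(Aside: one technique this warnings-cleanup patch leans on, visible in the DataflowPipelineTranslator hunk below, is scoping @SuppressWarnings("unchecked") to a single local-variable declaration instead of a whole method or class. A generic, self-contained sketch of that idiom follows; the property key and names are invented for illustration.)

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    class NarrowSuppressionSketch {
      // Only the single unavoidable unchecked cast is suppressed; every other
      // warning in this method still surfaces, which is the point of the idiom.
      static List<Map<String, Object>> outputInfos(Map<String, Object> properties) {
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> infos =
            (List<Map<String, Object>>) properties.get("output_info");
        return infos == null ? new ArrayList<Map<String, Object>>() : infos;
      }
    }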
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82547955 --- .../sdk/coders/SerializableCoder.java | 1 + .../runners/DataflowPipelineTranslator.java | 6 ++++- .../sdk/runners/DirectPipelineRunner.java | 1 + .../runners/dataflow/AvroIOTranslator.java | 2 ++ .../runners/dataflow/TextIOTranslator.java | 2 ++ .../runners/worker/AssignWindowsParDoFn.java | 1 + .../sdk/runners/worker/AvroSinkFactory.java | 1 + .../sdk/runners/worker/AvroSourceFactory.java | 1 + .../sdk/runners/worker/CombineValuesFn.java | 12 +++++----- .../sdk/runners/worker/NormalParDoFn.java | 14 +++++++----- .../worker/PartitioningShuffleSource.java | 2 +- .../worker/SourceOperationExecutor.java | 1 + .../sdk/transforms/ApproximateQuantiles.java | 1 + .../cloud/dataflow/sdk/transforms/Create.java | 7 ++++++ .../dataflow/sdk/transforms/GroupByKey.java | 2 +- .../sdk/transforms/RemoveDuplicates.java | 1 + .../sdk/transforms/join/UnionCoder.java | 2 ++ .../dataflow/sdk/util/AbstractWindowSet.java | 1 + .../dataflow/sdk/util/AssignWindowsDoFn.java | 2 ++ .../sdk/util/BatchModeExecutionContext.java | 1 + .../dataflow/sdk/util/BufferingWindowSet.java | 2 +- .../cloud/dataflow/sdk/util/DoFnContext.java | 5 +++-- .../cloud/dataflow/sdk/util/DoFnRunner.java | 1 + .../cloud/dataflow/sdk/util/PTuple.java | 1 + .../cloud/dataflow/sdk/util/Serializer.java | 1 + .../sdk/util/common/PeekingReiterator.java | 2 +- .../worker/GroupingShuffleEntryIterator.java | 2 +- .../util/common/worker/OutputReceiver.java | 10 ++++----- .../sdk/util/common/worker/ReadOperation.java | 3 +++ .../sdk/util/common/worker/WorkExecutor.java | 1 + .../util/common/worker/WriteOperation.java | 15 ++++++++----- .../dataflow/sdk/values/CodedTupleTag.java | 2 +- .../dataflow/sdk/values/CodedTupleTagMap.java | 1 + .../dataflow/sdk/values/PCollection.java | 6 ++--- .../dataflow/sdk/values/PCollectionTuple.java | 2 +- .../dataflow/sdk/values/TimestampedValue.java | 1 + .../sdk/coders/ByteArrayCoderTest.java | 9 +++++--- .../cloud/dataflow/sdk/io/AvroIOTest.java | 1 + .../cloud/dataflow/sdk/io/TextIOTest.java | 5 +++-- .../options/PipelineOptionsFactoryTest.java | 1 + .../BlockingDataflowPipelineRunnerTest.java | 4 ++-- .../DataflowPipelineTranslatorTest.java | 1 + .../sdk/runners/PipelineRunnerTest.java | 4 ++-- .../runners/worker/AvroSinkFactoryTest.java | 2 +- .../runners/worker/AvroSourceFactoryTest.java | 1 + .../runners/worker/CombineValuesFnTest.java | 1 + .../DataflowWorkProgressUpdaterTest.java | 1 + .../worker/InMemorySourceFactoryTest.java | 2 +- .../runners/worker/ParDoFnFactoryTest.java | 2 +- .../worker/ShuffleSinkFactoryTest.java | 1 + .../worker/ShuffleSourceFactoryTest.java | 1 + .../runners/worker/TextSinkFactoryTest.java | 2 +- .../runners/worker/TextSourceFactoryTest.java | 2 +- .../sdk/runners/worker/TextSourceTest.java | 22 ++++++++++++------- .../transforms/ApproximateQuantilesTest.java | 2 +- .../dataflow/sdk/transforms/CombineTest.java | 9 ++++++-- .../dataflow/sdk/transforms/CountTest.java | 5 +++-- .../dataflow/sdk/transforms/CreateTest.java | 2 +- .../sdk/transforms/GroupByKeyTest.java | 2 +- .../dataflow/sdk/transforms/KeysTest.java | 2 ++ .../dataflow/sdk/transforms/KvSwapTest.java | 1 + .../dataflow/sdk/transforms/ParDoTest.java | 2 +- .../dataflow/sdk/transforms/SampleTest.java | 3 +++ .../sdk/transforms/SimpleStatsFnsTest.java | 3 +++ .../dataflow/sdk/transforms/TopTest.java | 4 ++-- .../dataflow/sdk/transforms/ValuesTest.java | 2 ++ .../sdk/transforms/join/UnionCoderTest.java | 2 +- 
.../transforms/windowing/WindowingTest.java | 2 +- .../dataflow/sdk/util/AggregatorImplTest.java | 1 + .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 1 + .../sdk/util/InstanceBuilderTest.java | 1 + .../StreamingGroupAlsoByWindowsDoFnTest.java | 1 + .../dataflow/sdk/util/common/CounterTest.java | 1 + .../sdk/util/common/CounterTestUtils.java | 6 ++--- .../util/common/worker/ExecutorTestUtils.java | 1 + .../common/worker/FlattenOperationTest.java | 1 + .../common/worker/MapTaskExecutorTest.java | 1 + .../common/worker/OutputReceiverTest.java | 5 +++-- .../common/worker/ParDoOperationTest.java | 1 + .../PartialGroupByKeyOperationTest.java | 1 + .../util/common/worker/ReadOperationTest.java | 1 + .../util/common/worker/StateSamplerTest.java | 5 +++++ .../common/worker/WriteOperationTest.java | 1 + 83 files changed, 172 insertions(+), 75 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java index 8e58b4d96e291..303370aac0e51 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java @@ -57,6 +57,7 @@ public static SerializableCoder of(Class type) { } @JsonCreator + @SuppressWarnings("unchecked") public static SerializableCoder of(@JsonProperty("type") String classType) throws ClassNotFoundException { Class clazz = Class.forName(classType); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 2fe5d1d8d147e..aff881ddfe999 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -100,6 +100,7 @@ * DataflowPipelineTranslator knows how to translate Pipeline objects * into Dataflow API Jobs. */ +@SuppressWarnings({"rawtypes", "unchecked"}) public class DataflowPipelineTranslator { // Must be kept in sync with their internal counterparts. public static final String HARNESS_WORKER_POOL = "harness"; @@ -495,7 +496,10 @@ public void addStep(PTransform transform, Step original) { @Nullable List> outputInfoList = null; try { // TODO: This should be done via a Structs accessor. - outputInfoList = (List>) properties.get(PropertyNames.OUTPUT_INFO); + @SuppressWarnings("unchecked") + @Nullable List> list = + (List>) properties.get(PropertyNames.OUTPUT_INFO); + outputInfoList = list; } catch (Exception e) { throw new RuntimeException("Inconsistent dataflow pipeline translation", e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java index 2b0ca03beaab0..c71eaf8434d3c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -67,6 +67,7 @@ * *

Throws an exception from {@link #run} if execution fails. */ +@SuppressWarnings({"rawtypes", "unchecked"}) public class DirectPipelineRunner extends PipelineRunner { private static final Logger LOG = LoggerFactory.getLogger(DirectPipelineRunner.class); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java index d7e36c54fc050..f191b25ba86ee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java @@ -34,6 +34,7 @@ public class AvroIOTranslator { /** * Implements AvroIO Read translation for the Dataflow backend. */ + @SuppressWarnings("rawtypes") public static class ReadTranslator implements TransformTranslator { @Override @@ -63,6 +64,7 @@ private void translateReadHelper( /** * Implements AvroIO Write translation for the Dataflow backend. */ + @SuppressWarnings("rawtypes") public static class WriteTranslator implements TransformTranslator { @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index 0a2f1f8a32571..ee70949d5954c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -34,6 +34,7 @@ public class TextIOTranslator { /** * Implements TextIO Read translation for the Dataflow backend. */ + @SuppressWarnings({"rawtypes", "unchecked"}) public static class ReadTranslator implements TransformTranslator { @Override public void translate( @@ -75,6 +76,7 @@ private void translateReadHelper( /** * Implements TextIO Write translation for the Dataflow backend. */ + @SuppressWarnings({"rawtypes", "unchecked"}) public static class WriteTranslator implements TransformTranslator { @Override public void translate( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java index f1ae7f11b9374..00558a306ead6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -41,6 +41,7 @@ * A wrapper around an AssignWindowsDoFn. This class is the same as * NormalParDoFn, except that it gets deserialized differently. */ +@SuppressWarnings({"rawtypes", "unchecked"}) class AssignWindowsParDoFn extends NormalParDoFn { public static AssignWindowsParDoFn create( PipelineOptions options, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java index 9a20d17aee220..f1c96dcf478c1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java @@ -30,6 +30,7 @@ /** * Creates an AvroSink from a CloudObject spec. */ +@SuppressWarnings("rawtypes") public final class AvroSinkFactory { // Do not instantiate. 
private AvroSinkFactory() {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java index 329d8b66e2ee1..740f94965dd80 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java @@ -31,6 +31,7 @@ /** * Creates an AvroSource from a CloudObject spec. */ +@SuppressWarnings("rawtypes") public class AvroSourceFactory { // Do not instantiate. private AvroSourceFactory() {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 761aaa950dba4..63c4089f616a2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -42,7 +42,7 @@ /** * A wrapper around a decoded user value combining function. */ -@SuppressWarnings("serial") +@SuppressWarnings({"rawtypes", "serial", "unchecked"}) public class CombineValuesFn extends NormalParDoFn { /** * The optimizer may split run the user combiner in 3 separate @@ -141,8 +141,8 @@ private CombineValuesDoFn( @Override public void processElement(ProcessContext c) { - KV> kv = (KV>) c.element(); - K key = (K) kv.getKey(); + KV> kv = c.element(); + K key = kv.getKey(); c.output(KV.of(key, this.combineFn.apply(key, kv.getValue()))); } @@ -162,7 +162,7 @@ private AddInputsDoFn( @Override public void processElement(ProcessContext c) { - KV> kv = (KV>) c.element(); + KV> kv = c.element(); K key = kv.getKey(); VA accum = this.combineFn.createAccumulator(key); for (VI input : kv.getValue()) { @@ -187,7 +187,7 @@ private MergeAccumulatorsDoFn( @Override public void processElement(ProcessContext c) { - KV> kv = (KV>) c.element(); + KV> kv = c.element(); K key = kv.getKey(); VA accum = this.combineFn.mergeAccumulators(key, kv.getValue()); @@ -209,7 +209,7 @@ private ExtractOutputDoFn( @Override public void processElement(ProcessContext c) { - KV kv = (KV) c.element(); + KV kv = c.element(); K key = kv.getKey(); VO output = this.combineFn.extractOutput(key, kv.getValue()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index c6e5f9f163e35..79d729b974d87 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -48,6 +48,7 @@ /** * A wrapper around a decoded user DoFn. 
*/ +@SuppressWarnings({"rawtypes", "unchecked"}) public class NormalParDoFn extends ParDoFn { public static NormalParDoFn create( PipelineOptions options, @@ -67,14 +68,14 @@ public static NormalParDoFn create( if (!(deserializedFn instanceof DoFn)) { throw new Exception("unexpected kind of DoFn: " + deserializedFn.getClass().getName()); } - DoFn fn = (DoFn) deserializedFn; + DoFn fn = (DoFn) deserializedFn; PTuple sideInputValues = PTuple.empty(); if (sideInputInfos != null) { for (SideInputInfo sideInputInfo : sideInputInfos) { Object sideInputValue = SideInputUtils.readSideInput( options, sideInputInfo, executionContext); - TupleTag tag = new TupleTag(sideInputInfo.getTag()); + TupleTag tag = new TupleTag<>(sideInputInfo.getTag()); sideInputValues = sideInputValues.and(tag, sideInputValue); } } @@ -119,16 +120,16 @@ public NormalParDoFn(PipelineOptions options, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { this.options = options; - this.fn = fn; + this.fn = (DoFn) fn; this.sideInputValues = sideInputValues; if (outputTags.size() < 1) { throw new AssertionError("expected at least one output"); } - this.mainOutputTag = new TupleTag(outputTags.get(0)); + this.mainOutputTag = new TupleTag<>(outputTags.get(0)); this.sideOutputTags = new ArrayList<>(); if (outputTags.size() > 1) { for (String tag : outputTags.subList(1, outputTags.size())) { - this.sideOutputTags.add(new TupleTag(tag)); + this.sideOutputTags.add(new TupleTag(tag)); } } this.stepName = stepName; @@ -202,8 +203,9 @@ public void output(Receiver receiver, WindowedValue output) { } @Override + @SuppressWarnings("unchecked") public void processElement(Object elem) throws Exception { - fnRunner.processElement((WindowedValue) elem); + fnRunner.processElement((WindowedValue) elem); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java index 5394a26cc47fc..e1401b6cd83ad 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java @@ -75,7 +75,7 @@ private void initCoder(Coder>> coder) throws Exception { "unexpected kind of coder for elements read from " + "a key-partitioning shuffle: " + elemCoder); } - KvCoder kvCoder = (KvCoder) elemCoder; + KvCoder kvCoder = (KvCoder) elemCoder; this.keyCoder = kvCoder.getKeyCoder(); windowedValueCoder = windowedElemCoder.withValueCoder(kvCoder.getValueCoder()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java index 2db18b2724740..34bd27966bfc7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java @@ -32,6 +32,7 @@ /** * An executor for a source operation, defined by a {@code SourceOperationRequest}. 
*/ +@SuppressWarnings("resource") public class SourceOperationExecutor extends WorkExecutor { private static final Logger LOG = LoggerFactory.getLogger(MapTaskExecutor.class); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index a2f0094cbb463..5702583ebe65c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -206,6 +206,7 @@ PTransform, PCollection>> globally(int numQuantiles) { * * @param the type of the values being combined */ + @SuppressWarnings("serial") public static class ApproximateQuantilesCombineFn & Serializable> extends AccumulatingCombineFn diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java index 0ff9deb039bc9..d28e6a28193f4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -77,6 +77,7 @@ * * @param the type of the elements of the resulting {@code PCollection} */ +@SuppressWarnings("serial") public class Create extends PTransform> { /** @@ -111,6 +112,7 @@ public static Create of(Iterable elems) { * {@link Coder} specified explicitly, via a call to * {@link PCollection#setCoder}. */ + @SafeVarargs public static Create of(T... elems) { return of(Arrays.asList(elems)); } @@ -287,6 +289,11 @@ public void processElement(ProcessContext c) { ///////////////////////////////////////////////////////////////////////////// static { + registerDefaultTransformEvaluator(); + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + private static void registerDefaultTransformEvaluator() { DirectPipelineRunner.registerDefaultTransformEvaluator( Create.class, new DirectPipelineRunner.TransformEvaluator() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index e8acdbad5be39..c6d0acf4094d4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -375,7 +375,7 @@ boolean sortsValues() { registerWithDirectPipelineRunner(); } - @SuppressWarnings("rawtypes") + @SuppressWarnings({"rawtypes", "unchecked"}) private static void registerWithDirectPipelineRunner() { DirectPipelineRunner.registerDefaultTransformEvaluator( GroupByKeyOnly.class, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java index 0e4f21f75b781..9540eda6da7d1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java @@ -52,6 +52,7 @@ * @param the type of the elements of the input and output * {@code PCollection}s */ +@SuppressWarnings("serial") public class RemoveDuplicates extends PTransform, PCollection> { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java index f0987dfaffee5..f3d128c67b9e1 100644 --- 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java @@ -110,6 +110,7 @@ public List> getComponents() { @Override public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) { int index = getIndexForEncoding(union); + @SuppressWarnings("unchecked") Coder coder = (Coder) elementCoders.get(index); return coder.isRegisterByteSizeObserverCheap(union.getValue(), context); } @@ -125,6 +126,7 @@ public void registerByteSizeObserver( // Write out the union tag. observer.update(VarInt.getLength(index)); // Write out the actual value. + @SuppressWarnings("unchecked") Coder coder = (Coder) elementCoders.get(index); coder.registerByteSizeObserver(union.getValue(), observer, context); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java index dda2488dac346..fa54eee605b9c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java @@ -59,6 +59,7 @@ static class WindowMergeContext extends WindowingFn.MergeContext { private final AbstractWindowSet windowSet; + @SuppressWarnings("unchecked") public WindowMergeContext( AbstractWindowSet windowSet, WindowingFn windowingFn) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java index 7649a8c637248..a0b91b53037d9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java @@ -30,6 +30,7 @@ * @param Type of elements being windowed * @param Window type */ +@SuppressWarnings("serial") public class AssignWindowsDoFn extends DoFn { private WindowingFn fn; @@ -38,6 +39,7 @@ public AssignWindowsDoFn(WindowingFn fn) { } @Override + @SuppressWarnings("unchecked") public void processElement(ProcessContext c) throws Exception { final DoFnProcessContext context = (DoFnProcessContext) c; Collection windows = diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java index 2d42407c94377..fb5277189f2ba 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java @@ -142,6 +142,7 @@ public void deleteTagList(CodedTupleTag tag) { } @Override + @SuppressWarnings("unchecked") public Iterable readTagList(CodedTupleTag tag) { Map, List> perKeyTagLists = tagLists.get(getKey()); if (perKeyTagLists == null || perKeyTagLists.get(tag) == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java index 4801d6d64c3c3..957434c154421 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java @@ -186,7 +186,7 @@ private static Map> emptyIfNull(Map> input) { private Map> deepCopy(Map> mergeTree) { Map> newMergeTree = new HashMap<>(); for (Map.Entry> entry : mergeTree.entrySet()) { - newMergeTree.put(entry.getKey(), new 
HashSet(entry.getValue())); + newMergeTree.put(entry.getKey(), new HashSet(entry.getValue())); } return newMergeTree; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 722a0227d6ca4..7eca7d5f510bf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -73,14 +73,14 @@ public DoFnContext(PipelineOptions options, this.mainOutputTag = mainOutputTag; this.outputMap = new HashMap<>(); outputMap.put(mainOutputTag, outputManager.initialize(mainOutputTag)); - for (TupleTag sideOutputTag : sideOutputTags) { + for (TupleTag sideOutputTag : sideOutputTags) { outputMap.put(sideOutputTag, outputManager.initialize(sideOutputTag)); } this.stepContext = stepContext; this.addCounterMutator = addCounterMutator; } - public R getReceiver(TupleTag tag) { + public R getReceiver(TupleTag tag) { R receiver = outputMap.get(tag); if (receiver == null) { throw new IllegalArgumentException( @@ -97,6 +97,7 @@ public PipelineOptions getPipelineOptions() { } @Override + @SuppressWarnings("unchecked") public T sideInput(PCollectionView view) { TupleTag tag = view.getTagInternal(); if (!sideInputs.has(tag)) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index 3a6d4af1c1a92..d4d788d89af7f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -79,6 +79,7 @@ public static DoFnRunner create( mainOutputTag, sideOutputTags, stepContext, addCounterMutator); } + @SuppressWarnings({"rawtypes", "unchecked"}) public static DoFnRunner createWithListOutputs( PipelineOptions options, DoFn fn, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java index 98fe4606807a5..879693584ae56 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java @@ -132,6 +132,7 @@ public V get(TupleTag tag) { private final Map, ?> valueMap; + @SuppressWarnings("rawtypes") private PTuple() { this(new LinkedHashMap()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java index 42071ec467ee3..68ada31d981ce 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java @@ -119,6 +119,7 @@ public static T deserialize(Map serialized, Class clazz) * URI value from the {@link CloudKnownType} enum. Upon finding this type * information, it converts it into the correspondingly typed Java value. 
*/ + @SuppressWarnings("unchecked") private static Object deserializeCloudKnownTypes(Object src) { if (src instanceof Map) { Map srcMap = (Map) src; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java index d139380c65c12..8789c4e84c8df 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java @@ -69,7 +69,7 @@ public void remove() { @Override public PeekingReiterator copy() { - return new PeekingReiterator(this); + return new PeekingReiterator<>(this); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java index 19428201f0395..4dc9a1e1a8f16 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/GroupingShuffleEntryIterator.java @@ -61,7 +61,7 @@ public abstract class GroupingShuffleEntryIterator public GroupingShuffleEntryIterator( Reiterator shuffleIterator) { this.shuffleIterator = - new PeekingReiterator( + new PeekingReiterator<>( new ProgressTrackingReiterator<>( shuffleIterator, new ProgressTrackerGroup() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java index a13b74afbf8b3..d758bbd298d9f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiver.java @@ -37,7 +37,7 @@ public class OutputReceiver implements Receiver { private final String outputName; // Might be null, e.g., undeclared outputs will not have an // elementByteSizeObservable. 
- private final ElementByteSizeObservable elementByteSizeObservable; + private final ElementByteSizeObservable elementByteSizeObservable; private final Counter elementCount; private Counter byteCount = null; private Counter meanByteCount = null; @@ -52,16 +52,16 @@ public class OutputReceiver implements Receiver { public OutputReceiver(String outputName, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator) { - this(outputName, (ElementByteSizeObservable) null, - counterPrefix, addCounterMutator); + this(outputName, null, counterPrefix, addCounterMutator); } + @SuppressWarnings("unchecked") public OutputReceiver(String outputName, - ElementByteSizeObservable elementByteSizeObservable, + ElementByteSizeObservable elementByteSizeObservable, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator) { this.outputName = outputName; - this.elementByteSizeObservable = elementByteSizeObservable; + this.elementByteSizeObservable = (ElementByteSizeObservable) elementByteSizeObservable; elementCount = addCounterMutator.addCounter( Counter.longs(elementsCounterName(counterPrefix, outputName), SUM)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index 1d4e5ec9bef5b..f4caef51d0d1e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -114,6 +114,7 @@ public Source getSource() { @Override public void start() throws Exception { try (StateSampler.ScopedState start = stateSampler.scopedState(startState)) { + assert start != null; super.start(); runReadLoop(); } @@ -129,6 +130,7 @@ protected void runReadLoop() throws Exception { source.addObserver(new SourceObserver()); try (StateSampler.ScopedState process = stateSampler.scopedState(processState)) { + assert process != null; synchronized (sourceIteratorLock) { sourceIterator = source.iterator(); } @@ -162,6 +164,7 @@ public void run() { // Stop position update request comes concurrently. // Accesses to iterator need to be synchronized. try (StateSampler.ScopedState read = stateSampler.scopedState(readState)) { + assert read != null; synchronized (sourceIteratorLock) { if (!sourceIterator.hasNext()) { break; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java index 63270b682ebce..c4a603f70caad 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java @@ -30,6 +30,7 @@ /** * Abstract executor for WorkItem tasks. */ +@SuppressWarnings("resource") public abstract class WorkExecutor implements AutoCloseable { /** The output counters for this task. 
*/ private final CounterSet outputCounters; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java index 6f8b2e586548f..3965e0d0a19c5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java @@ -28,7 +28,7 @@ public class WriteOperation extends ReceivingOperation { /** * The Sink this operation writes to. */ - public final Sink sink; + public final Sink sink; /** * The total byte counter for all data written by this operation. @@ -38,10 +38,10 @@ public class WriteOperation extends ReceivingOperation { /** * The Sink's writer this operation writes to, created by start(). */ - Sink.SinkWriter writer; + Sink.SinkWriter writer; public WriteOperation(String operationName, - Sink sink, + Sink sink, OutputReceiver[] receivers, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, @@ -54,7 +54,7 @@ public WriteOperation(String operationName, } /** Invoked by tests. */ - public WriteOperation(Sink sink, + public WriteOperation(Sink sink, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { @@ -67,7 +67,7 @@ protected String bytesCounterName(String counterPrefix, return operationName + "-ByteCount"; } - public Sink getSink() { + public Sink getSink() { return sink; } @@ -75,8 +75,9 @@ public Sink getSink() { public void start() throws Exception { try (StateSampler.ScopedState start = stateSampler.scopedState(startState)) { + assert start != null; super.start(); - writer = sink.writer(); + writer = (Sink.SinkWriter) sink.writer(); } } @@ -84,6 +85,7 @@ public void start() throws Exception { public void process(Object outputElem) throws Exception { try (StateSampler.ScopedState process = stateSampler.scopedState(processState)) { + assert process != null; checkStarted(); byteCount.addValue(writer.add(outputElem)); } @@ -93,6 +95,7 @@ public void process(Object outputElem) throws Exception { public void finish() throws Exception { try (StateSampler.ScopedState finish = stateSampler.scopedState(finishState)) { + assert finish != null; checkStarted(); writer.close(); super.finish(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java index 770d460249339..ffbf9c577ce53 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTag.java @@ -46,7 +46,7 @@ public class CodedTupleTag extends TupleTag { * @param the type of the values associated with the tag */ public static CodedTupleTag of(String id, Coder coder) { - return new CodedTupleTag(id, coder); + return new CodedTupleTag<>(id, coder); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java index 6f96c694ea2e6..a30d3feeeb612 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/CodedTupleTagMap.java @@ -45,6 +45,7 @@ public static CodedTupleTagMap of(Map, Object> map) { * {@code CodedTupleTagMap}, or {@code null} if the tag has no * asssociated value. 
*/ + @SuppressWarnings("unchecked") public T get(CodedTupleTag tag) { return (T) map.get(tag); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index 34ad2163a8a1e..fa09bd09106af 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -191,7 +191,7 @@ public Output apply( * *

By default, no merging is performed. */ - private WindowingFn windowingFn; + private WindowingFn windowingFn; private PCollection() {} @@ -212,7 +212,7 @@ public PCollection setTypeTokenInternal(TypeToken typeToken) { * *

For use by primitive transformations only. */ - public PCollection setWindowingFnInternal(WindowingFn windowingFn) { + public PCollection setWindowingFnInternal(WindowingFn windowingFn) { this.windowingFn = windowingFn; return this; } @@ -234,7 +234,7 @@ public PCollection setPipelineInternal(Pipeline pipeline) { *

For use by primitive transformations only. */ public static PCollection createPrimitiveOutputInternal( - WindowingFn windowingFn) { + WindowingFn windowingFn) { return new PCollection().setWindowingFnInternal(windowingFn); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java index fecc175f4d3cc..349ec2cc750cc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java @@ -183,7 +183,7 @@ public Output apply( *

For use by primitive transformations only. */ public static PCollectionTuple ofPrimitiveOutputsInternal( - TupleTagList outputTags, WindowingFn windowingFn) { + TupleTagList outputTags, WindowingFn windowingFn) { Map, PCollection> pcollectionMap = new LinkedHashMap<>(); for (TupleTag outputTag : outputTags.tupleTags) { if (pcollectionMap.containsKey(outputTag)) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java index 9d91a18cb3cf0..ffa0202e15b2c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java @@ -65,6 +65,7 @@ public Instant getTimestamp() { /** * Coder for {@code TimestampedValue}. */ + @SuppressWarnings("serial") public static class TimestampedValueCoder extends StandardCoder> { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java index b6d2b3c657d04..8f500a1655994 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java @@ -32,7 +32,8 @@ /** Unit tests for {@link ByteArrayCoder}. */ @RunWith(JUnit4.class) public class ByteArrayCoderTest { - @Test public void testOuterContext() throws CoderException, IOException { + @Test + public void testOuterContext() throws CoderException, IOException { byte[] buffer = {0xa, 0xb, 0xc}; ByteArrayOutputStream os = new ByteArrayOutputStream(); @@ -44,7 +45,8 @@ public class ByteArrayCoderTest { assertThat(decoded, equalTo(buffer)); } - @Test public void testNestedContext() throws CoderException, IOException { + @Test + public void testNestedContext() throws CoderException, IOException { byte[][] buffers = {{0xa, 0xb, 0xc}, {}, {}, {0xd, 0xe}, {}}; ByteArrayOutputStream os = new ByteArrayOutputStream(); @@ -60,7 +62,8 @@ public class ByteArrayCoderTest { } } - @Test public void testRegisterByteSizeObserver() throws Exception { + @Test + public void testRegisterByteSizeObserver() throws Exception { CounterTestUtils.testByteCount(ByteArrayCoder.of(), Coder.Context.OUTER, new byte[][]{{ 0xa, 0xb, 0xc }}); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java index ad6f16567e92c..1d45f4ab9386c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java @@ -249,6 +249,7 @@ void runTestWrite(AvroIO.Write.Bound write, String expectedName) User[] users = generateAvroObjects(); DirectPipeline p = DirectPipeline.createForTest(); + @SuppressWarnings("unchecked") PCollection input = p.apply(Create.of(Arrays.asList((T[]) users))) .setCoder((Coder) AvroCoder.of(User.class)); PDone output = input.apply(write.withoutSharding()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index 31bb18cd955f5..46bdd5944a969 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -69,6 +69,7 @@ * Tests for TextIO Read and Write transforms. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("unchecked") public class TextIOTest { @Rule @@ -152,7 +153,7 @@ void runTestRead(T[] expected, Coder coder) throws Exception { if (coder.equals(StringUtf8Coder.of())) { TextIO.Read.Bound readStrings = TextIO.Read.from(filename); // T==String - read = (TextIO.Read.Bound) readStrings; + read = (TextIO.Read.Bound) readStrings; } else { read = TextIO.Read.from(filename).withCoder(coder); } @@ -222,7 +223,7 @@ void runTestWrite(T[] elems, Coder coder) throws Exception { TextIO.Write.Bound writeStrings = TextIO.Write.to(filename).withoutSharding(); // T==String - write = (TextIO.Write.Bound) writeStrings; + write = (TextIO.Write.Bound) writeStrings; } else { write = TextIO.Write.to(filename).withCoder(coder).withoutSharding(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index 872f31dae3554..d5fb27d9203c1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -380,6 +380,7 @@ public static interface Arrays extends PipelineOptions { } @Test + @SuppressWarnings("rawtypes") public void testArrays() { String[] args = new String[] { "--boolean=true", diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java index 398326e8a385e..8ab8de6dbfe12 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunnerTest.java @@ -49,10 +49,10 @@ public class BlockingDataflowPipelineRunnerTest { // This class mocks a call to DataflowPipelineJob.waitToFinish(): // it blocks the thread to simulate waiting, // and releases the blocking once signaled - static class MockWaitToFinish implements Answer { + static class MockWaitToFinish implements Answer { NotificationHelper jobCompleted = new NotificationHelper(); - public Object answer(InvocationOnMock invocation) throws InterruptedException { + public JobState answer(InvocationOnMock invocation) throws InterruptedException { System.out.println("MockWaitToFinish.answer(): Wait for signaling job completion."); assertTrue("Test did not receive mock job completion signal", jobCompleted.waitTillSet(10000)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index 12649de8acfca..3eb90edbf78e6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -239,6 +239,7 @@ public void testPredefinedAddStep() throws Exception { */ private static OutputReference getOutputPortReference(Step step) throws Exception { // TODO: This should be done via a Structs accessor. 
+ @SuppressWarnings("unchecked") List> output = (List>) step.getProperties().get(PropertyNames.OUTPUT_INFO); String outputTagId = getString(Iterables.getOnlyElement(output), PropertyNames.OUTPUT_NAME); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java index 520e03e28b9dd..238e1844ca16a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java @@ -58,7 +58,7 @@ public void testLongName() throws IOException { options.setGcsUtil(mockGcsUtil); options.setRunner(DirectPipelineRunner.class); options.setGcpCredential(new TestCredential()); - PipelineRunner runner = PipelineRunner.fromOptions(options); + PipelineRunner runner = PipelineRunner.fromOptions(options); assertTrue(runner instanceof DirectPipelineRunner); } @@ -71,7 +71,7 @@ public void testShortName() throws IOException { options.setGcsUtil(mockGcsUtil); options.setRunner(DirectPipelineRunner.class); options.setGcpCredential(new TestCredential()); - PipelineRunner runner = PipelineRunner.fromOptions(options); + PipelineRunner runner = PipelineRunner.fromOptions(options); assertTrue(runner instanceof DirectPipelineRunner); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java index 79653feabc4ce..c55be77cbceba 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java @@ -76,7 +76,7 @@ public void testCreateAvroSink() throws Exception { Sink sink = runTestCreateAvroSink(pathToAvroFile, coder.asCloudObject()); Assert.assertThat(sink, new IsInstanceOf(AvroSink.class)); - AvroSink avroSink = (AvroSink) sink; + AvroSink avroSink = (AvroSink) sink; Assert.assertEquals(pathToAvroFile, avroSink.filenamePrefix); Assert.assertEquals(coder.getValueCoder(), avroSink.avroCoder); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java index 3c81950fd29d6..1db06de456057 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java @@ -40,6 +40,7 @@ * Tests for AvroSourceFactory. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("rawtypes") public class AvroSourceFactoryTest { private final String pathToAvroFile = "/path/to/file.avro"; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java index 4833ff19a3109..abddf51ad5bc9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java @@ -184,6 +184,7 @@ public void process(Object outputElem) { } } + @SuppressWarnings("rawtypes") private static ParDoFn createCombineValuesFn( String phase, Combine.KeyedCombineFn combineFn) throws Exception { // This partially mirrors the work that diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index 2167a504183fc..952a51bbce9c6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -77,6 +77,7 @@ /** Unit tests for {@link DataflowWorkProgressUpdater}. */ @RunWith(JUnit4.class) +@SuppressWarnings("resource") public class DataflowWorkProgressUpdaterTest { static class TestMapTaskExecutor extends MapTaskExecutor { ApproximateProgress progress = null; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java index 64cf4f5520217..82d2c82e99761 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java @@ -82,7 +82,7 @@ void runTestCreateInMemorySource(List elements, Source source = SourceFactory.create(PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); Assert.assertThat(source, new IsInstanceOf(InMemorySource.class)); - InMemorySource inMemorySource = (InMemorySource) source; + InMemorySource inMemorySource = (InMemorySource) source; Assert.assertEquals(encodedElements(elements, coder), inMemorySource.encodedElements); Assert.assertEquals(expectedStart, inMemorySource.startIndex); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index 63d38ebdd4109..6ceb61f267708 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -43,7 +43,7 @@ * Tests for ParDoFnFactory. 
*/ @RunWith(JUnit4.class) -@SuppressWarnings("serial") +@SuppressWarnings({"rawtypes", "serial", "unchecked"}) public class ParDoFnFactoryTest { static class TestDoFn extends DoFn { final String stringState; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java index 4b8901af34b57..0bf4916359cfa 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java @@ -42,6 +42,7 @@ * Tests for ShuffleSinkFactory. */ @RunWith(JUnit4.class) +@SuppressWarnings("rawtypes") public class ShuffleSinkFactoryTest { ShuffleSink runTestCreateShuffleSinkHelper(byte[] shuffleWriterConfig, String shuffleKind, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java index 75fc7479687e7..1803b06d7fd1e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java @@ -45,6 +45,7 @@ * and PartitioningShuffleSourceFactory. */ @RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) public class ShuffleSourceFactoryTest { T runTestCreateShuffleSource(byte[] shuffleReaderConfig, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java index 9f9e63090a6e5..1a2b843242c82 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSinkFactoryTest.java @@ -70,7 +70,7 @@ void runTestCreateTextSink(String filename, cloudSink, new BatchModeExecutionContext()); Assert.assertThat(sink, new IsInstanceOf(TextSink.class)); - TextSink textSink = (TextSink) sink; + TextSink textSink = (TextSink) sink; Assert.assertEquals(filename, textSink.namePrefix); Assert.assertEquals( appendTrailingNewlines == null ? true : appendTrailingNewlines, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java index 2fa50b567e781..05d8a721fefb3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java @@ -70,7 +70,7 @@ void runTestCreateTextSource(String filename, cloudSource, new BatchModeExecutionContext()); Assert.assertThat(source, new IsInstanceOf(TextSource.class)); - TextSource textSource = (TextSource) source; + TextSource textSource = (TextSource) source; Assert.assertEquals(filename, textSource.filename); Assert.assertEquals( stripTrailingNewlines == null ? 
true : stripTrailingNewlines, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java index 8aee7aaf00529..87f8379479ad8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java @@ -25,7 +25,6 @@ import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; -import com.google.cloud.dataflow.sdk.runners.worker.TextSource.TextFileIterator; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Source; @@ -421,7 +420,8 @@ public void testUpdateStopPosition() throws Exception { ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(textSource); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { Assert.assertNull(iterator.updateStopPosition( cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); } @@ -437,7 +437,8 @@ public void testUpdateStopPosition() throws Exception { ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(textSource); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { Assert.assertNull(iterator.getEndOffset()); Assert.assertEquals( stop, @@ -463,7 +464,8 @@ public void testUpdateStopPosition() throws Exception { ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(textSource); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { Assert.assertEquals(fileContent[0], iterator.next()); Assert.assertEquals(fileContent[1], iterator.next()); Assert.assertThat(sourceProgressToCloudProgress(iterator.getProgress()) @@ -488,7 +490,8 @@ public void testUpdateStopPosition() throws Exception { ExecutorTestUtils.TestSourceObserver observer = new ExecutorTestUtils.TestSourceObserver(textSource); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { Assert.assertEquals(fileContent[0], iterator.next()); Assert.assertNull(iterator.updateStopPosition( cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); @@ -535,7 +538,8 @@ private void stopPositionTestInternal(Long startOffset, tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of()); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } @@ -548,7 +552,8 @@ private void stopPositionTestInternal(Long startOffset, StringUtf8Coder.of()); accumulatedRead = new StringBuilder(); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try 
(TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } @@ -561,7 +566,8 @@ private void stopPositionTestInternal(Long startOffset, StringUtf8Coder.of()); accumulatedRead = new StringBuilder(); - try (TextFileIterator iterator = (TextFileIterator) textSource.iterator()) { + try (TextSource.TextFileIterator iterator = + (TextSource.TextFileIterator) textSource.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java index 808ef4a23b6a8..045c1df4d913e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantilesTest.java @@ -47,7 +47,7 @@ * Tests for {@link ApproximateQuantiles}. */ @RunWith(JUnit4.class) -@SuppressWarnings("serial") +@SuppressWarnings({"serial", "unchecked"}) public class ApproximateQuantilesTest { static final List> TABLE = Arrays.asList( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 4307619d3f93d..403190b6115aa 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -72,7 +72,7 @@ @SuppressWarnings("serial") public class CombineTest { - @SuppressWarnings("unchecked") + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] TABLE = new KV[] { KV.of("a", 1), KV.of("a", 1), @@ -81,7 +81,7 @@ public class CombineTest { KV.of("b", 13), }; - @SuppressWarnings("unchecked") + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] EMPTY_TABLE = new KV[] { }; @@ -117,6 +117,7 @@ private void runTestSimpleCombine(KV[] table, @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings({"rawtypes", "unchecked"}) public void testSimpleCombine() { runTestSimpleCombine(TABLE, 20, new KV[] { KV.of("a", 6), KV.of("b", 14) }); @@ -124,10 +125,12 @@ public void testSimpleCombine() { @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings({"rawtypes", "unchecked"}) public void testSimpleCombineEmpty() { runTestSimpleCombine(EMPTY_TABLE, 0, new KV[] { }); } + @SuppressWarnings("unchecked") private void runTestBasicCombine(KV[] table, Set globalUnique, KV>[] perKeyUnique) { @@ -151,6 +154,7 @@ private void runTestBasicCombine(KV[] table, @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings({"rawtypes", "unchecked"}) public void testBasicCombine() { runTestBasicCombine(TABLE, ImmutableSet.of(1, 13, 4), new KV[] { KV.of("a", (Set) ImmutableSet.of(1, 4)), @@ -159,6 +163,7 @@ public void testBasicCombine() { @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings("rawtypes") public void testBasicCombineEmpty() { runTestBasicCombine(EMPTY_TABLE, ImmutableSet.of(), new KV[] { }); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java index 05375bd7c5364..256b06198bc30 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CountTest.java @@ -46,6 +46,7 @@ public class CountTest { @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings("unchecked") public void testCountPerElementBasic() { Pipeline p = TestPipeline.create(); @@ -67,6 +68,7 @@ public void testCountPerElementBasic() { @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + @SuppressWarnings("unchecked") public void testCountPerElementEmpty() { Pipeline p = TestPipeline.create(); @@ -75,8 +77,7 @@ public void testCountPerElementEmpty() { PCollection> output = input.apply(Count.perElement()); - DataflowAssert.that(output) - .containsInAnyOrder(); + DataflowAssert.that(output).containsInAnyOrder(); p.run(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java index 2d2615c8a5eaf..3875ac465b331 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CreateTest.java @@ -47,7 +47,7 @@ * Tests for Create. */ @RunWith(JUnit4.class) -@SuppressWarnings("serial") +@SuppressWarnings({"serial", "unchecked"}) public class CreateTest { @Rule public final ExpectedException thrown = ExpectedException.none(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index f20e740c84c3f..0ea40ae263ce9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -53,7 +53,7 @@ * Tests for GroupByKey. */ @RunWith(JUnit4.class) -@SuppressWarnings("serial") +@SuppressWarnings({"rawtypes", "serial", "unchecked"}) public class GroupByKeyTest { @Rule diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java index 1d6e233adef85..6763793d490f1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KeysTest.java @@ -37,6 +37,7 @@ */ @RunWith(JUnit4.class) public class KeysTest { + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] TABLE = new KV[] { KV.of("one", 1), KV.of("two", 2), @@ -45,6 +46,7 @@ public class KeysTest { KV.of("dup", 5) }; + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] EMPTY_TABLE = new KV[] { }; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java index 15c2ff2ff7366..d192b39fcd6ca 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java @@ -36,6 +36,7 @@ * Tests for KvSwap transform. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) public class KvSwapTest { static final KV[] TABLE = new KV[] { KV.of("one", 1), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 163426b1c4c85..c5e699216707b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -868,7 +868,7 @@ public void testMultiOutputChaining() { @Test public void testJsonEscaping() { // Declare an arbitrary function and make sure we can serialize it - DoFn doFn = new DoFn() { + DoFn doFn = new DoFn() { @Override public void processElement(ProcessContext c) { c.output(c.element() + 1); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java index 95f88489dbd54..9eaeb9a0e3fac 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SampleTest.java @@ -49,6 +49,7 @@ public class SampleTest { * Verifies that the result of a Sample operation contains the expected number of elements, * and that those elements are a subset of the items in expected. */ + @SuppressWarnings("rawtypes") public static class VerifyCorrectSample implements SerializableFunction, Void> { private T[] expectedValues; @@ -58,12 +59,14 @@ public static class VerifyCorrectSample * expectedSize is the number of elements that the Sample should contain. expected is the set * of elements that the sample may contain. */ + @SafeVarargs VerifyCorrectSample(int expectedSize, T... expected) { this.expectedValues = expected; this.expectedSize = expectedSize; } @Override + @SuppressWarnings("unchecked") public Void apply(Iterable in) { List actual = new ArrayList<>(); for (T elem : in) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java index 909dcba9981fd..131aaad3142e7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java @@ -47,6 +47,8 @@ private static class TestCase> { final N sum; final Double mean; + @SafeVarargs + @SuppressWarnings("all") public TestCase(N min, N max, N sum, N... 
values) { this.data = Arrays.asList(values); this.min = min; @@ -115,6 +117,7 @@ public void testLongStats() { } @Test + @SuppressWarnings("unchecked") public void testMeanCountSumSerializable() { Pipeline p = TestPipeline.create(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java index fced952d90670..92dc8a90c1dff 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/TopTest.java @@ -59,7 +59,7 @@ public class TopTest { static final String[] EMPTY_COLLECTION = new String[] { }; - @SuppressWarnings("unchecked") + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] TABLE = new KV[] { KV.of("a", 1), KV.of("a", 2), @@ -70,7 +70,7 @@ public class TopTest { KV.of("b", 100), }; - @SuppressWarnings("unchecked") + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] EMPTY_TABLE = new KV[] { }; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java index 497d8fc8406e0..0dde1f92cd48d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ValuesTest.java @@ -37,6 +37,7 @@ */ @RunWith(JUnit4.class) public class ValuesTest { + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] TABLE = new KV[] { KV.of("one", 1), KV.of("two", 2), @@ -45,6 +46,7 @@ public class ValuesTest { KV.of("dup", 4) }; + @SuppressWarnings({"rawtypes", "unchecked"}) static final KV[] EMPTY_TABLE = new KV[] { }; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java index 24e6dde65c4fe..8144851f5cbb6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoderTest.java @@ -42,7 +42,7 @@ public void testSerializationDeserialization() { UnionCoder.of(Arrays.>asList(StringUtf8Coder.of(), DoubleCoder.of())); CloudObject encoding = newCoder.asCloudObject(); - Coder decodedCoder = Serializer.deserialize(encoding, Coder.class); + Coder decodedCoder = Serializer.deserialize(encoding, Coder.class); assertEquals(newCoder, decodedCoder); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java index e9041c860cee7..9e05a7eb26c4b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java @@ -50,7 +50,7 @@ /** Unit tests for bucketing. 
*/ @RunWith(JUnit4.class) -@SuppressWarnings("serial") +@SuppressWarnings({"serial", "unchecked"}) public class WindowingTest implements Serializable { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java index 8f8dfbb9bafca..9a375678d3e52 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java @@ -53,6 +53,7 @@ public class AggregatorImplTest { private static final String AGGREGATOR_NAME = "aggregator_name"; + @SuppressWarnings("rawtypes") private void testAggregator(List items, SerializableFunction, V> combiner, Counter expectedCounter) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index d01fe1008fa39..37c0f8edbff4a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -45,6 +45,7 @@ /** Unit tests for {@link GroupAlsoByWindowsDoFn}. */ @RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) public class GroupAlsoByWindowsDoFnTest { ExecutionContext execContext; CounterSet counters; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java index 18777b2aa3942..2c0eb94123373 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/InstanceBuilderTest.java @@ -30,6 +30,7 @@ * Tests of InstanceBuilder. */ @RunWith(JUnit4.class) +@SuppressWarnings("rawtypes") public class InstanceBuilderTest { @Rule diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index 94c44c707d0f3..f5895037e4fcb 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -47,6 +47,7 @@ /** Unit tests for {@link StreamingGroupAlsoByWindowsDoFn}. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) public class StreamingGroupAlsoByWindowsDoFnTest { ExecutionContext execContext; CounterSet counters; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java index ff40e0d06f182..249df71f71e18 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTest.java @@ -95,6 +95,7 @@ public void testNameKindAndCloudCounterRepresentation() { cc = flush(c3); assertEquals("c3", cc.getName().getName()); assertEquals("SET", cc.getKind()); + @SuppressWarnings("unchecked") Set s = (Set) cc.getSet(); assertEquals(2, s.size()); assertTrue(s.containsAll(Arrays.asList( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java index 9c428476e28f0..6f7157c8b0e96 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/CounterTestUtils.java @@ -104,13 +104,13 @@ public static Set getDeltaSet(Counter counter) { * they are mutually consistent. This is useful for testing coder * implementations. */ - public static void testByteCount(Coder coder, Coder.Context context, Object[] elements) + public static void testByteCount(Coder coder, Coder.Context context, T[] elements) throws Exception { Counter meanByteCount = Counter.longs("meanByteCount", MEAN); ElementByteSizeObserver observer = new ElementByteSizeObserver(meanByteCount); ByteArrayOutputStream os = new ByteArrayOutputStream(); - for (Object elem : elements) { + for (T elem : elements) { coder.registerByteSizeObserver(elem, observer, context); coder.encode(elem, os, context); observer.advance(); @@ -118,6 +118,6 @@ public static void testByteCount(Coder coder, Coder.Context context, Object[] el long expectedLength = os.toByteArray().length; Assert.assertEquals(expectedLength, (long) getTotalAggregate(meanByteCount)); - Assert.assertEquals(elements.length, (long) getTotalCount(meanByteCount)); + Assert.assertEquals(elements.length, getTotalCount(meanByteCount)); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java index 0c678abe75d55..07b3bdde5dc26 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java @@ -34,6 +34,7 @@ /** * Utilities for tests. */ +@SuppressWarnings({"rawtypes", "unchecked"}) public class ExecutorTestUtils { // Do not instantiate. private ExecutorTestUtils() { } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java index d0f8e747de7eb..84fe39a6d9212 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperationTest.java @@ -32,6 +32,7 @@ * Tests for FlattenOperation. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings("unchecked") public class FlattenOperationTest { @Test public void testRunFlattenOperation() throws Exception { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java index 27017962ccc5f..fa46e2f235b71 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java @@ -149,6 +149,7 @@ public void testExecuteMapTaskExecutor() throws Exception { } @Test + @SuppressWarnings("unchecked") public void testGetOutputCounters() throws Exception { CounterSet counters = new CounterSet(); String counterPrefix = "test-"; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java index 08955ac564d7c..044674e7e9318 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/OutputReceiverTest.java @@ -44,6 +44,7 @@ public TestOutputReceiver() { this(new CounterSet()); } + @SuppressWarnings("rawtypes") public TestOutputReceiver(CounterSet counters) { super("output_name", new ElementByteSizeObservableCoder(StringUtf8Coder.of()), @@ -72,7 +73,7 @@ public void testEmptyOutputReceiver() throws Exception { (long) CounterTestUtils.getTotalAggregate(fanOut.getMeanByteCount())); Assert.assertEquals( 2, - (long) CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); + CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); } @Test @@ -100,7 +101,7 @@ public void testMultipleOutputReceiver() throws Exception { (long) CounterTestUtils.getTotalAggregate(fanOut.getMeanByteCount())); Assert.assertEquals( 2, - (long) CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); + CounterTestUtils.getTotalCount(fanOut.getMeanByteCount())); Assert.assertThat(receiver1.outputElems, CoreMatchers.hasItems("hi", "bob")); Assert.assertThat(receiver2.outputElems, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java index b08266cbb4d82..1c95766e73c01 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperationTest.java @@ -32,6 +32,7 @@ * Tests for ParDoOperation. */ @RunWith(JUnit4.class) +@SuppressWarnings("unchecked") public class ParDoOperationTest { static class TestParDoFn extends ParDoFn { final OutputReceiver outputReceiver; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java index 620ac0c89894f..38aa5a35b3057 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java @@ -63,6 +63,7 @@ * Tests for PartialGroupByKeyOperation. 
*/ @RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) public class PartialGroupByKeyOperationTest { @Test public void testRunPartialGroupByKeyOperation() throws Exception { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index d1a69e7a10cf0..c324a0bfe0010 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -139,6 +139,7 @@ public void process(Object outputElem) throws Exception { } @Test + @SuppressWarnings("unchecked") public void testRunReadOperation() throws Exception { TestSource source = new TestSource(); source.addInput("hi", "there", "", "bob"); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java index d350db1798bfe..291efe14a5546 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java @@ -50,11 +50,13 @@ public void basicTest() throws InterruptedException { try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { + assert s1 != null; Thread.sleep(2 * periodMs); } try (StateSampler.ScopedState s2 = stateSampler.scopedState(state2)) { + assert s2 != null; Thread.sleep(3 * periodMs); } @@ -85,14 +87,17 @@ public void nestingTest() throws InterruptedException { try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { + assert s1 != null; Thread.sleep(2 * periodMs); try (StateSampler.ScopedState s2 = stateSampler.scopedState(state2)) { + assert s2 != null; Thread.sleep(2 * periodMs); try (StateSampler.ScopedState s3 = stateSampler.scopedState(state3)) { + assert s3 != null; Thread.sleep(2 * periodMs); } Thread.sleep(periodMs); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java index 6b51bc603531f..4100880427f2b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperationTest.java @@ -33,6 +33,7 @@ @RunWith(JUnit4.class) public class WriteOperationTest { @Test + @SuppressWarnings("unchecked") public void testRunWriteOperation() throws Exception { ExecutorTestUtils.TestSink sink = new ExecutorTestUtils.TestSink(); CounterSet counterSet = new CounterSet(); From 78297492844d7dffb0d7813ec856d231c826d8e0 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 19 Dec 2014 16:22:16 -0800 Subject: [PATCH 0050/1541] Fix equals in WindowedValue.java. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82551379 --- .../com/google/cloud/dataflow/sdk/util/WindowedValue.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index d47e810fa9909..67daaf0de246a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -141,7 +141,9 @@ public static WindowedValueCoder getValueOnlyCoder(Coder valueCoder) { public boolean equals(Object o) { if (o instanceof WindowedValue) { WindowedValue that = (WindowedValue) o; - if (that.timestamp.isEqual(timestamp) && that.windows.size() == windows.size()) { + if (Objects.equals(that.value, this.value) + && that.timestamp.isEqual(timestamp) + && that.windows.size() == windows.size()) { for (Iterator thatIterator = that.windows.iterator(), thisIterator = windows.iterator(); thatIterator.hasNext() && thisIterator.hasNext(); /* do nothing */) { From 44c39792718cec7a4c07008dc51f0355af336cc7 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 19 Dec 2014 16:38:23 -0800 Subject: [PATCH 0051/1541] Remove java.util.logging usage, SDK code is meant to use SLF4J as logging API. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82552388 --- .../cloud/dataflow/sdk/runners/worker/BigQuerySource.java | 4 ---- .../com/google/cloud/dataflow/sdk/util/common/Counter.java | 3 --- 2 files changed, 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java index b43c942b3ed98..fa1afddf87a7f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java @@ -28,7 +28,6 @@ import java.io.IOException; import java.util.NoSuchElementException; -import java.util.logging.Logger; /** * A source that reads a BigQuery table and yields TableRow objects. @@ -39,9 +38,6 @@ * read by each worker (i.e. the source is used as a side input). */ public class BigQuerySource extends Source { - private static final Logger LOG = - Logger.getLogger(BigQuerySource.class.getName()); - final TableReference tableRef; final BigQueryOptions bigQueryOptions; final Bigquery bigQueryClient; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java index 4516edd5dbdfe..1df87a244cb61 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java @@ -26,7 +26,6 @@ import java.util.HashSet; import java.util.Objects; import java.util.Set; -import java.util.logging.Logger; /** * A Counter enables the aggregation of a stream of values over time. The @@ -40,8 +39,6 @@ * @param the type of values aggregated by this counter */ public abstract class Counter { - private static final Logger LOG = Logger.getLogger(Counter.class.getName()); - /** * Possible kinds of counter aggregation. */ From 12addd04cb041058f77bb9a668fa7b6e6d258f5e Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 19 Dec 2014 16:40:36 -0800 Subject: [PATCH 0052/1541] Fix the Count example in java doc. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82552517 --- .../java/com/google/cloud/dataflow/sdk/transforms/Count.java | 2 +- .../google/cloud/dataflow/sdk/transforms/windowing/Window.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 49251dfeca210..0550b2fff5f40 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -46,7 +46,7 @@ *
 {@code
 * PCollection<String> words = ...;
 * PCollection<KV<String, Long>> wordCounts =
- *     words.apply(Count.create());
+ *     words.apply(Count.perElement());
  * } 
*/ public class Count { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 424fdd7c02120..71e8b0be3e36d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -70,7 +70,7 @@ * PCollection windowed_items = item.apply( * Window.into(FixedWindows.of(1, TimeUnit.MINUTES))); * PCollection> windowed_counts = windowed_items.apply( - * Count.create()); + * Count.perElement()); * } * *

Let (data, timestamp) denote a data element along with its timestamp. From 7c27fb60aaa97061dacc4d33ad8db65066a73db5 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Fri, 19 Dec 2014 21:23:11 -0800 Subject: [PATCH 0053/1541] Renames Source -> Reader (ergo in associated classes such as SourceIterator, SourceFactory, tests, local variables etc.). This separates the concepts of "Source" (description of an input) and Reader (a way to read it) and avoids conflicts with the Source protobuf. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82563808 --- .../google/cloud/dataflow/sdk/io/AvroIO.java | 112 +++--- .../cloud/dataflow/sdk/io/BigQueryIO.java | 133 +++---- .../google/cloud/dataflow/sdk/io/TextIO.java | 97 +++-- ...vroByteSource.java => AvroByteReader.java} | 26 +- .../{AvroSource.java => AvroReader.java} | 52 ++- ...rceFactory.java => AvroReaderFactory.java} | 25 +- ...igQuerySource.java => BigQueryReader.java} | 26 +- ...actory.java => BigQueryReaderFactory.java} | 11 +- .../worker/DataflowWorkProgressUpdater.java | 24 +- .../sdk/runners/worker/DataflowWorker.java | 70 ++-- ...eBasedSource.java => FileBasedReader.java} | 74 ++-- ...Source.java => GroupingShuffleReader.java} | 119 +++--- ...java => GroupingShuffleReaderFactory.java} | 30 +- ...nMemorySource.java => InMemoryReader.java} | 46 +-- ...actory.java => InMemoryReaderFactory.java} | 24 +- ...ator.java => LazyMultiReaderIterator.java} | 20 +- .../worker/MapTaskExecutorFactory.java | 217 ++++------- ...ce.java => PartitioningShuffleReader.java} | 40 +- ... => PartitioningShuffleReaderFactory.java} | 23 +- ...{SourceFactory.java => ReaderFactory.java} | 62 ++-- .../sdk/runners/worker/ShuffleSink.java | 45 +-- .../sdk/runners/worker/SideInputUtils.java | 51 +-- .../worker/SourceTranslationUtils.java | 112 +++--- .../{TextSource.java => TextReader.java} | 114 +++--- ...rceFactory.java => TextReaderFactory.java} | 35 +- ...ource.java => UngroupedShuffleReader.java} | 27 +- ...ava => UngroupedShuffleReaderFactory.java} | 27 +- .../dataflow/sdk/util/CloudSourceUtils.java | 36 +- .../cloud/dataflow/sdk/util/ReaderUtils.java | 44 +++ .../common/worker/CustomSourceFormat.java | 16 +- .../util/common/worker/MapTaskExecutor.java | 23 +- .../sdk/util/common/worker/ReadOperation.java | 58 +-- .../worker/{Source.java => Reader.java} | 45 ++- .../sdk/util/common/worker/WorkExecutor.java | 9 +- .../common/worker/WorkProgressUpdater.java | 45 ++- .../dataflow/sdk/values/PCollection.java | 8 +- ...ourceTest.java => AvroByteReaderTest.java} | 55 ++- .../sdk/runners/worker/AvroByteSinkTest.java | 10 +- .../runners/worker/AvroReaderFactoryTest.java | 107 ++++++ ...vroSourceTest.java => AvroReaderTest.java} | 60 ++- .../sdk/runners/worker/AvroSinkTest.java | 10 +- .../runners/worker/AvroSourceFactoryTest.java | 116 ------ ...st.java => BigQueryReaderFactoryTest.java} | 46 +-- ...ourceTest.java => BigQueryReaderTest.java} | 80 ++-- .../DataflowWorkProgressUpdaterTest.java | 135 +++---- ...st.java => GroupingShuffleReaderTest.java} | 243 ++++++------ ...st.java => InMemoryReaderFactoryTest.java} | 67 ++-- .../runners/worker/InMemoryReaderTest.java | 190 ++++++++++ .../runners/worker/InMemorySourceTest.java | 236 ------------ .../worker/MapTaskExecutorFactoryTest.java | 308 ++++++---------- ...ava => PartitioningShuffleReaderTest.java} | 43 +-- ...actoryTest.java => ReaderFactoryTest.java} | 72 ++-- .../worker/ShuffleReaderFactoryTest.java | 185 ++++++++++ .../worker/ShuffleSourceFactoryTest.java | 231 ------------ 
.../runners/worker/SideInputUtilsTest.java | 63 ++-- ...ryTest.java => TextReaderFactoryTest.java} | 53 ++- ...extSourceTest.java => TextReaderTest.java} | 346 ++++++++---------- ...t.java => UngroupedShuffleReaderTest.java} | 30 +- .../dataflow/sdk/util/IOFactoryTest.java | 13 +- .../util/common/worker/ExecutorTestUtils.java | 98 ++--- .../common/worker/MapTaskExecutorTest.java | 181 ++++----- .../util/common/worker/ReadOperationTest.java | 118 +++--- 62 files changed, 2133 insertions(+), 2889 deletions(-) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{AvroByteSource.java => AvroByteReader.java} (76%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{AvroSource.java => AvroReader.java} (76%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{AvroSourceFactory.java => AvroReaderFactory.java} (70%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{BigQuerySource.java => BigQueryReader.java} (81%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{BigQuerySourceFactory.java => BigQueryReaderFactory.java} (86%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{FileBasedSource.java => FileBasedReader.java} (77%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{GroupingShuffleSource.java => GroupingShuffleReader.java} (75%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{GroupingShuffleSourceFactory.java => GroupingShuffleReaderFactory.java} (68%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{InMemorySource.java => InMemoryReader.java} (76%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{InMemorySourceFactory.java => InMemoryReaderFactory.java} (64%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{LazyMultiSourceIterator.java => LazyMultiReaderIterator.java} (78%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{PartitioningShuffleSource.java => PartitioningShuffleReader.java} (75%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{PartitioningShuffleSourceFactory.java => PartitioningShuffleReaderFactory.java} (65%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{SourceFactory.java => ReaderFactory.java} (59%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{TextSource.java => TextReader.java} (73%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{TextSourceFactory.java => TextReaderFactory.java} (64%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{UngroupedShuffleSource.java => UngroupedShuffleReader.java} (75%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{UngroupedShuffleSourceFactory.java => UngroupedShuffleReaderFactory.java} (70%) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReaderUtils.java rename sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/{Source.java => Reader.java} (78%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{AvroByteSourceTest.java => AvroByteReaderTest.java} (74%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{AvroSourceTest.java => AvroReaderTest.java} (73%) delete mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java rename 
sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{BigQuerySourceFactoryTest.java => BigQueryReaderFactoryTest.java} (53%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{BigQuerySourceTest.java => BigQueryReaderTest.java} (69%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{GroupingShuffleSourceTest.java => GroupingShuffleReaderTest.java} (68%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{InMemorySourceFactoryTest.java => InMemoryReaderFactoryTest.java} (50%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java delete mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{PartitioningShuffleSourceTest.java => PartitioningShuffleReaderTest.java} (78%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{SourceFactoryTest.java => ReaderFactoryTest.java} (52%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReaderFactoryTest.java delete mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{TextSourceFactoryTest.java => TextReaderFactoryTest.java} (56%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{TextSourceTest.java => TextReaderTest.java} (57%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{UngroupedShuffleSourceTest.java => UngroupedShuffleReaderTest.java} (81%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java index 7a9e6ea3d394c..ed8be3a761567 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java @@ -17,17 +17,17 @@ package com.google.cloud.dataflow.sdk.io; import static com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; -import static com.google.cloud.dataflow.sdk.util.CloudSourceUtils.readElemsFromSource; import com.google.api.client.util.Preconditions; import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.AvroReader; import com.google.cloud.dataflow.sdk.runners.worker.AvroSink; -import com.google.cloud.dataflow.sdk.runners.worker.AvroSource; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -116,14 +116,12 @@ * } */ public class AvroIO { - /** * A root PTransform that reads from an Avro file (or multiple Avro * files matching a pattern) and returns a PCollection containing * the decoding of each record. */ public static class Read { - /** * Returns an AvroIO.Read PTransform with the given step name. 
*/ @@ -181,16 +179,17 @@ public static Bound withSchema(String schema) { * @param the type of each of the elements of the resulting * PCollection */ - public static class Bound - extends PTransform> { + public static class Bound extends PTransform> { private static final long serialVersionUID = 0; /** The filepattern to read from. */ - @Nullable final String filepattern; + @Nullable + final String filepattern; /** The class type of the records. */ final Class type; /** The schema of the input file. */ - @Nullable final Schema schema; + @Nullable + final Schema schema; Bound(Class type) { this(null, null, type, null); @@ -258,8 +257,7 @@ public PCollection apply(PInput input) { "need to set the filepattern of an AvroIO.Read transform"); } if (schema == null) { - throw new IllegalStateException( - "need to set the schema of an AvroIO.Read transform"); + throw new IllegalStateException("need to set the schema of an AvroIO.Read transform"); } // Force the output's Coder to be what the read is using, and @@ -275,7 +273,9 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "AvroIO.Read"; } + protected String getKindString() { + return "AvroIO.Read"; + } public String getFilepattern() { return filepattern; @@ -287,12 +287,10 @@ public Schema getSchema() { static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateReadHelper(transform, context); } }); @@ -307,7 +305,6 @@ public void evaluate( * multiple Avro files matching a sharding pattern). */ public static class Write { - /** * Returns an AvroIO.Write PTransform with the given step name. */ @@ -407,12 +404,12 @@ public static Bound withSchema(String schema) { * * @param the type of each of the elements of the input PCollection */ - public static class Bound - extends PTransform, PDone> { + public static class Bound extends PTransform, PDone> { private static final long serialVersionUID = 0; /** The filename to write to. */ - @Nullable final String filenamePrefix; + @Nullable + final String filenamePrefix; /** Suffix to use for each filename. */ final String filenameSuffix; /** Requested number of shards. 0 for automatic. */ @@ -422,15 +419,15 @@ public static class Bound /** The class type of the records. */ final Class type; /** The schema of the output file. */ - @Nullable final Schema schema; + @Nullable + final Schema schema; Bound(Class type) { this(null, null, "", 0, ShardNameTemplate.INDEX_OF_MAX, type, null); } - Bound(String name, String filenamePrefix, String filenameSuffix, - int numShards, String shardTemplate, - Class type, Schema schema) { + Bound(String name, String filenamePrefix, String filenameSuffix, int numShards, + String shardTemplate, Class type, Schema schema) { super(name); this.filenamePrefix = filenamePrefix; this.filenameSuffix = filenameSuffix; @@ -445,8 +442,8 @@ public static class Bound * with the given step name. Does not modify this object. 
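 *
 * A rough builder sketch, not part of this patch: the record class MyRecord and the
 * output prefix are placeholders, and only methods defined on this Write class are chained.
 *
 *   AvroIO.Write
 *       .named("WriteRecords")
 *       .to("gs://my-bucket/output/records")
 *       .withSuffix(".avro")
 *       .withNumShards(3)
 *       .withSchema(MyRecord.class)
 *
 * The resulting transform would then be applied to a PCollection of MyRecord elements.
 *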
*/ public Bound named(String name) { - return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - type, schema); + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); } /** @@ -459,8 +456,8 @@ public Bound named(String name) { */ public Bound to(String filenamePrefix) { validateOutputComponent(filenamePrefix); - return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - type, schema); + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); } /** @@ -473,8 +470,8 @@ public Bound to(String filenamePrefix) { */ public Bound withSuffix(String filenameSuffix) { validateOutputComponent(filenameSuffix); - return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - type, schema); + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); } /** @@ -493,8 +490,8 @@ public Bound withSuffix(String filenameSuffix) { */ public Bound withNumShards(int numShards) { Preconditions.checkArgument(numShards >= 0); - return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - type, schema); + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); } /** @@ -506,8 +503,8 @@ public Bound withNumShards(int numShards) { * @see ShardNameTemplate */ public Bound withShardNameTemplate(String shardTemplate) { - return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - type, schema); + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); } /** @@ -531,9 +528,8 @@ public Bound withoutSharding() { * @param the type of the elements of the input PCollection */ public Bound withSchema(Class type) { - return new Bound<>(name, filenamePrefix, filenameSuffix, - numShards, shardTemplate, - type, ReflectData.get().getSchema(type)); + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, + ReflectData.get().getSchema(type)); } /** @@ -542,9 +538,8 @@ public Bound withSchema(Class type) { * schema. Does not modify this object. 
*/ public Bound withSchema(Schema schema) { - return new Bound<>(name, filenamePrefix, filenameSuffix, - numShards, shardTemplate, - GenericRecord.class, schema); + return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, + GenericRecord.class, schema); } /** @@ -563,8 +558,7 @@ public PDone apply(PCollection input) { "need to set the filename prefix of an AvroIO.Write transform"); } if (schema == null) { - throw new IllegalStateException( - "need to set the schema of an AvroIO.Write transform"); + throw new IllegalStateException("need to set the schema of an AvroIO.Write transform"); } return new PDone(); @@ -583,7 +577,9 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "AvroIO.Write"; } + protected String getKindString() { + return "AvroIO.Write"; + } public String getFilenamePrefix() { return filenamePrefix; @@ -611,12 +607,10 @@ public Schema getSchema() { static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateWriteHelper(transform, context); } }); @@ -626,25 +620,22 @@ public void evaluate( // Pattern which matches old-style shard output patterns, which are now // disallowed. - private static final Pattern SHARD_OUTPUT_PATTERN = - Pattern.compile("@([0-9]+|\\*)"); + private static final Pattern SHARD_OUTPUT_PATTERN = Pattern.compile("@([0-9]+|\\*)"); private static void validateOutputComponent(String partialFilePattern) { Preconditions.checkArgument( !SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find(), "Output name components are not allowed to contain @* or @N patterns: " - + partialFilePattern); + + partialFilePattern); } ///////////////////////////////////////////////////////////////////////////// private static void evaluateReadHelper( - Read.Bound transform, - DirectPipelineRunner.EvaluationContext context) { - AvroSource source = new AvroSource<>( - transform.filepattern, null, null, WindowedValue.getValueOnlyCoder( - transform.getDefaultOutputCoder())); - List> elems = readElemsFromSource(source); + Read.Bound transform, DirectPipelineRunner.EvaluationContext context) { + AvroReader reader = new AvroReader<>(transform.filepattern, null, null, + WindowedValue.getValueOnlyCoder(transform.getDefaultOutputCoder())); + List> elems = ReaderUtils.readElemsFromReader(reader); List> output = new ArrayList<>(); for (WindowedValue elem : elems) { output.add(ValueWithMetadata.of(elem)); @@ -653,8 +644,7 @@ private static void evaluateReadHelper( } private static void evaluateWriteHelper( - Write.Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Write.Bound transform, DirectPipelineRunner.EvaluationContext context) { List> elems = context.getPCollectionWindowedValues(transform.getInput()); int numShards = transform.numShards; if (numShards < 1) { @@ -662,17 +652,15 @@ private static void evaluateWriteHelper( numShards = 1; } AvroSink writer = new AvroSink<>(transform.filenamePrefix, transform.shardTemplate, - transform.filenameSuffix, numShards, - WindowedValue.getValueOnlyCoder( - AvroCoder.of(transform.type, transform.schema))); + transform.filenameSuffix, numShards, + WindowedValue.getValueOnlyCoder(AvroCoder.of(transform.type, transform.schema))); try (Sink.SinkWriter> 
sink = writer.writer()) { for (WindowedValue elem : elems) { sink.add(elem); } } catch (IOException exn) { throw new RuntimeException( - "unable to write to output file \"" + transform.filenamePrefix + "\"", - exn); + "unable to write to output file \"" + transform.filenamePrefix + "\"", exn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index 37e2963035e64..d7ed8c2116a1f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -27,13 +27,13 @@ import com.google.cloud.dataflow.sdk.options.BigQueryOptions; import com.google.cloud.dataflow.sdk.options.GcpOptions; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; -import com.google.cloud.dataflow.sdk.runners.worker.BigQuerySource; +import com.google.cloud.dataflow.sdk.runners.worker.BigQueryReader; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter; -import com.google.cloud.dataflow.sdk.util.CloudSourceUtils; +import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.Transport; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -125,8 +125,7 @@ public class BigQueryIO { * This regex isn't exact - this allows for patterns that would be rejected by * the service, but this is sufficient for basic parsing of table references. */ - private static final String PROJECT_ID_REGEXP = - "[a-z][-a-z0-9:.]{4,61}[a-z0-9]"; + private static final String PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]{4,61}[a-z0-9]"; /** * Regular expression which matches Dataset IDs. @@ -142,12 +141,11 @@ public class BigQueryIO { * Matches table specifications in the form * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]". */ - private static final String DATASET_TABLE_REGEXP = String.format( - "((?%s):)?(?%s)\\.(?

%s)", - PROJECT_ID_REGEXP, DATASET_REGEXP, TABLE_REGEXP); + private static final String DATASET_TABLE_REGEXP = + String.format("((?%s):)?(?%s)\\.(?
%s)", PROJECT_ID_REGEXP, + DATASET_REGEXP, TABLE_REGEXP); - private static final Pattern TABLE_SPEC = - Pattern.compile(DATASET_TABLE_REGEXP); + private static final Pattern TABLE_SPEC = Pattern.compile(DATASET_TABLE_REGEXP); /** * Parse a table specification in the form @@ -160,15 +158,13 @@ public static TableReference parseTableSpec(String tableSpec) { if (!match.matches()) { throw new IllegalArgumentException( "Table reference is not in [project_id]:[dataset_id].[table_id] " - + "format: " + tableSpec); + + "format: " + tableSpec); } TableReference ref = new TableReference(); ref.setProjectId(match.group("PROJECT")); - return ref - .setDatasetId(match.group("DATASET")) - .setTableId(match.group("TABLE")); + return ref.setDatasetId(match.group("DATASET")).setTableId(match.group("TABLE")); } /** @@ -181,9 +177,7 @@ public static String toTableSpec(TableReference ref) { sb.append(":"); } - sb.append(ref.getDatasetId()) - .append('.') - .append(ref.getTableId()); + sb.append(ref.getDatasetId()).append('.').append(ref.getTableId()); return sb.toString(); } @@ -241,8 +235,7 @@ public static Bound withoutValidation() { * A PTransform that reads from a BigQuery table and returns a bounded * {@code PCollection}. */ - public static class Bound - extends PTransform> { + public static class Bound extends PTransform> { TableReference table; final boolean validate; @@ -292,8 +285,7 @@ public PCollection apply(PInput input) { throw new IllegalStateException( "must set the table reference of a BigQueryIO.Read transform"); } - return PCollection.createPrimitiveOutputInternal( - new GlobalWindow()) + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) // Force the output's Coder to be what the read is using, and // unchangeable later, to ensure that we read the input in the // format specified by the Read transform. @@ -306,16 +298,16 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "BigQueryIO.Read"; } + protected String getKindString() { + return "BigQueryIO.Read"; + } static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateReadHelper(transform, context); } }); @@ -370,7 +362,6 @@ public boolean getValidate() { * */ public static class Write { - /** * An enumeration type for the BigQuery create disposition strings publicly * documented as {@code CREATE_NEVER}, and {@code CREATE_IF_NEEDED}. @@ -488,8 +479,7 @@ public static Bound withoutValidation() { * A PTransform that can write either a bounded or unbounded * {@code PCollection}s to a BigQuery table. */ - public static class Bound - extends PTransform, PDone> { + public static class Bound extends PTransform, PDone> { final TableReference table; // Table schema. The schema is required only if the table does not exist. @@ -515,8 +505,7 @@ public Bound() { } Bound(String name, TableReference ref, TableSchema schema, - CreateDisposition createDisposition, - WriteDisposition writeDisposition, + CreateDisposition createDisposition, WriteDisposition writeDisposition, boolean validate) { super(name); this.table = ref; @@ -530,8 +519,7 @@ public Bound() { * Sets the name associated with this transformation. 
*/ public Bound named(String name) { - return new Bound(name, table, schema, createDisposition, - writeDisposition, validate); + return new Bound(name, table, schema, createDisposition, writeDisposition, validate); } /** @@ -547,28 +535,24 @@ public Bound to(String tableSpec) { * Specifies the table to be written to. */ public Bound to(TableReference table) { - return new Bound(name, table, schema, createDisposition, - writeDisposition, validate); + return new Bound(name, table, schema, createDisposition, writeDisposition, validate); } /** * Specifies the table schema, used if the table is created. */ public Bound withSchema(TableSchema schema) { - return new Bound(name, table, schema, createDisposition, - writeDisposition, validate); + return new Bound(name, table, schema, createDisposition, writeDisposition, validate); } /** Specifies options for creating the table. */ public Bound withCreateDisposition(CreateDisposition createDisposition) { - return new Bound(name, table, schema, createDisposition, - writeDisposition, validate); + return new Bound(name, table, schema, createDisposition, writeDisposition, validate); } /** Specifies options for writing the table. */ public Bound withWriteDisposition(WriteDisposition writeDisposition) { - return new Bound(name, table, schema, createDisposition, - writeDisposition, validate); + return new Bound(name, table, schema, createDisposition, writeDisposition, validate); } /** @@ -585,11 +569,9 @@ public PDone apply(PCollection input) { "must set the table reference of a BigQueryIO.Write transform"); } - if (createDisposition == CreateDisposition.CREATE_IF_NEEDED && - schema == null) { - throw new IllegalArgumentException( - "CreateDisposition is CREATE_IF_NEEDED, " - + "however no schema was provided."); + if (createDisposition == CreateDisposition.CREATE_IF_NEEDED && schema == null) { + throw new IllegalArgumentException("CreateDisposition is CREATE_IF_NEEDED, " + + "however no schema was provided."); } // In streaming, BigQuery write is taken care of by StreamWithDeDup transform. @@ -607,16 +589,16 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "BigQueryIO.Write"; } + protected String getKindString() { + return "BigQueryIO.Write"; + } static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateWriteHelper(transform, context); } }); @@ -654,9 +636,8 @@ public boolean getValidate() { /** * Implementation of DoFn to perform streaming BigQuery write. */ - private static class StreamingWriteFn extends DoFn>, Void> - implements DoFn.RequiresKeyedState { - + private static class StreamingWriteFn + extends DoFn>, Void> implements DoFn.RequiresKeyedState { /** * Class to accumulate BigQuery row data as a list of String. * DoFn implementation must be Serializable, but BigQuery classes, @@ -664,13 +645,11 @@ private static class StreamingWriteFn extends DoFn, Serializable { - /** The list where BigQuery row data is accumulated. */ private final List jsonRows = new ArrayList<>(); /** Iterator of JsonTableRows converts the row in String to TableRow. */ static class JsonTableRowIterator implements Iterator { - private final Iterator iteratorInternal; /** Constructor. 
*/ @@ -730,9 +709,9 @@ void add(TableRow row) { /** The list of tables created so far, so we don't try the creation each time. */ - private static ThreadLocal> createdTables = - new ThreadLocal>() { - @Override protected HashSet initialValue() { + private static ThreadLocal> createdTables = new ThreadLocal>() { + @Override + protected HashSet initialValue() { return new HashSet<>(); } }; @@ -760,10 +739,9 @@ public void startBundle(Context context) { HashSet tables = createdTables.get(); if (!tables.contains(jsonTableSchema)) { try { - TableSchema tableSchema = JSON_FACTORY.fromString( - jsonTableSchema, TableSchema.class); - TableReference tableReference = JSON_FACTORY.fromString( - jsonTableReference, TableReference.class); + TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class); + TableReference tableReference = + JSON_FACTORY.fromString(jsonTableReference, TableReference.class); BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); @@ -791,8 +769,8 @@ public void finishBundle(Context context) { Bigquery client = Transport.newBigQueryClient(options).build(); try { - TableReference tableReference = JSON_FACTORY.fromString( - jsonTableReference, TableReference.class); + TableReference tableReference = + JSON_FACTORY.fromString(jsonTableReference, TableReference.class); BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); inserter.insertAll(jsonTableRows.iterator(), uniqueIdsForTableRows.iterator()); @@ -810,8 +788,7 @@ public void finishBundle(Context context) { * a randomUUID is generated only once per bucket of data. The actual unique * id is created by concatenating this randomUUID with a sequential number. */ - private static class TagWithUniqueIds extends DoFn>> { + private static class TagWithUniqueIds extends DoFn>> { private transient String randomUUID; private transient AtomicLong sequenceNo; @@ -828,8 +805,7 @@ public void processElement(ProcessContext context) { ThreadLocalRandom randomGenerator = ThreadLocalRandom.current(); // We output on keys 0-50 to ensure that there's enough batching for // BigQuery. - context.output(KV.of(randomGenerator.nextInt(0, 50), - KV.of(uniqueId, context.element()))); + context.output(KV.of(randomGenerator.nextInt(0, 50), KV.of(uniqueId, context.element()))); } } @@ -839,9 +815,7 @@ public void processElement(ProcessContext context) { * PTransform that performs streaming BigQuery write. To increase consistency, * it leverages BigQuery best effort de-dup mechanism. */ - private static class StreamWithDeDup - extends PTransform, PDone> { - + private static class StreamWithDeDup extends PTransform, PDone> { private final TableReference tableReference; private final TableSchema tableSchema; @@ -851,7 +825,10 @@ private static class StreamWithDeDup this.tableSchema = tableSchema; } - @Override protected Coder getDefaultOutputCoder() { return VoidCoder.of(); } + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); + } @Override public PDone apply(PCollection in) { @@ -891,8 +868,7 @@ public PDone apply(PCollection in) { * This loads the entire table into an in-memory PCollection. 
*/ private static void evaluateReadHelper( - Read.Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Read.Bound transform, DirectPipelineRunner.EvaluationContext context) { BigQueryOptions options = context.getPipelineOptions(); Bigquery client = Transport.newBigQueryClient(options).build(); TableReference ref = transform.table; @@ -901,7 +877,7 @@ private static void evaluateReadHelper( } LOG.info("Reading from BigQuery table {}", toTableSpec(ref)); - List elems = CloudSourceUtils.readElemsFromSource(new BigQuerySource(client, ref)); + List elems = ReaderUtils.readElemsFromReader(new BigQueryReader(client, ref)); LOG.info("Number of records read from BigQuery: {}", elems.size()); context.setPCollection(transform.getOutput(), elems); } @@ -913,8 +889,7 @@ private static void evaluateReadHelper( * The table will be created if necessary. */ private static void evaluateWriteHelper( - Write.Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Write.Bound transform, DirectPipelineRunner.EvaluationContext context) { BigQueryOptions options = context.getPipelineOptions(); Bigquery client = Transport.newBigQueryClient(options).build(); TableReference ref = transform.table; @@ -927,8 +902,8 @@ private static void evaluateWriteHelper( try { BigQueryTableInserter inserter = new BigQueryTableInserter(client, ref); - inserter.getOrCreateTable(transform.writeDisposition, - transform.createDisposition, transform.schema); + inserter.getOrCreateTable( + transform.writeDisposition, transform.createDisposition, transform.schema); List tableRows = context.getPCollection(transform.getInput()); inserter.insertAll(tableRows.iterator()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index c0e97ff7527b8..86512be491927 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -16,17 +16,16 @@ package com.google.cloud.dataflow.sdk.io; -import static com.google.cloud.dataflow.sdk.util.CloudSourceUtils.readElemsFromSource; - import com.google.api.client.util.Preconditions; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.TextReader; import com.google.cloud.dataflow.sdk.runners.worker.TextSink; -import com.google.cloud.dataflow.sdk.runners.worker.TextSource; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -160,15 +159,16 @@ public static Bound withoutValidation() { * @param the type of each of the elements of the resulting * PCollection, decoded from the lines of the text file */ - public static class Bound - extends PTransform> { + public static class Bound extends PTransform> { private static final long serialVersionUID = 0; /** The filepattern to read from. */ - @Nullable final String filepattern; + @Nullable + final String filepattern; /** The Coder to use to decode each line. 
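 *
 * A rough usage sketch (not part of this patch; pipeline and the bucket path are
 * placeholders, and the from(...) and withCoder(...) builder methods of TextIO.Read are
 * assumed from elsewhere in this file):
 *
 *   PCollection<String> lines = pipeline.apply(
 *       TextIO.Read.named("ReadLines")
 *           .from("gs://my-bucket/input/*.txt")
 *           .withCoder(StringUtf8Coder.of()));
 *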
*/ - @Nullable final Coder coder; + @Nullable + final Coder coder; /** An option to indicate if input validation is desired. Default is true. */ final boolean validate; @@ -231,14 +231,12 @@ public Bound withoutValidation() { @Override public PCollection apply(PInput input) { if (filepattern == null) { - throw new IllegalStateException( - "need to set the filepattern of a TextIO.Read transform"); + throw new IllegalStateException("need to set the filepattern of a TextIO.Read transform"); } // Force the output's Coder to be what the read is using, and // unchangeable later, to ensure that we read the input in the // format specified by the Read transform. - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) - .setCoder(coder); + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()).setCoder(coder); } @Override @@ -247,7 +245,9 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "TextIO.Read"; } + protected String getKindString() { + return "TextIO.Read"; + } public String getFilepattern() { return filepattern; @@ -259,12 +259,10 @@ public boolean needsValidation() { static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateReadHelper(transform, context); } }); @@ -332,8 +330,7 @@ public static Bound withNumShards(int numShards) { *

See {@link ShardNameTemplate} for a description of shard templates. */ public static Bound withShardNameTemplate(String shardTemplate) { - return new Bound<>(DEFAULT_TEXT_CODER) - .withShardNameTemplate(shardTemplate); + return new Bound<>(DEFAULT_TEXT_CODER).withShardNameTemplate(shardTemplate); } /** @@ -367,12 +364,12 @@ public static Bound withCoder(Coder coder) { * * @param the type of the elements of the input PCollection */ - public static class Bound - extends PTransform, PDone> { + public static class Bound extends PTransform, PDone> { private static final long serialVersionUID = 0; /** The filename to write to. */ - @Nullable final String filenamePrefix; + @Nullable + final String filenamePrefix; /** Suffix to use for each filename. */ final String filenameSuffix; @@ -389,9 +386,8 @@ public static class Bound this(null, null, "", coder, 0, ShardNameTemplate.INDEX_OF_MAX); } - Bound(String name, String filenamePrefix, String filenameSuffix, - Coder coder, int numShards, - String shardTemplate) { + Bound(String name, String filenamePrefix, String filenameSuffix, Coder coder, + int numShards, String shardTemplate) { super(name); this.coder = coder; this.filenamePrefix = filenamePrefix; @@ -405,8 +401,7 @@ public static class Bound * with the given step name. Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); } /** @@ -419,8 +414,7 @@ public Bound named(String name) { */ public Bound to(String filenamePrefix) { validateOutputComponent(filenamePrefix); - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); } /** @@ -433,8 +427,7 @@ public Bound to(String filenamePrefix) { */ public Bound withSuffix(String nameExtension) { validateOutputComponent(nameExtension); - return new Bound<>(name, filenamePrefix, nameExtension, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, nameExtension, coder, numShards, shardTemplate); } /** @@ -453,8 +446,7 @@ public Bound withSuffix(String nameExtension) { */ public Bound withNumShards(int numShards) { Preconditions.checkArgument(numShards >= 0); - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); } /** @@ -466,8 +458,7 @@ public Bound withNumShards(int numShards) { * @see ShardNameTemplate */ public Bound withShardNameTemplate(String shardTemplate) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); } /** @@ -492,8 +483,7 @@ public Bound withoutSharding() { * @param the type of the elements of the input PCollection */ public Bound withCoder(Coder coder) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, - shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); } @Override @@ -518,7 +508,9 @@ protected Coder getDefaultOutputCoder() { } @Override - protected String getKindString() { return "TextIO.Write"; } + protected String getKindString() { + return "TextIO.Write"; + } public String getFilenamePrefix() { return filenamePrefix; @@ -542,12 +534,10 
@@ public Coder getCoder() { static { DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { + Bound.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Bound transform, DirectPipelineRunner.EvaluationContext context) { evaluateWriteHelper(transform, context); } }); @@ -557,30 +547,27 @@ public void evaluate( // Pattern which matches old-style shard output patterns, which are now // disallowed. - private static final Pattern SHARD_OUTPUT_PATTERN = - Pattern.compile("@([0-9]+|\\*)"); + private static final Pattern SHARD_OUTPUT_PATTERN = Pattern.compile("@([0-9]+|\\*)"); private static void validateOutputComponent(String partialFilePattern) { Preconditions.checkArgument( !SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find(), "Output name components are not allowed to contain @* or @N patterns: " - + partialFilePattern); + + partialFilePattern); } ////////////////////////////////////////////////////////////////////////////// private static void evaluateReadHelper( - Read.Bound transform, - DirectPipelineRunner.EvaluationContext context) { - TextSource source = new TextSource<>( - transform.filepattern, true, null, null, transform.coder); - List elems = readElemsFromSource(source); + Read.Bound transform, DirectPipelineRunner.EvaluationContext context) { + TextReader reader = + new TextReader<>(transform.filepattern, true, null, null, transform.coder); + List elems = ReaderUtils.readElemsFromReader(reader); context.setPCollection(transform.getOutput(), elems); } private static void evaluateWriteHelper( - Write.Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Write.Bound transform, DirectPipelineRunner.EvaluationContext context) { List elems = context.getPCollection(transform.getInput()); int numShards = transform.numShards; if (numShards < 1) { @@ -588,17 +575,15 @@ private static void evaluateWriteHelper( numShards = 1; } TextSink> writer = TextSink.createForDirectPipelineRunner( - transform.filenamePrefix, transform.getShardNameTemplate(), - transform.filenameSuffix, numShards, - true, null, null, transform.coder); + transform.filenamePrefix, transform.getShardNameTemplate(), transform.filenameSuffix, + numShards, true, null, null, transform.coder); try (Sink.SinkWriter> sink = writer.writer()) { for (T elem : elems) { sink.add(WindowedValue.valueInGlobalWindow(elem)); } } catch (IOException exn) { throw new RuntimeException( - "unable to write to output file \"" + transform.filenamePrefix + "\"", - exn); + "unable to write to output file \"" + transform.filenamePrefix + "\"", exn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReader.java similarity index 76% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReader.java index 5a8524eb88853..a62d20775b7d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReader.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; -import 
com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; @@ -38,34 +38,28 @@ * * @param the type of the elements read from the source */ -public class AvroByteSource extends Source { - - final AvroSource avroSource; +public class AvroByteReader extends Reader { + final AvroReader avroReader; final Coder coder; private final Schema schema = Schema.create(Schema.Type.BYTES); - public AvroByteSource(String filename, - @Nullable Long startPosition, - @Nullable Long endPosition, - Coder coder) { + public AvroByteReader( + String filename, @Nullable Long startPosition, @Nullable Long endPosition, Coder coder) { this.coder = coder; - avroSource = new AvroSource<>( - filename, startPosition, endPosition, + avroReader = new AvroReader<>(filename, startPosition, endPosition, WindowedValue.getValueOnlyCoder(AvroCoder.of(ByteBuffer.class, schema))); } @Override - public SourceIterator iterator() throws IOException { + public ReaderIterator iterator() throws IOException { return new AvroByteFileIterator(); } - class AvroByteFileIterator extends AbstractSourceIterator { - - private final SourceIterator> avroFileIterator; + class AvroByteFileIterator extends AbstractReaderIterator { + private final ReaderIterator> avroFileIterator; public AvroByteFileIterator() throws IOException { - avroFileIterator = avroSource.iterator( - new GenericDatumReader(schema)); + avroFileIterator = avroReader.iterator(new GenericDatumReader(schema)); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java similarity index 76% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java index 3f071cff2c7a1..6fd10b2223c02 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java @@ -16,15 +16,12 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; -import static com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; - import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.IOChannelFactory; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; @@ -46,24 +43,24 @@ * * @param the type of the elements read from the source */ -public class AvroSource extends Source> { +public class AvroReader extends Reader> { private static final int BUF_SIZE = 200; final String filename; - @Nullable final Long startPosition; - @Nullable final Long endPosition; + @Nullable + final Long startPosition; + @Nullable + final Long endPosition; final AvroCoder avroCoder; private final Schema schema; - public AvroSource(String filename, - @Nullable Long startPosition, - @Nullable Long endPosition, - WindowedValueCoder coder) { - if (!(coder instanceof ValueOnlyWindowedValueCoder)) { + 
public AvroReader(String filename, @Nullable Long startPosition, @Nullable Long endPosition, + WindowedValue.WindowedValueCoder coder) { + if (!(coder instanceof WindowedValue.ValueOnlyWindowedValueCoder)) { throw new IllegalArgumentException("Expected ValueOnlyWindowedValueCoder"); } if (!(coder.getValueCoder() instanceof AvroCoder)) { - throw new IllegalArgumentException("AvroSource requires an AvroCoder"); + throw new IllegalArgumentException("AvroReader requires an AvroCoder"); } this.filename = filename; @@ -73,7 +70,7 @@ public AvroSource(String filename, this.schema = this.avroCoder.getSchema(); } - public SourceIterator> iterator(DatumReader datumReader) throws IOException { + public ReaderIterator> iterator(DatumReader datumReader) throws IOException { IOChannelFactory factory = IOChannelUtils.getFactory(filename); Collection inputs = factory.match(filename); @@ -85,46 +82,40 @@ public SourceIterator> iterator(DatumReader datumReader) thr } else { if (startPosition != null || endPosition != null) { throw new UnsupportedOperationException( - "Unable to apply range limits to multiple-input stream: " + - filename); + "Unable to apply range limits to multiple-input stream: " + filename); } return new AvroFileMultiIterator(datumReader, factory, inputs.iterator()); } } @Override - public SourceIterator> iterator() throws IOException { + public ReaderIterator> iterator() throws IOException { return iterator(avroCoder.createDatumReader()); } - class AvroFileMultiIterator extends LazyMultiSourceIterator> { + class AvroFileMultiIterator extends LazyMultiReaderIterator> { private final IOChannelFactory factory; private final DatumReader datumReader; - public AvroFileMultiIterator(DatumReader datumReader, - IOChannelFactory factory, - Iterator inputs) { + public AvroFileMultiIterator( + DatumReader datumReader, IOChannelFactory factory, Iterator inputs) { super(inputs); this.factory = factory; this.datumReader = datumReader; } @Override - protected SourceIterator> open(String input) throws IOException { + protected ReaderIterator> open(String input) throws IOException { return new AvroFileIterator(datumReader, input, factory.open(input), null, null); } } - class AvroFileIterator extends AbstractSourceIterator> { + class AvroFileIterator extends AbstractReaderIterator> { final DataFileReader fileReader; final Long endOffset; - public AvroFileIterator(DatumReader datumReader, - String filename, - ReadableByteChannel reader, - @Nullable Long startOffset, - @Nullable Long endOffset) - throws IOException { + public AvroFileIterator(DatumReader datumReader, String filename, ReadableByteChannel reader, + @Nullable Long startOffset, @Nullable Long endOffset) throws IOException { if (!(reader instanceof SeekableByteChannel)) { throw new UnsupportedOperationException( "Unable to seek to offset in stream for " + filename); @@ -141,8 +132,7 @@ public AvroFileIterator(DatumReader datumReader, @Override public boolean hasNext() throws IOException { - return fileReader.hasNext() - && (endOffset == null || !fileReader.pastSync(endOffset)); + return fileReader.hasNext() && (endOffset == null || !fileReader.pastSync(endOffset)); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java similarity index 70% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java rename to 
sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java index 740f94965dd80..c4292c5bcb535 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java @@ -26,41 +26,36 @@ import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; /** - * Creates an AvroSource from a CloudObject spec. + * Creates an AvroReader from a CloudObject spec. */ @SuppressWarnings("rawtypes") -public class AvroSourceFactory { +public class AvroReaderFactory { // Do not instantiate. - private AvroSourceFactory() {} + private AvroReaderFactory() {} - public static Source create(PipelineOptions options, - CloudObject spec, - Coder coder, - ExecutionContext executionContext) - throws Exception { + public static Reader create(PipelineOptions options, CloudObject spec, Coder coder, + ExecutionContext executionContext) throws Exception { return create(spec, coder); } - static Source create(CloudObject spec, - Coder coder) - throws Exception { + static Reader create(CloudObject spec, Coder coder) throws Exception { String filename = getString(spec, PropertyNames.FILENAME); Long startOffset = getLong(spec, PropertyNames.START_OFFSET, null); Long endOffset = getLong(spec, PropertyNames.END_OFFSET, null); if (!(coder instanceof WindowedValueCoder)) { - return new AvroByteSource<>(filename, startOffset, endOffset, coder); + return new AvroByteReader<>(filename, startOffset, endOffset, coder); //throw new IllegalArgumentException("Expected WindowedValueCoder"); } WindowedValueCoder windowedCoder = (WindowedValueCoder) coder; if (windowedCoder.getValueCoder() instanceof AvroCoder) { - return new AvroSource(filename, startOffset, endOffset, windowedCoder); + return new AvroReader(filename, startOffset, endOffset, windowedCoder); } else { - return new AvroByteSource<>(filename, startOffset, endOffset, windowedCoder); + return new AvroByteReader<>(filename, startOffset, endOffset, windowedCoder); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java similarity index 81% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java index fa1afddf87a7f..3d442f3ffc768 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java @@ -24,7 +24,7 @@ import com.google.cloud.dataflow.sdk.options.BigQueryOptions; import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator; import com.google.cloud.dataflow.sdk.util.Transport; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import java.io.IOException; import java.util.NoSuchElementException; @@ -37,13 +37,13 @@ * progress reporting because the source is used only in situations where the entire table must be * read by each worker (i.e. the source is used as a side input). 
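 *
 * A rough iteration sketch under the naming introduced by this patch (the Bigquery client
 * and TableReference are assumed to be constructed elsewhere, and the iterator is assumed to
 * keep the hasNext()/next() contract of the old SourceIterator):
 *
 *   BigQueryReader reader = new BigQueryReader(bigQueryClient, tableRef);
 *   Reader.ReaderIterator<TableRow> it = reader.iterator();
 *   while (it.hasNext()) {
 *     TableRow row = it.next();
 *     // ... consume the row ...
 *   }
 *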
*/ -public class BigQuerySource extends Source { +public class BigQueryReader extends Reader { final TableReference tableRef; final BigQueryOptions bigQueryOptions; final Bigquery bigQueryClient; /** Builds a BigQuery source using pipeline options to instantiate a Bigquery client. */ - public BigQuerySource(BigQueryOptions bigQueryOptions, TableReference tableRef) { + public BigQueryReader(BigQueryOptions bigQueryOptions, TableReference tableRef) { // Save pipeline options so that we can construct the BigQuery client on-demand whenever an // iterator gets created. this.bigQueryOptions = bigQueryOptions; @@ -51,31 +51,29 @@ public BigQuerySource(BigQueryOptions bigQueryOptions, TableReference tableRef) this.bigQueryClient = null; } - /** Builds a BigQuerySource directly using a BigQuery client. */ - public BigQuerySource(Bigquery bigQueryClient, TableReference tableRef) { + /** Builds a BigQueryReader directly using a BigQuery client. */ + public BigQueryReader(Bigquery bigQueryClient, TableReference tableRef) { this.bigQueryOptions = null; this.tableRef = tableRef; this.bigQueryClient = bigQueryClient; } @Override - public SourceIterator iterator() throws IOException { - return new BigQuerySourceIterator( + public ReaderIterator iterator() throws IOException { + return new BigQueryReaderIterator( bigQueryClient != null - ? bigQueryClient - : Transport.newBigQueryClient(bigQueryOptions).build(), + ? bigQueryClient : Transport.newBigQueryClient(bigQueryOptions).build(), tableRef); } /** - * A SourceIterator that yields TableRow objects for each row of a BigQuery table. + * A ReaderIterator that yields TableRow objects for each row of a BigQuery table. */ - class BigQuerySourceIterator extends AbstractSourceIterator { - + class BigQueryReaderIterator extends AbstractReaderIterator { private BigQueryTableRowIterator rowIterator; - public BigQuerySourceIterator(Bigquery bigQueryClient, TableReference tableRef) { - rowIterator = new BigQueryTableRowIterator(bigQueryClient, tableRef); + public BigQueryReaderIterator(Bigquery bigQueryClient, TableReference tableRef) { + rowIterator = new BigQueryTableRowIterator(bigQueryClient, tableRef); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactory.java similarity index 86% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactory.java index 682b7faa1400b..e2c00ba4064fb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactory.java @@ -27,16 +27,15 @@ import com.google.cloud.dataflow.sdk.util.PropertyNames; /** - * Creates a BigQuerySource from a {@link CloudObject} spec. + * Creates a BigQueryReader from a {@link CloudObject} spec. */ -public class BigQuerySourceFactory { +public class BigQueryReaderFactory { // Do not instantiate. 
- private BigQuerySourceFactory() {} + private BigQueryReaderFactory() {} - public static BigQuerySource create( - PipelineOptions options, CloudObject spec, Coder coder, + public static BigQueryReader create(PipelineOptions options, CloudObject spec, Coder coder, ExecutionContext executionContext) throws Exception { - return new BigQuerySource( + return new BigQueryReader( options.as(BigQueryOptions.class), new TableReference() .setProjectId(getString(spec, PropertyNames.BIGQUERY_PROJECT)) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java index f2569b1488f6e..250eb431ad4a3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -18,7 +18,7 @@ import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.buildStatus; import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.uniqueId; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; @@ -54,11 +54,8 @@ public class DataflowWorkProgressUpdater extends WorkProgressUpdater { /** Options specifying information about the pipeline run by the worker.*/ private final DataflowWorkerHarnessOptions options; - public DataflowWorkProgressUpdater( - WorkItem workItem, - WorkExecutor worker, - DataflowWorker.WorkUnitClient workUnitClient, - DataflowWorkerHarnessOptions options) { + public DataflowWorkProgressUpdater(WorkItem workItem, WorkExecutor worker, + DataflowWorker.WorkUnitClient workUnitClient, DataflowWorkerHarnessOptions options) { super(worker); this.workItem = workItem; this.workUnitClient = workUnitClient; @@ -77,11 +74,9 @@ protected long getWorkUnitLeaseExpirationTimestamp() { @Override protected void reportProgressHelper() throws Exception { - WorkItemStatus status = buildStatus( - workItem, false /*completed*/, - worker.getOutputCounters(), worker.getOutputMetrics(), options, - worker.getWorkerProgress(), stopPositionToService, - null /*sourceOperationResponse*/, null /*errors*/); + WorkItemStatus status = buildStatus(workItem, false/*completed*/, worker.getOutputCounters(), + worker.getOutputMetrics(), options, worker.getWorkerProgress(), stopPositionToService, + null/*sourceOperationResponse*/, null/*errors*/); status.setRequestedLeaseDuration(toCloudDuration(Duration.millis(requestedLeaseDurationMs))); WorkItemServiceState result = workUnitClient.reportWorkItemStatus(status); @@ -95,16 +90,15 @@ protected void reportProgressHelper() throws Exception { ApproximateProgress suggestedStopPoint = result.getSuggestedStopPoint(); if (suggestedStopPoint == null && result.getSuggestedStopPosition() != null) { - suggestedStopPoint = new ApproximateProgress() - .setPosition(result.getSuggestedStopPosition()); + suggestedStopPoint = + new ApproximateProgress().setPosition(result.getSuggestedStopPosition()); } if (suggestedStopPoint != null) { LOG.info("Proposing stop progress on work unit {} at proposed stopping 
point {}", workString(), suggestedStopPoint); stopPositionToService = - worker.proposeStopPosition( - cloudProgressToSourceProgress(suggestedStopPoint)); + worker.proposeStopPosition(cloudProgressToReaderProgress(suggestedStopPoint)); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 5175d15aa882e..499653a11d0a7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -36,7 +36,7 @@ import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.Metric; import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; import com.google.cloud.dataflow.sdk.util.common.worker.WorkProgressUpdater; @@ -63,7 +63,6 @@ * execute it, and update the work. */ public class DataflowWorker { - private static final Logger LOG = LoggerFactory.getLogger(DataflowWorker.class); /** @@ -77,8 +76,7 @@ public class DataflowWorker { */ private final DataflowWorkerHarnessOptions options; - public DataflowWorker(WorkUnitClient workUnitClient, - DataflowWorkerHarnessOptions options) { + public DataflowWorker(WorkUnitClient workUnitClient, DataflowWorkerHarnessOptions options) { this.workUnitClient = workUnitClient; this.options = options; } @@ -114,20 +112,17 @@ private boolean doWork(WorkItem workItem) throws IOException { ExecutionContext executionContext = new BatchModeExecutionContext(); if (workItem.getMapTask() != null) { - worker = MapTaskExecutorFactory.create(options, - workItem.getMapTask(), - executionContext); + worker = MapTaskExecutorFactory.create(options, workItem.getMapTask(), executionContext); } else if (workItem.getSourceOperationTask() != null) { - worker = SourceOperationExecutorFactory.create( - workItem.getSourceOperationTask()); + worker = SourceOperationExecutorFactory.create(workItem.getSourceOperationTask()); } else { throw new RuntimeException("unknown kind of work item: " + workItem.toString()); } - WorkProgressUpdater progressUpdater = new DataflowWorkProgressUpdater( - workItem, worker, workUnitClient, options); + WorkProgressUpdater progressUpdater = + new DataflowWorkProgressUpdater(workItem, worker, workUnitClient, options); progressUpdater.startReportingProgress(); // Blocks while executing the work. @@ -155,13 +150,13 @@ private boolean doWork(WorkItem workItem) throws IOException { // TODO: Find out a generic way for the WorkExecutor to report work-specific results // into the work update. - CustomSourceFormat.SourceOperationResponse sourceOperationResponse = + CustomSourceFormat.OperationResponse sourceOperationResponse = (worker instanceof SourceOperationExecutor) ? 
cloudSourceOperationResponseToSourceOperationResponse( - ((SourceOperationExecutor) worker).getResponse()) + ((SourceOperationExecutor) worker).getResponse()) : null; - reportStatus(options, "Success", workItem, counters, metrics, sourceOperationResponse, - null /*errors*/); + reportStatus( + options, "Success", workItem, counters, metrics, sourceOperationResponse, null/*errors*/); return true; @@ -181,8 +176,7 @@ private boolean doWork(WorkItem workItem) throws IOException { } /** Handles the exception thrown when reading and executing the work. */ - private void handleWorkError( - WorkItem workItem, WorkExecutor worker, Throwable e) + private void handleWorkError(WorkItem workItem, WorkExecutor worker, Throwable e) throws IOException { LOG.warn("Uncaught exception occurred during work unit execution:", e); @@ -190,14 +184,12 @@ private void handleWorkError( // into the client. Throwable t = e instanceof UserCodeException ? e.getCause() : e; Status error = new Status(); - error.setCode(2); // Code.UNKNOWN. TODO: Replace with a generated definition. + error.setCode(2); // Code.UNKNOWN. TODO: Replace with a generated definition. // TODO: Attach the stack trace as exception details, not to the message. error.setMessage(buildCloudStackTrace(t)); - reportStatus(options, "Failure", workItem, - worker == null ? null : worker.getOutputCounters(), - worker == null ? null : worker.getOutputMetrics(), - null /*sourceOperationResponse*/, + reportStatus(options, "Failure", workItem, worker == null ? null : worker.getOutputCounters(), + worker == null ? null : worker.getOutputMetrics(), null/*sourceOperationResponse*/, error == null ? null : Collections.singletonList(error)); } @@ -225,30 +217,21 @@ private static String buildCloudStackTrace(Throwable t) { return result.toString(); } - private void reportStatus(DataflowWorkerHarnessOptions options, - String status, - WorkItem workItem, - @Nullable CounterSet counters, - @Nullable Collection> metrics, - @Nullable CustomSourceFormat.SourceOperationResponse - sourceOperationResponse, - @Nullable List errors) - throws IOException { + private void reportStatus(DataflowWorkerHarnessOptions options, String status, WorkItem workItem, + @Nullable CounterSet counters, @Nullable Collection> metrics, + @Nullable CustomSourceFormat.OperationResponse sourceOperationResponse, + @Nullable List errors) throws IOException { LOG.info("{} processing work item {}", status, uniqueId(workItem)); - WorkItemStatus workItemStatus = buildStatus(workItem, true /*completed*/, - counters, metrics, options, null, null, sourceOperationResponse, errors); + WorkItemStatus workItemStatus = buildStatus(workItem, true/*completed*/, counters, metrics, + options, null, null, sourceOperationResponse, errors); workUnitClient.reportWorkItemStatus(workItemStatus); } - static WorkItemStatus buildStatus( - WorkItem workItem, - boolean completed, - @Nullable CounterSet counters, - @Nullable Collection> metrics, - DataflowWorkerHarnessOptions options, - @Nullable Source.Progress progress, - @Nullable Source.Position stopPosition, - @Nullable CustomSourceFormat.SourceOperationResponse sourceOperationResponse, + static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, + @Nullable CounterSet counters, @Nullable Collection> metrics, + DataflowWorkerHarnessOptions options, @Nullable Reader.Progress progress, + @Nullable Reader.Position stopPosition, + @Nullable CustomSourceFormat.OperationResponse sourceOperationResponse, @Nullable List errors) { WorkItemStatus status = new 
WorkItemStatus(); status.setWorkItemId(Long.toString(workItem.getId())); @@ -323,8 +306,7 @@ public abstract static class WorkUnitClient { * @param workItemStatus the status to report * @return a {@link WorkServiceState} (e.g. a new stop position) */ - public abstract WorkItemServiceState reportWorkItemStatus( - WorkItemStatus workItemStatus) + public abstract WorkItemServiceState reportWorkItemStatus(WorkItemStatus workItemStatus) throws IOException; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java similarity index 77% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index 2e152e794f80c..de116277fd262 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -17,8 +17,8 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Preconditions.checkNotNull; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import com.google.api.services.dataflow.model.ApproximateProgress; @@ -27,7 +27,7 @@ import com.google.cloud.dataflow.sdk.util.IOChannelFactory; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTracker; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,21 +47,20 @@ * * @param the type of the elements read from the source */ -public abstract class FileBasedSource extends Source { +public abstract class FileBasedReader extends Reader { protected static final int BUF_SIZE = 200; protected final String filename; - @Nullable protected final Long startPosition; - @Nullable protected final Long endPosition; + @Nullable + protected final Long startPosition; + @Nullable + protected final Long endPosition; protected final Coder coder; protected final boolean useDefaultBufferSize; - private static final Logger LOG = LoggerFactory.getLogger(FileBasedSource.class); + private static final Logger LOG = LoggerFactory.getLogger(FileBasedReader.class); - protected FileBasedSource(String filename, - @Nullable Long startPosition, - @Nullable Long endPosition, - Coder coder, - boolean useDefaultBufferSize) { + protected FileBasedReader(String filename, @Nullable Long startPosition, + @Nullable Long endPosition, Coder coder, boolean useDefaultBufferSize) { this.filename = filename; this.startPosition = startPosition; this.endPosition = endPosition; @@ -80,20 +79,18 @@ protected FileBasedSource(String filename, * * @param endPosition offset of the end position; null means end-of-file */ - protected abstract SourceIterator 
newSourceIteratorForRangeInFile( - IOChannelFactory factory, String oneFile, long startPosition, - @Nullable Long endPosition) - throws IOException; + protected abstract ReaderIterator newReaderIteratorForRangeInFile(IOChannelFactory factory, + String oneFile, long startPosition, @Nullable Long endPosition) throws IOException; /** * Returns a new iterator for elements in the given files. Caller * must ensure that the file collection is not empty. */ - protected abstract SourceIterator newSourceIteratorForFiles( + protected abstract ReaderIterator newReaderIteratorForFiles( IOChannelFactory factory, Collection files) throws IOException; @Override - public SourceIterator iterator() throws IOException { + public ReaderIterator iterator() throws IOException { IOChannelFactory factory = IOChannelUtils.getFactory(filename); Collection inputs = factory.match(filename); if (inputs.isEmpty()) { @@ -103,22 +100,20 @@ public SourceIterator iterator() throws IOException { if (startPosition != null || endPosition != null) { if (inputs.size() != 1) { throw new UnsupportedOperationException( - "Unable to apply range limits to multiple-input stream: " - + filename); + "Unable to apply range limits to multiple-input stream: " + filename); } - return newSourceIteratorForRangeInFile( - factory, inputs.iterator().next(), + return newReaderIteratorForRangeInFile(factory, inputs.iterator().next(), startPosition == null ? 0 : startPosition, endPosition); } else { - return newSourceIteratorForFiles(factory, inputs); + return newReaderIteratorForFiles(factory, inputs); } } /** * Abstract base class for file-based source iterators. */ - protected abstract class FileBasedIterator extends AbstractSourceIterator { + protected abstract class FileBasedIterator extends AbstractReaderIterator { protected final CopyableSeekableByteChannel seeker; protected final PushbackInputStream stream; protected final Long startOffset; @@ -128,16 +123,14 @@ protected abstract class FileBasedIterator extends AbstractSourceIterator { protected boolean nextElementComputed = false; protected long offset; - FileBasedIterator(CopyableSeekableByteChannel seeker, - long startOffset, - long offset, - @Nullable Long endOffset, - ProgressTracker tracker) throws IOException { + FileBasedIterator(CopyableSeekableByteChannel seeker, long startOffset, long offset, + @Nullable Long endOffset, ProgressTracker tracker) throws IOException { this.seeker = checkNotNull(seeker); this.seeker.position(startOffset); - BufferedInputStream bufferedStream = useDefaultBufferSize - ? new BufferedInputStream(Channels.newInputStream(seeker)) - : new BufferedInputStream(Channels.newInputStream(seeker), BUF_SIZE); + BufferedInputStream bufferedStream = + useDefaultBufferSize + ? new BufferedInputStream(Channels.newInputStream(seeker)) + : new BufferedInputStream(Channels.newInputStream(seeker), BUF_SIZE); this.stream = new PushbackInputStream(bufferedStream, BUF_SIZE); this.startOffset = startOffset; this.offset = offset; @@ -153,8 +146,7 @@ protected abstract class FileBasedIterator extends AbstractSourceIterator { * has been reached. 
* @throws IOException if an I/O error occurs */ - protected abstract ByteArrayOutputStream readElement() - throws IOException; + protected abstract ByteArrayOutputStream readElement() throws IOException; @Override public boolean hasNext() throws IOException { @@ -179,7 +171,7 @@ void advance() throws IOException { @Override public Progress getProgress() { // Currently we assume that only a offset position is reported as - // current progress. Source writer can override this method to update + // current progress. An implementor can override this method to update // other metrics, e.g. completion percentage or remaining time. com.google.api.services.dataflow.model.Position currentPosition = new com.google.api.services.dataflow.model.Position(); @@ -188,7 +180,7 @@ public Progress getProgress() { ApproximateProgress progress = new ApproximateProgress(); progress.setPosition(currentPosition); - return cloudProgressToSourceProgress(progress); + return cloudProgressToReaderProgress(progress); } @Override @@ -196,21 +188,19 @@ public Position updateStopPosition(Progress proposedStopPosition) { checkNotNull(proposedStopPosition); // Currently we only support stop position in byte offset of - // CloudPosition in a file-based Source. If stop position in + // CloudPosition in a file-based Reader. If stop position in // other types is proposed, the end position in iterator will // not be updated, and return null. com.google.api.services.dataflow.model.ApproximateProgress stopPosition = sourceProgressToCloudProgress(proposedStopPosition); if (stopPosition == null) { - LOG.warn( - "A stop position other than CloudPosition is not supported now."); + LOG.warn("A stop position other than CloudPosition is not supported now."); return null; } Long byteOffset = stopPosition.getPosition().getByteOffset(); if (byteOffset == null) { - LOG.warn( - "A stop position other than byte offset is not supported in a " + LOG.warn("A stop position other than byte offset is not supported in a " + "file-based Source."); return null; } @@ -227,7 +217,7 @@ public Position updateStopPosition(Progress proposedStopPosition) { } this.endOffset = byteOffset; - return cloudPositionToSourcePosition(stopPosition.getPosition()); + return cloudPositionToReaderPosition(stopPosition.getPosition()); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java similarity index 75% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java index 19ce35800c6ee..7d0f92313e9fd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java @@ -17,12 +17,11 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Preconditions.checkNotNull; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import 
static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import com.google.api.client.util.Preconditions; - import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; @@ -37,9 +36,9 @@ import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; import com.google.cloud.dataflow.sdk.util.common.worker.GroupingShuffleEntryIterator; import com.google.cloud.dataflow.sdk.util.common.worker.KeyGroupedShuffleEntries; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.values.KV; import org.slf4j.Logger; @@ -55,10 +54,8 @@ * @param the type of the keys read from the shuffle * @param the type of the values read from the shuffle */ -public class GroupingShuffleSource - extends Source>>> { - private static final Logger LOG = - LoggerFactory.getLogger(GroupingShuffleSource.class); +public class GroupingShuffleReader extends Reader>>> { + private static final Logger LOG = LoggerFactory.getLogger(GroupingShuffleReader.class); final byte[] shuffleReaderConfig; final String startShufflePosition; @@ -68,12 +65,9 @@ public class GroupingShuffleSource Coder keyCoder; Coder valueCoder; - public GroupingShuffleSource(PipelineOptions options, - byte[] shuffleReaderConfig, - String startShufflePosition, - String stopShufflePosition, - Coder>>> coder, - BatchModeExecutionContext executionContext) + public GroupingShuffleReader(PipelineOptions options, byte[] shuffleReaderConfig, + String startShufflePosition, String stopShufflePosition, + Coder>>> coder, BatchModeExecutionContext executionContext) throws Exception { this.shuffleReaderConfig = shuffleReaderConfig; this.startShufflePosition = startShufflePosition; @@ -83,45 +77,40 @@ public GroupingShuffleSource(PipelineOptions options, } @Override - public SourceIterator>>> iterator() - throws IOException { + public ReaderIterator>>> iterator() throws IOException { Preconditions.checkArgument(shuffleReaderConfig != null); return iterator(new BatchingShuffleEntryReader( - new ChunkingShuffleBatchReader(new ApplianceShuffleReader( - shuffleReaderConfig)))); + new ChunkingShuffleBatchReader(new ApplianceShuffleReader(shuffleReaderConfig)))); } private void initCoder(Coder>>> coder) throws Exception { if (!(coder instanceof WindowedValueCoder)) { - throw new Exception( - "unexpected kind of coder for WindowedValue: " + coder); + throw new Exception("unexpected kind of coder for WindowedValue: " + coder); } Coder>> elemCoder = ((WindowedValueCoder>>) coder).getValueCoder(); if (!(elemCoder instanceof KvCoder)) { - throw new Exception( - "unexpected kind of coder for elements read from " + - "a key-grouping shuffle: " + elemCoder); + throw new Exception("unexpected kind of coder for elements read from " + + "a key-grouping shuffle: " + elemCoder); } KvCoder> kvCoder = (KvCoder>) elemCoder; this.keyCoder = kvCoder.getKeyCoder(); Coder> kvValueCoder = kvCoder.getValueCoder(); if (!(kvValueCoder instanceof IterableCoder)) { - throw new Exception( - "unexpected kind of coder for values of KVs read from " + - "a key-grouping shuffle"); + throw new Exception("unexpected kind of coder for values of KVs read from " + + "a key-grouping 
shuffle"); } IterableCoder iterCoder = (IterableCoder) kvValueCoder; this.valueCoder = iterCoder.getElemCoder(); } - final SourceIterator>>> iterator(ShuffleEntryReader reader) + final ReaderIterator>>> iterator(ShuffleEntryReader reader) throws IOException { - return new GroupingShuffleSourceIterator(reader); + return new GroupingShuffleReaderIterator(reader); } /** - * A SourceIterator that reads from a ShuffleEntryReader and groups + * A ReaderIterator that reads from a ShuffleEntryReader and groups * all the values with the same key. * *

A key limitation of this implementation is that all iterator accesses @@ -137,17 +126,17 @@ final SourceIterator>>> iterator(ShuffleEntryR * to the current key -- which would introduce a performance * penalty. */ - private final class GroupingShuffleSourceIterator - extends AbstractSourceIterator>>> { + private final class GroupingShuffleReaderIterator + extends AbstractReaderIterator>>> { // N.B. This class is *not* static; it uses the keyCoder, valueCoder, and - // executionContext from its enclosing GroupingShuffleSource. + // executionContext from its enclosing GroupingShuffleReader. /** The iterator over shuffle entries, grouped by common key. */ private final Iterator groups; /** The stop position. No records with a position at or after * @stopPosition will be returned. Initialized - * to @AbstractShuffleSource.stopShufflePosition but can be + * to @AbstractShuffleReader.stopShufflePosition but can be * dynamically updated via @updateStopPosition() (note that such * updates can only decrease @stopPosition). * @@ -157,7 +146,7 @@ private final class GroupingShuffleSourceIterator private ByteArrayShufflePosition stopPosition = null; /** - * Position that this @GroupingShuffleSourceIterator is guaranteed + * Position that this @GroupingShuffleReaderIterator is guaranteed * not to stop before reaching (inclusive); @promisedPosition can * only increase monotonically and is updated when advancing to a * new group of records (either in the most recent call to next() @@ -168,20 +157,18 @@ private final class GroupingShuffleSourceIterator /** The next group to be consumed, if available. */ private KeyGroupedShuffleEntries nextGroup = null; - public GroupingShuffleSourceIterator(ShuffleEntryReader reader) { - promisedPosition = ByteArrayShufflePosition.fromBase64( - startShufflePosition); + public GroupingShuffleReaderIterator(ShuffleEntryReader reader) { + promisedPosition = ByteArrayShufflePosition.fromBase64(startShufflePosition); if (promisedPosition == null) { promisedPosition = new ByteArrayShufflePosition(new byte[0]); } stopPosition = ByteArrayShufflePosition.fromBase64(stopShufflePosition); - this.groups = new GroupingShuffleEntryIterator(reader.read( - promisedPosition, stopPosition)) { - @Override - protected void notifyElementRead(long byteSize) { - GroupingShuffleSource.this.notifyElementRead(byteSize); - } - }; + this.groups = new GroupingShuffleEntryIterator(reader.read(promisedPosition, stopPosition)) { + @Override + protected void notifyElementRead(long byteSize) { + GroupingShuffleReader.this.notifyElementRead(byteSize); + } + }; } private void advanceIfNecessary() { @@ -197,8 +184,7 @@ public boolean hasNext() throws IOException { if (nextGroup == null) { return false; } - return stopPosition == null - || promisedPosition.compareTo(stopPosition) < 0; + return stopPosition == null || promisedPosition.compareTo(stopPosition) < 0; } @Override @@ -220,8 +206,8 @@ public WindowedValue>> next() throws IOException { /** * Returns the position before the next {@code KV>} to be returned by the - * {@link GroupingShuffleSourceIterator}. Returns null if the - * {@link GroupingShuffleSourceIterator} is finished. + * {@link GroupingShuffleReaderIterator}. Returns null if the + * {@link GroupingShuffleReaderIterator} is finished. 
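To make the renamed API concrete, here is a minimal sketch of how any ReaderIterator (the GroupingShuffleReaderIterator above included) gets consumed. ReaderLoopSketch and consumeAll are invented names, and the loop only approximates what ReadOperation does with the elements it reads:

import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

import java.io.IOException;

class ReaderLoopSketch {
  // Drains a Reader: obtain a ReaderIterator, pull elements while hasNext()
  // is true, snapshot progress between elements, and close the iterator.
  static <T> long consumeAll(Reader<T> reader) throws IOException {
    Reader.ReaderIterator<T> it = reader.iterator();
    long count = 0;
    try {
      while (it.hasNext()) {
        T element = it.next();
        count++;
        // The worker can report this snapshot (a byte offset, record index,
        // or shuffle position, depending on the reader) back to the service.
        Reader.Progress progress = it.getProgress();
      }
    } finally {
      it.close();
    }
    return count;
  }
}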
*/ @Override public Progress getProgress() { @@ -230,13 +216,13 @@ public Progress getProgress() { ApproximateProgress progress = new ApproximateProgress(); position.setShufflePosition(promisedPosition.encodeBase64()); progress.setPosition(position); - return cloudProgressToSourceProgress(progress); + return cloudProgressToReaderProgress(progress); } /** * Updates the stop position of the shuffle source to the position proposed. Ignores the * proposed stop position if it is smaller than or equal to the position before the next - * {@code KV>} to be returned by the {@link GroupingShuffleSourceIterator}. + * {@code KV>} to be returned by the {@link GroupingShuffleReaderIterator}. */ @Override public Position updateStopPosition(Progress proposedStopPosition) { @@ -244,14 +230,12 @@ public Position updateStopPosition(Progress proposedStopPosition) { com.google.api.services.dataflow.model.Position stopCloudPosition = sourceProgressToCloudProgress(proposedStopPosition).getPosition(); if (stopCloudPosition == null) { - LOG.warn( - "A stop position other than a Position is not supported now."); + LOG.warn("A stop position other than a Position is not supported now."); return null; } if (stopCloudPosition.getShufflePosition() == null) { - LOG.warn( - "A stop position other than shuffle position is not supported in " + LOG.warn("A stop position other than shuffle position is not supported in " + "a grouping shuffle source: " + stopCloudPosition.toString()); return null; } @@ -259,36 +243,31 @@ public Position updateStopPosition(Progress proposedStopPosition) { ByteArrayShufflePosition.fromBase64(stopCloudPosition.getShufflePosition()); if (newStopPosition.compareTo(promisedPosition) <= 0) { - LOG.warn("Proposed stop position: " - + stopCloudPosition.getShufflePosition() + " <= promised position: " - + promisedPosition.encodeBase64()); + LOG.warn("Proposed stop position: " + stopCloudPosition.getShufflePosition() + + " <= promised position: " + promisedPosition.encodeBase64()); return null; } - if (this.stopPosition != null - && newStopPosition.compareTo(this.stopPosition) >= 0) { - LOG.warn("Proposed stop position: " - + stopCloudPosition.getShufflePosition() - + " >= current stop position: " - + this.stopPosition.encodeBase64()); + if (this.stopPosition != null && newStopPosition.compareTo(this.stopPosition) >= 0) { + LOG.warn("Proposed stop position: " + stopCloudPosition.getShufflePosition() + + " >= current stop position: " + this.stopPosition.encodeBase64()); return null; } this.stopPosition = newStopPosition; - LOG.info("Updated the stop position to " - + stopCloudPosition.getShufflePosition()); + LOG.info("Updated the stop position to " + stopCloudPosition.getShufflePosition()); - return cloudPositionToSourcePosition(stopCloudPosition); + return cloudPositionToReaderPosition(stopCloudPosition); } /** * Provides the {@link Reiterable} used to iterate through the values part * of a {@code KV>} entry produced by a - * {@link GroupingShuffleSource}. + * {@link GroupingShuffleReader}. */ private final class ValuesIterable implements Reiterable { // N.B. This class is *not* static; it uses the valueCoder from - // its enclosing GroupingShuffleSource. + // its enclosing GroupingShuffleReader. private final Reiterable base; @@ -305,11 +284,11 @@ public ValuesIterator iterator() { /** * Provides the {@link Reiterator} used to iterate through the values part * of a {@code KV>} entry produced by a - * {@link GroupingShuffleSource}. + * {@link GroupingShuffleReader}. 
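The updateStopPosition() protocol above is easier to see from the caller's side. A hedged sketch, assuming it lives in the same runners.worker package so it can use SourceTranslationUtils; StopPositionSketch and proposeStopAtByteOffset are made-up names, and the sketch uses a byte offset (the form FileBasedReader accepts), whereas GroupingShuffleReaderIterator expects a shuffle position instead:

import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress;

import com.google.api.services.dataflow.model.ApproximateProgress;
import com.google.api.services.dataflow.model.Position;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

class StopPositionSketch {
  // Proposes a new stop bound at the given byte offset. A null result means
  // the proposal was rejected (wrong kind of position, or not strictly
  // between the current position and the existing bound) and the previous
  // stop position remains in force.
  static Reader.Position proposeStopAtByteOffset(
      Reader.ReaderIterator<?> iterator, long byteOffset) {
    ApproximateProgress proposal = new ApproximateProgress()
        .setPosition(new Position().setByteOffset(byteOffset));
    return iterator.updateStopPosition(cloudProgressToReaderProgress(proposal));
  }
}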
*/ private final class ValuesIterator implements Reiterator { // N.B. This class is *not* static; it uses the valueCoder from - // its enclosing GroupingShuffleSource. + // its enclosing GroupingShuffleReader. private final Reiterator base; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderFactory.java similarity index 68% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderFactory.java index 2229a77ddc10b..29a444bb280dc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderFactory.java @@ -29,34 +29,24 @@ import com.google.cloud.dataflow.sdk.values.KV; /** - * Creates a GroupingShuffleSource from a CloudObject spec. + * Creates a GroupingShuffleReader from a CloudObject spec. */ -public class GroupingShuffleSourceFactory { +public class GroupingShuffleReaderFactory { // Do not instantiate. - private GroupingShuffleSourceFactory() {} + private GroupingShuffleReaderFactory() {} - public static GroupingShuffleSource create( - PipelineOptions options, - CloudObject spec, - Coder>>> coder, - ExecutionContext executionContext) + public static GroupingShuffleReader create(PipelineOptions options, CloudObject spec, + Coder>>> coder, ExecutionContext executionContext) throws Exception { - return create(options, spec, coder, - (BatchModeExecutionContext) executionContext); + return create(options, spec, coder, (BatchModeExecutionContext) executionContext); } - static GroupingShuffleSource create( - PipelineOptions options, - CloudObject spec, - Coder>>> coder, - BatchModeExecutionContext executionContext) + static GroupingShuffleReader create(PipelineOptions options, CloudObject spec, + Coder>>> coder, BatchModeExecutionContext executionContext) throws Exception { - return new GroupingShuffleSource<>( - options, + return new GroupingShuffleReader<>(options, decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), - getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), - coder, - executionContext); + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), coder, executionContext); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java similarity index 76% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index a0a524ee0c9b9..487daa1affe45 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -17,8 +17,8 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Preconditions.checkNotNull; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static 
com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import static java.lang.Math.min; @@ -26,7 +26,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.StringUtils; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,18 +42,16 @@ * * @param the type of the elements read from the source */ -public class InMemorySource extends Source { - private static final Logger LOG = LoggerFactory.getLogger(InMemorySource.class); +public class InMemoryReader extends Reader { + private static final Logger LOG = LoggerFactory.getLogger(InMemoryReader.class); final List encodedElements; final int startIndex; final int endIndex; final Coder coder; - public InMemorySource(List encodedElements, - @Nullable Long startIndex, - @Nullable Long endIndex, - Coder coder) { + public InMemoryReader(List encodedElements, @Nullable Long startIndex, + @Nullable Long endIndex, Coder coder) { this.encodedElements = encodedElements; int maxIndex = encodedElements.size(); if (startIndex == null) { @@ -68,8 +66,7 @@ public InMemorySource(List encodedElements, this.endIndex = maxIndex; } else { if (endIndex < this.startIndex) { - throw new IllegalArgumentException( - "end index should be >= start index"); + throw new IllegalArgumentException("end index should be >= start index"); } this.endIndex = (int) min(endIndex, maxIndex); } @@ -77,18 +74,18 @@ public InMemorySource(List encodedElements, } @Override - public SourceIterator iterator() throws IOException { - return new InMemorySourceIterator(); + public ReaderIterator iterator() throws IOException { + return new InMemoryReaderIterator(); } /** - * A SourceIterator that yields an in-memory list of elements. + * A ReaderIterator that yields an in-memory list of elements. */ - class InMemorySourceIterator extends AbstractSourceIterator { + class InMemoryReaderIterator extends AbstractReaderIterator { int index; int endPosition; - public InMemorySourceIterator() { + public InMemoryReaderIterator() { index = startIndex; endPosition = endIndex; } @@ -106,8 +103,7 @@ public T next() throws IOException { String encodedElementString = encodedElements.get(index++); // TODO: Replace with the real encoding used by the // front end, when we know what it is. - byte[] encodedElement = - StringUtils.jsonStringToByteArray(encodedElementString); + byte[] encodedElement = StringUtils.jsonStringToByteArray(encodedElementString); notifyElementRead(encodedElement.length); return CoderUtils.decodeFromByteArray(coder, encodedElement); } @@ -115,7 +111,7 @@ public T next() throws IOException { @Override public Progress getProgress() { // Currently we assume that only a record index position is reported as - // current progress. Source writer can override this method to update + // current progress. An implementer can override this method to update // other metrics, e.g. completion percentage or remaining time. 
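As a concrete illustration of the InMemoryReader introduced here, a small hedged sketch constructs one over pre-encoded elements and reads a sub-range. It assumes StringUtils.byteArrayToJsonString exists as the inverse of the jsonStringToByteArray call used in next() above; InMemoryReaderSketch is a made-up class name:

import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.runners.worker.InMemoryReader;
import com.google.cloud.dataflow.sdk.util.CoderUtils;
import com.google.cloud.dataflow.sdk.util.StringUtils;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

import java.util.ArrayList;
import java.util.List;

class InMemoryReaderSketch {
  public static void main(String[] args) throws Exception {
    StringUtf8Coder coder = StringUtf8Coder.of();

    // InMemoryReader expects each element already encoded with the element
    // coder and wrapped as a JSON-style string (the inverse of the
    // jsonStringToByteArray call in InMemoryReaderIterator.next()).
    List<String> encoded = new ArrayList<>();
    for (String value : new String[] {"a", "b", "c", "d"}) {
      encoded.add(StringUtils.byteArrayToJsonString(
          CoderUtils.encodeToByteArray(coder, value)));
    }

    // Restrict reading to the index range [1, 3), i.e. "b" and "c".
    InMemoryReader<String> reader = new InMemoryReader<>(encoded, 1L, 3L, coder);
    Reader.ReaderIterator<String> it = reader.iterator();
    try {
      while (it.hasNext()) {
        System.out.println(it.next());
      }
    } finally {
      it.close();
    }
  }
}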
com.google.api.services.dataflow.model.Position currentPosition = new com.google.api.services.dataflow.model.Position(); @@ -124,7 +120,7 @@ public Progress getProgress() { ApproximateProgress progress = new ApproximateProgress(); progress.setPosition(currentPosition); - return cloudProgressToSourceProgress(progress); + return cloudProgressToReaderProgress(progress); } @Override @@ -132,21 +128,19 @@ public Position updateStopPosition(Progress proposedStopPosition) { checkNotNull(proposedStopPosition); // Currently we only support stop position in record index of - // an API Position in InMemorySource. If stop position in other types is + // an API Position in InMemoryReader. If stop position in other types is // proposed, the end position in iterator will not be updated, // and return null. com.google.api.services.dataflow.model.Position stopPosition = sourceProgressToCloudProgress(proposedStopPosition).getPosition(); if (stopPosition == null) { - LOG.warn( - "A stop position other than a Dataflow API Position is not currently supported."); + LOG.warn("A stop position other than a Dataflow API Position is not currently supported."); return null; } Long recordIndex = stopPosition.getRecordIndex(); if (recordIndex == null) { - LOG.warn( - "A stop position other than record index is not supported in InMemorySource."); + LOG.warn("A stop position other than record index is not supported in InMemoryReader."); return null; } if (recordIndex <= index || recordIndex >= endPosition) { @@ -157,7 +151,7 @@ public Position updateStopPosition(Progress proposedStopPosition) { } this.endPosition = recordIndex.intValue(); - return cloudPositionToSourcePosition(stopPosition); + return cloudPositionToReaderPosition(stopPosition); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactory.java similarity index 64% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactory.java index 3f2cd9c9a1dba..cc51f8f0c94c4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactory.java @@ -28,27 +28,21 @@ import java.util.Collections; /** - * Creates an InMemorySource from a CloudObject spec. + * Creates an InMemoryReader from a CloudObject spec. */ -public class InMemorySourceFactory { +public class InMemoryReaderFactory { // Do not instantiate. 
- private InMemorySourceFactory() {} + private InMemoryReaderFactory() {} - public static InMemorySource create(PipelineOptions options, - CloudObject spec, - Coder coder, - ExecutionContext executionContext) - throws Exception { + public static InMemoryReader create(PipelineOptions options, CloudObject spec, + Coder coder, ExecutionContext executionContext) throws Exception { return create(spec, coder); } - static InMemorySource create(CloudObject spec, - Coder coder) throws Exception { - return new InMemorySource<>( - getStrings(spec, - PropertyNames.ELEMENTS, Collections.emptyList()), + static InMemoryReader create(CloudObject spec, Coder coder) throws Exception { + return new InMemoryReader<>( + getStrings(spec, PropertyNames.ELEMENTS, Collections.emptyList()), getLong(spec, PropertyNames.START_INDEX, null), - getLong(spec, PropertyNames.END_INDEX, null), - coder); + getLong(spec, PropertyNames.END_INDEX, null), coder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiReaderIterator.java similarity index 78% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiReaderIterator.java index 3ccebd5617565..aef1e9f191ec1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiSourceIterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/LazyMultiReaderIterator.java @@ -16,14 +16,14 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; /** - * Implements a SourceIterator over a collection of inputs. + * Implements a ReaderIterator over a collection of inputs. * * The sources are used sequentially, each consumed entirely before moving * to the next source. @@ -33,18 +33,17 @@ * be produced lazily, as an open source iterator may consume process resources * such as file descriptors. 
*/ -abstract class LazyMultiSourceIterator - extends Source.AbstractSourceIterator { +abstract class LazyMultiReaderIterator extends Reader.AbstractReaderIterator { private final Iterator inputs; - Source.SourceIterator current; + Reader.ReaderIterator current; - public LazyMultiSourceIterator(Iterator inputs) { + public LazyMultiReaderIterator(Iterator inputs) { this.inputs = inputs; } @Override public boolean hasNext() throws IOException { - while (selectSource()) { + while (selectReader()) { if (!current.hasNext()) { current.close(); current = null; @@ -65,16 +64,15 @@ public T next() throws IOException { @Override public void close() throws IOException { - while (selectSource()) { + while (selectReader()) { current.close(); current = null; } } - protected abstract Source.SourceIterator open(String input) - throws IOException; + protected abstract Reader.ReaderIterator open(String input) throws IOException; - boolean selectSource() throws IOException { + boolean selectReader() throws IOException { if (current != null) { return true; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java index 095aa0876ee8e..dd0133e7b3763 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java @@ -45,9 +45,9 @@ import com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation; import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation; import com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ReceivingOperation; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.util.common.worker.WriteOperation; import com.google.cloud.dataflow.sdk.values.KV; @@ -64,24 +64,19 @@ public class MapTaskExecutorFactory { /** * Creates a new MapTaskExecutor from the given MapTask definition. */ - public static MapTaskExecutor create(PipelineOptions options, - MapTask mapTask, - ExecutionContext context) - throws Exception { + public static MapTaskExecutor create( + PipelineOptions options, MapTask mapTask, ExecutionContext context) throws Exception { List operations = new ArrayList<>(); CounterSet counters = new CounterSet(); String counterPrefix = mapTask.getStageName() + "-"; - StateSampler stateSampler = new StateSampler( - counterPrefix, counters.getAddCounterMutator()); + StateSampler stateSampler = new StateSampler(counterPrefix, counters.getAddCounterMutator()); // Open-ended state. stateSampler.setState("other"); // Instantiate operations for each instruction in the graph. 
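For the LazyMultiReaderIterator above, a hypothetical subclass shows the intended laziness: an underlying iterator is only opened when selectReader() actually reaches its input, so at most one is live at a time. InMemoryLazyMultiReaderIterator is invented for this sketch and would need to live in the same runners.worker package, since LazyMultiReaderIterator is package-private:

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

import java.io.IOException;
import java.util.List;
import java.util.Map;

class InMemoryLazyMultiReaderIterator<T> extends LazyMultiReaderIterator<T> {
  private final Map<String, List<String>> encodedElementsByInput;
  private final Coder<T> coder;

  InMemoryLazyMultiReaderIterator(
      Map<String, List<String>> encodedElementsByInput, Coder<T> coder) {
    super(encodedElementsByInput.keySet().iterator());
    this.encodedElementsByInput = encodedElementsByInput;
    this.coder = coder;
  }

  @Override
  protected Reader.ReaderIterator<T> open(String input) throws IOException {
    // Opened only when selectReader() reaches this input, so no resources
    // are held for inputs that have not been consumed yet.
    return new InMemoryReader<>(encodedElementsByInput.get(input), null, null, coder)
        .iterator();
  }
}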
for (ParallelInstruction instruction : mapTask.getInstructions()) { - operations.add( - createOperation(options, instruction, context, operations, - counterPrefix, counters.getAddCounterMutator(), - stateSampler)); + operations.add(createOperation(options, instruction, context, operations, counterPrefix, + counters.getAddCounterMutator(), stateSampler)); } return new MapTaskExecutor(operations, counters, stateSampler); @@ -90,130 +85,87 @@ public static MapTaskExecutor create(PipelineOptions options, /** * Creates an Operation from the given ParallelInstruction definition. */ - static Operation createOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static Operation createOperation(PipelineOptions options, ParallelInstruction instruction, + ExecutionContext executionContext, List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { if (instruction.getRead() != null) { - return createReadOperation( - options, instruction, executionContext, priorOperations, + return createReadOperation(options, instruction, executionContext, priorOperations, counterPrefix, addCounterMutator, stateSampler); } else if (instruction.getWrite() != null) { - return createWriteOperation( - options, instruction, executionContext, priorOperations, + return createWriteOperation(options, instruction, executionContext, priorOperations, counterPrefix, addCounterMutator, stateSampler); } else if (instruction.getParDo() != null) { - return createParDoOperation( - options, instruction, executionContext, priorOperations, + return createParDoOperation(options, instruction, executionContext, priorOperations, counterPrefix, addCounterMutator, stateSampler); } else if (instruction.getPartialGroupByKey() != null) { - return createPartialGroupByKeyOperation( - options, instruction, executionContext, priorOperations, - counterPrefix, addCounterMutator, stateSampler); + return createPartialGroupByKeyOperation(options, instruction, executionContext, + priorOperations, counterPrefix, addCounterMutator, stateSampler); } else if (instruction.getFlatten() != null) { - return createFlattenOperation( - options, instruction, executionContext, priorOperations, + return createFlattenOperation(options, instruction, executionContext, priorOperations, counterPrefix, addCounterMutator, stateSampler); } else { throw new Exception("Unexpected instruction: " + instruction); } } - static ReadOperation createReadOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static ReadOperation createReadOperation(PipelineOptions options, ParallelInstruction instruction, + ExecutionContext executionContext, List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { ReadInstruction read = instruction.getRead(); - Source source = - SourceFactory.create(options, read.getSource(), executionContext); + Reader reader = ReaderFactory.create(options, read.getSource(), executionContext); - OutputReceiver[] receivers = createOutputReceivers( - instruction, counterPrefix, addCounterMutator, 
stateSampler, 1); + OutputReceiver[] receivers = + createOutputReceivers(instruction, counterPrefix, addCounterMutator, stateSampler, 1); - return new ReadOperation(instruction.getSystemName(), source, receivers, - counterPrefix, addCounterMutator, stateSampler); + return new ReadOperation(instruction.getSystemName(), reader, receivers, counterPrefix, + addCounterMutator, stateSampler); } - static WriteOperation createWriteOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static WriteOperation createWriteOperation(PipelineOptions options, + ParallelInstruction instruction, ExecutionContext executionContext, + List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { WriteInstruction write = instruction.getWrite(); Sink sink = SinkFactory.create(options, write.getSink(), executionContext); - OutputReceiver[] receivers = createOutputReceivers( - instruction, counterPrefix, addCounterMutator, stateSampler, 0); + OutputReceiver[] receivers = + createOutputReceivers(instruction, counterPrefix, addCounterMutator, stateSampler, 0); - WriteOperation operation = - new WriteOperation(instruction.getSystemName(), sink, receivers, - counterPrefix, addCounterMutator, stateSampler); + WriteOperation operation = new WriteOperation(instruction.getSystemName(), sink, receivers, + counterPrefix, addCounterMutator, stateSampler); attachInput(operation, write.getInput(), priorOperations); return operation; } - static ParDoOperation createParDoOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static ParDoOperation createParDoOperation(PipelineOptions options, + ParallelInstruction instruction, ExecutionContext executionContext, + List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { ParDoInstruction parDo = instruction.getParDo(); - ParDoFn fn = ParDoFnFactory.create( - options, - CloudObject.fromSpec(parDo.getUserFn()), - instruction.getSystemName(), - parDo.getSideInputs(), - parDo.getMultiOutputInfos(), - parDo.getNumOutputs(), - executionContext, - addCounterMutator, - stateSampler); + ParDoFn fn = ParDoFnFactory.create(options, CloudObject.fromSpec(parDo.getUserFn()), + instruction.getSystemName(), parDo.getSideInputs(), parDo.getMultiOutputInfos(), + parDo.getNumOutputs(), executionContext, addCounterMutator, stateSampler); - OutputReceiver[] receivers = - createOutputReceivers(instruction, counterPrefix, addCounterMutator, - stateSampler, parDo.getNumOutputs()); + OutputReceiver[] receivers = createOutputReceivers( + instruction, counterPrefix, addCounterMutator, stateSampler, parDo.getNumOutputs()); - ParDoOperation operation = - new ParDoOperation(instruction.getSystemName(), fn, receivers, - counterPrefix, addCounterMutator, stateSampler); + ParDoOperation operation = new ParDoOperation( + instruction.getSystemName(), fn, receivers, counterPrefix, addCounterMutator, stateSampler); attachInput(operation, parDo.getInput(), priorOperations); return operation; } - static PartialGroupByKeyOperation 
createPartialGroupByKeyOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static PartialGroupByKeyOperation createPartialGroupByKeyOperation(PipelineOptions options, + ParallelInstruction instruction, ExecutionContext executionContext, + List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { PartialGroupByKeyInstruction pgbk = instruction.getPartialGroupByKey(); Coder coder = Serializer.deserialize(pgbk.getInputElementCodec(), Coder.class); @@ -230,19 +182,14 @@ static PartialGroupByKeyOperation createPartialGroupByKeyOperation( Coder keyCoder = kvCoder.getKeyCoder(); Coder valueCoder = kvCoder.getValueCoder(); - OutputReceiver[] receivers = createOutputReceivers( - instruction, counterPrefix, addCounterMutator, stateSampler, 1); + OutputReceiver[] receivers = + createOutputReceivers(instruction, counterPrefix, addCounterMutator, stateSampler, 1); PartialGroupByKeyOperation operation = new PartialGroupByKeyOperation(instruction.getSystemName(), - new CoderGroupingKeyCreator(keyCoder), - new CoderSizeEstimator(keyCoder), - new CoderSizeEstimator(valueCoder), - 0.001 /*sizeEstimatorSampleRate*/, - PairInfo.create(), - receivers, - counterPrefix, addCounterMutator, - stateSampler); + new CoderGroupingKeyCreator(keyCoder), new CoderSizeEstimator(keyCoder), + new CoderSizeEstimator(valueCoder), 0.001/*sizeEstimatorSampleRate*/, PairInfo.create(), + receivers, counterPrefix, addCounterMutator, stateSampler); attachInput(operation, pgbk.getInput(), priorOperations); @@ -254,7 +201,9 @@ static PartialGroupByKeyOperation createPartialGroupByKeyOperation( */ public static class PairInfo implements PartialGroupByKeyOperation.PairInfo { private static PairInfo theInstance = new PairInfo(); - public static PairInfo create() { return theInstance; } + public static PairInfo create() { + return theInstance; + } private PairInfo() {} @Override public Object getKeyFromInputPair(Object pair) { @@ -293,8 +242,7 @@ public Object createGroupingKey(Object value) throws Exception { /** * Implements PGBKOp.SizeEstimator via Coder. 
*/ - public static class CoderSizeEstimator - implements PartialGroupByKeyOperation.SizeEstimator { + public static class CoderSizeEstimator implements PartialGroupByKeyOperation.SizeEstimator { final Coder coder; public CoderSizeEstimator(Coder coder) { @@ -307,24 +255,17 @@ public long estimateSize(Object value) throws Exception { } } - static FlattenOperation createFlattenOperation( - PipelineOptions options, - ParallelInstruction instruction, - ExecutionContext executionContext, - List priorOperations, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) - throws Exception { + static FlattenOperation createFlattenOperation(PipelineOptions options, + ParallelInstruction instruction, ExecutionContext executionContext, + List priorOperations, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { FlattenInstruction flatten = instruction.getFlatten(); OutputReceiver[] receivers = - createOutputReceivers(instruction, counterPrefix, addCounterMutator, - stateSampler, 1); + createOutputReceivers(instruction, counterPrefix, addCounterMutator, stateSampler, 1); - FlattenOperation operation = - new FlattenOperation(instruction.getSystemName(), receivers, - counterPrefix, addCounterMutator, stateSampler); + FlattenOperation operation = new FlattenOperation( + instruction.getSystemName(), receivers, counterPrefix, addCounterMutator, stateSampler); for (InstructionInput input : flatten.getInputs()) { attachInput(operation, input, priorOperations); @@ -337,30 +278,23 @@ static FlattenOperation createFlattenOperation( * Returns an array of OutputReceivers for the given * ParallelInstruction definition. */ - static OutputReceiver[] createOutputReceivers( - ParallelInstruction instruction, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler, - int expectedNumOutputs) - throws Exception { + static OutputReceiver[] createOutputReceivers(ParallelInstruction instruction, + String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, + StateSampler stateSampler, int expectedNumOutputs) throws Exception { int numOutputs = 0; if (instruction.getOutputs() != null) { numOutputs = instruction.getOutputs().size(); } if (numOutputs != expectedNumOutputs) { - throw new AssertionError( - "ParallelInstruction.Outputs has an unexpected length"); + throw new AssertionError("ParallelInstruction.Outputs has an unexpected length"); } OutputReceiver[] receivers = new OutputReceiver[numOutputs]; for (int i = 0; i < numOutputs; i++) { InstructionOutput cloudOutput = instruction.getOutputs().get(i); - receivers[i] = new OutputReceiver( - cloudOutput.getName(), - new ElementByteSizeObservableCoder( - Serializer.deserialize(cloudOutput.getCodec(), Coder.class)), - counterPrefix, - addCounterMutator); + receivers[i] = new OutputReceiver(cloudOutput.getName(), + new ElementByteSizeObservableCoder(Serializer.deserialize( + cloudOutput.getCodec(), Coder.class)), + counterPrefix, addCounterMutator); } return receivers; } @@ -368,8 +302,7 @@ static OutputReceiver[] createOutputReceivers( /** * Adapts a Coder to the ElementByteSizeObservable interface. 
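One plausible way to do the coder-backed size estimation that CoderSizeEstimator above exists for (not necessarily its exact implementation) is to encode the element and measure the result; PartialGroupByKeyOperation samples such estimates at the 0.001 rate shown earlier to decide when its buffer should flush. SizeEstimationSketch and encodedSize are made-up names:

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.util.CoderUtils;

class SizeEstimationSketch {
  // Estimates an element's footprint by actually encoding it with its coder
  // and measuring the encoded length.
  static <T> long encodedSize(Coder<T> coder, T value) throws Exception {
    return CoderUtils.encodeToByteArray(coder, value).length;
  }

  public static void main(String[] args) throws Exception {
    System.out.println(encodedSize(StringUtf8Coder.of(), "hello, shuffle"));
  }
}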
*/ - public static class ElementByteSizeObservableCoder - implements ElementByteSizeObservable { + public static class ElementByteSizeObservableCoder implements ElementByteSizeObservable { final Coder coder; public ElementByteSizeObservableCoder(Coder coder) { @@ -382,8 +315,7 @@ public boolean isRegisterByteSizeObserverCheap(T value) { } @Override - public void registerByteSizeObserver(T value, - ElementByteSizeObserver observer) + public void registerByteSizeObserver(T value, ElementByteSizeObserver observer) throws Exception { coder.registerByteSizeObserver(value, observer, Coder.Context.OUTER); } @@ -393,9 +325,8 @@ public void registerByteSizeObserver(T value, * Adds an input to the given Operation, coming from the given * producer instruction output. */ - static void attachInput(ReceivingOperation operation, - @Nullable InstructionInput input, - List priorOperations) { + static void attachInput(ReceivingOperation operation, @Nullable InstructionInput input, + List priorOperations) { Integer producerInstructionIndex = 0; Integer outputNum = 0; if (input != null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReader.java similarity index 75% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReader.java index e1401b6cd83ad..771bd2baadba5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReader.java @@ -24,9 +24,9 @@ import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.values.KV; import java.io.IOException; @@ -39,19 +39,15 @@ * @param the type of the keys read from the shuffle * @param the type of the values read from the shuffle */ -public class PartitioningShuffleSource extends Source>> { - +public class PartitioningShuffleReader extends Reader>> { final byte[] shuffleReaderConfig; final String startShufflePosition; final String stopShufflePosition; Coder keyCoder; WindowedValueCoder windowedValueCoder; - public PartitioningShuffleSource(PipelineOptions options, - byte[] shuffleReaderConfig, - String startShufflePosition, - String stopShufflePosition, - Coder>> coder) + public PartitioningShuffleReader(PipelineOptions options, byte[] shuffleReaderConfig, + String startShufflePosition, String stopShufflePosition, Coder>> coder) throws Exception { this.shuffleReaderConfig = shuffleReaderConfig; this.startShufflePosition = startShufflePosition; @@ -65,14 +61,12 @@ public PartitioningShuffleSource(PipelineOptions options, */ private void initCoder(Coder>> coder) throws Exception { if (!(coder instanceof WindowedValueCoder)) { - throw new Exception( - "unexpected kind of coder for WindowedValue: " + coder); + throw new Exception("unexpected kind of coder for WindowedValue: " + coder); } 
WindowedValueCoder> windowedElemCoder = ((WindowedValueCoder>) coder); Coder> elemCoder = windowedElemCoder.getValueCoder(); if (!(elemCoder instanceof KvCoder)) { - throw new Exception( - "unexpected kind of coder for elements read from " + throw new Exception("unexpected kind of coder for elements read from " + "a key-partitioning shuffle: " + elemCoder); } KvCoder kvCoder = (KvCoder) elemCoder; @@ -81,28 +75,25 @@ private void initCoder(Coder>> coder) throws Exception { } @Override - public com.google.cloud.dataflow.sdk.util.common.worker.Source.SourceIterator< - WindowedValue>> iterator() throws IOException { + public ReaderIterator>> iterator() throws IOException { Preconditions.checkArgument(shuffleReaderConfig != null); return iterator(new BatchingShuffleEntryReader( - new ChunkingShuffleBatchReader(new ApplianceShuffleReader( - shuffleReaderConfig)))); + new ChunkingShuffleBatchReader(new ApplianceShuffleReader(shuffleReaderConfig)))); } - SourceIterator>> iterator(ShuffleEntryReader reader) throws IOException { - return new PartitioningShuffleSourceIterator(reader); + ReaderIterator>> iterator(ShuffleEntryReader reader) throws IOException { + return new PartitioningShuffleReaderIterator(reader); } /** - * A SourceIterator that reads from a ShuffleEntryReader, + * A ReaderIterator that reads from a ShuffleEntryReader, * extracts K and {@code WindowedValue}, and returns a constructed * {@code WindowedValue}. */ - class PartitioningShuffleSourceIterator - extends AbstractSourceIterator>> { + class PartitioningShuffleReaderIterator extends AbstractReaderIterator>> { Iterator iterator; - PartitioningShuffleSourceIterator(ShuffleEntryReader reader) { + PartitioningShuffleReaderIterator(ShuffleEntryReader reader) { this.iterator = reader.read( ByteArrayShufflePosition.fromBase64(startShufflePosition), ByteArrayShufflePosition.fromBase64(stopShufflePosition)); @@ -120,9 +111,8 @@ public WindowedValue> next() throws IOException { WindowedValue windowedValue = CoderUtils.decodeFromByteArray(windowedValueCoder, record.getValue()); notifyElementRead(record.length()); - return WindowedValue.of(KV.of(key, windowedValue.getValue()), - windowedValue.getTimestamp(), - windowedValue.getWindows()); + return WindowedValue.of(KV.of(key, windowedValue.getValue()), windowedValue.getTimestamp(), + windowedValue.getWindows()); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderFactory.java similarity index 65% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderFactory.java index f97d1d5b82988..72d7d6c7cb5ca 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderFactory.java @@ -28,23 +28,18 @@ import com.google.cloud.dataflow.sdk.values.KV; /** - * Creates a PartitioningShuffleSource from a CloudObject spec. + * Creates a PartitioningShuffleReader from a CloudObject spec. */ -public class PartitioningShuffleSourceFactory { +public class PartitioningShuffleReaderFactory { // Do not instantiate. 
- private PartitioningShuffleSourceFactory() {} + private PartitioningShuffleReaderFactory() {} - public static PartitioningShuffleSource create( - PipelineOptions options, - CloudObject spec, - Coder>> coder, - ExecutionContext executionContext) + public static PartitioningShuffleReader create(PipelineOptions options, + CloudObject spec, Coder>> coder, ExecutionContext executionContext) throws Exception { - return new PartitioningShuffleSource( - options, - decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), - getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), - getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), - coder); + return new PartitioningShuffleReader(options, + decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), + getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), coder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java similarity index 59% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java index d4726094a3ea6..5d42970c16000 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CloudObject; @@ -23,7 +24,7 @@ import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.InstanceBuilder; import com.google.cloud.dataflow.sdk.util.Serializer; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.common.reflect.TypeToken; import java.util.HashMap; @@ -32,71 +33,57 @@ import javax.annotation.Nullable; /** - * Constructs a Source from a Dataflow API Source definition. + * Constructs a Reader from a Dataflow API Source definition. * - * A SourceFactory concrete "subclass" should define a method with the following + * A ReaderFactory concrete "subclass" should define a method with the following * signature: *
<pre>
 {@code
- * static SomeSourceSubclass<T> create(PipelineOptions, CloudObject,
+ * static SomeReaderSubclass<T> create(PipelineOptions, CloudObject,
  *                                     Coder<T>, ExecutionContext);
  * } 
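For illustration only (the factory class, its MyFormatReader reader type, and the choice of PropertyNames.FILENAME are hypothetical, and Structs.getString is assumed to be statically imported), a reader factory satisfying that contract would look roughly like:

    public class MyFormatReaderFactory {
      private MyFormatReaderFactory() {}  // do not instantiate, as with the other factories

      // Invoked reflectively by ReaderFactory.create() through InstanceBuilder.
      public static MyFormatReader<String> create(PipelineOptions options, CloudObject spec,
          Coder<String> coder, ExecutionContext executionContext) throws Exception {
        return new MyFormatReader<>(getString(spec, PropertyNames.FILENAME), coder);
      }
    }

A factory that is not listed in the predefined factory map is still reachable: ReaderFactory.create() falls back to treating the class name carried in the cloud Source spec as the factory class itself.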
*/ -public final class SourceFactory { +public final class ReaderFactory { // Do not instantiate. - private SourceFactory() {} + private ReaderFactory() {} /** * A map from the short names of predefined sources to * their full factory class names. */ - static Map predefinedSourceFactories = new HashMap<>(); + static Map predefinedReaderFactories = new HashMap<>(); static { - predefinedSourceFactories.put( - "TextSource", - TextSourceFactory.class.getName()); - predefinedSourceFactories.put( - "AvroSource", - AvroSourceFactory.class.getName()); - predefinedSourceFactories.put( - "UngroupedShuffleSource", - UngroupedShuffleSourceFactory.class.getName()); - predefinedSourceFactories.put( - "PartitioningShuffleSource", - PartitioningShuffleSourceFactory.class.getName()); - predefinedSourceFactories.put( - "GroupingShuffleSource", - GroupingShuffleSourceFactory.class.getName()); - predefinedSourceFactories.put( - "InMemorySource", - InMemorySourceFactory.class.getName()); - predefinedSourceFactories.put( - "BigQuerySource", - BigQuerySourceFactory.class.getName()); + predefinedReaderFactories.put("TextSource", TextReaderFactory.class.getName()); + predefinedReaderFactories.put("AvroSource", AvroReaderFactory.class.getName()); + predefinedReaderFactories.put( + "UngroupedShuffleSource", UngroupedShuffleReaderFactory.class.getName()); + predefinedReaderFactories.put( + "PartitioningShuffleSource", PartitioningShuffleReaderFactory.class.getName()); + predefinedReaderFactories.put( + "GroupingShuffleSource", GroupingShuffleReaderFactory.class.getName()); + predefinedReaderFactories.put("InMemorySource", InMemoryReaderFactory.class.getName()); + predefinedReaderFactories.put("BigQuerySource", BigQueryReaderFactory.class.getName()); } /** - * Creates a Source from a Dataflow API Source definition. + * Creates a Reader from a Dataflow API Source definition. 
* * @throws Exception if the source could not be decoded and * constructed */ - public static Source create( - @Nullable PipelineOptions options, - com.google.api.services.dataflow.model.Source cloudSource, - @Nullable ExecutionContext executionContext) - throws Exception { + public static Reader create(@Nullable PipelineOptions options, Source cloudSource, + @Nullable ExecutionContext executionContext) throws Exception { cloudSource = CloudSourceUtils.flattenBaseSpecs(cloudSource); Coder coder = Serializer.deserialize(cloudSource.getCodec(), Coder.class); CloudObject object = CloudObject.fromSpec(cloudSource.getSpec()); - String sourceFactoryClassName = predefinedSourceFactories.get(object.getClassName()); + String sourceFactoryClassName = predefinedReaderFactories.get(object.getClassName()); if (sourceFactoryClassName == null) { sourceFactoryClassName = object.getClassName(); } try { - return InstanceBuilder.ofType(new TypeToken>() {}) + return InstanceBuilder.ofType(new TypeToken>() {}) .fromClassName(sourceFactoryClassName) .fromFactoryMethod("create") .withArg(PipelineOptions.class, options) @@ -106,8 +93,7 @@ public static Source create( .build(); } catch (ClassNotFoundException exn) { - throw new Exception( - "unable to create a source from " + cloudSource, exn); + throw new Exception("unable to create a source from " + cloudSource, exn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java index 72ea16fc99b43..e581528598ec9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java @@ -37,8 +37,12 @@ * @param the type of the elements written to the sink */ public class ShuffleSink extends Sink> { - - enum ShuffleKind { UNGROUPED, PARTITION_KEYS, GROUP_KEYS, GROUP_KEYS_AND_SORT_VALUES } + enum ShuffleKind { + UNGROUPED, + PARTITION_KEYS, + GROUP_KEYS, + GROUP_KEYS_AND_SORT_VALUES + } static final long SHUFFLE_WRITER_BUFFER_SIZE = 128 << 20; @@ -58,8 +62,7 @@ enum ShuffleKind { UNGROUPED, PARTITION_KEYS, GROUP_KEYS, GROUP_KEYS_AND_SORT_VA Coder sortKeyCoder; Coder sortValueCoder; - public static ShuffleKind parseShuffleKind(String shuffleKind) - throws Exception { + public static ShuffleKind parseShuffleKind(String shuffleKind) throws Exception { try { return Enum.valueOf(ShuffleKind.class, shuffleKind.trim().toUpperCase()); } catch (IllegalArgumentException e) { @@ -67,11 +70,8 @@ public static ShuffleKind parseShuffleKind(String shuffleKind) } } - public ShuffleSink(PipelineOptions options, - byte[] shuffleWriterConfig, - ShuffleKind shuffleKind, - Coder> coder) - throws Exception { + public ShuffleSink(PipelineOptions options, byte[] shuffleWriterConfig, ShuffleKind shuffleKind, + Coder> coder) throws Exception { this.shuffleWriterConfig = shuffleWriterConfig; this.shuffleKind = shuffleKind; initCoder(coder); @@ -107,8 +107,7 @@ private void initCoder(Coder> coder) throws Exception { this.elemCoder = windowedElemCoder.getValueCoder(); if (shardByKey) { if (!(elemCoder instanceof KvCoder)) { - throw new Exception( - "unexpected kind of coder for elements written to " + throw new Exception("unexpected kind of coder for elements written to " + "a key-grouping shuffle"); } KvCoder kvCoder = (KvCoder) elemCoder; @@ -118,8 +117,7 @@ private void initCoder(Coder> coder) throws Exception { // TODO: Decide the representation of sort-keyed values. 
// For now, we'll just use KVs. if (!(valueCoder instanceof KvCoder)) { - throw new Exception( - "unexpected kind of coder for values written to " + throw new Exception("unexpected kind of coder for values written to " + "a value-sorting shuffle"); } KvCoder kvValueCoder = (KvCoder) valueCoder; @@ -168,8 +166,7 @@ public long add(WindowedValue windowedElem) throws IOException { T elem = windowedElem.getValue(); if (shardByKey) { if (!(elem instanceof KV)) { - throw new AssertionError( - "expecting the values written to a key-grouping shuffle " + throw new AssertionError("expecting the values written to a key-grouping shuffle " + "to be KVs"); } KV kv = (KV) elem; @@ -180,8 +177,7 @@ public long add(WindowedValue windowedElem) throws IOException { if (sortValues) { if (!(value instanceof KV)) { - throw new AssertionError( - "expecting the value parts of the KVs written to " + throw new AssertionError("expecting the value parts of the KVs written to " + "a value-sorting shuffle to also be KVs"); } KV kvValue = (KV) value; @@ -189,12 +185,11 @@ public long add(WindowedValue windowedElem) throws IOException { Object sortValue = kvValue.getValue(); // TODO: Need to coordinate with the - // GroupingShuffleSource, to make sure it knows how to + // GroupingShuffleReader, to make sure it knows how to // reconstruct the value from the sortKeyBytes and // sortValueBytes. Right now, it doesn't know between // sorting and non-sorting GBKs. - secondaryKeyBytes = - CoderUtils.encodeToByteArray(sortKeyCoder, sortKey); + secondaryKeyBytes = CoderUtils.encodeToByteArray(sortKeyCoder, sortKey); valueBytes = CoderUtils.encodeToByteArray(sortValueCoder, sortValue); } else if (groupValues) { @@ -222,15 +217,13 @@ public long add(WindowedValue windowedElem) throws IOException { // for writing a single-sharded ordered PCollection through a // shuffle, since the order of elements in the input will be // preserved in the output. 
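    // Aside (illustrative only, not part of this change): the kind strings arriving in
    // the sink spec map onto the ShuffleKind enum above via parseShuffleKind, which
    // trims and upper-cases its argument, so for example
    //   ShuffleSink.parseShuffleKind("group_keys")  // returns ShuffleKind.GROUP_KEYS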
- keyBytes = - CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum++); + keyBytes = CoderUtils.encodeToByteArray(BigEndianLongCoder.of(), seqNum++); secondaryKeyBytes = null; valueBytes = CoderUtils.encodeToByteArray(windowedElemCoder, windowedElem); } - return writer.put(new ShuffleEntry( - keyBytes, secondaryKeyBytes, valueBytes)); + return writer.put(new ShuffleEntry(keyBytes, secondaryKeyBytes, valueBytes)); } @Override @@ -242,7 +235,7 @@ public void close() throws IOException { @Override public SinkWriter> writer() throws IOException { Preconditions.checkArgument(shuffleWriterConfig != null); - return writer(new ChunkingShuffleEntryWriter(new ApplianceShuffleWriter( - shuffleWriterConfig, SHUFFLE_WRITER_BUFFER_SIZE))); + return writer(new ChunkingShuffleEntryWriter( + new ApplianceShuffleWriter(shuffleWriterConfig, SHUFFLE_WRITER_BUFFER_SIZE))); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java index f3fc1cf3f3ef3..374fd65a969f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtils.java @@ -19,10 +19,11 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getString; import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import java.util.ArrayList; import java.util.Iterator; @@ -41,20 +42,15 @@ public class SideInputUtils { * Reads the given side input, producing the contents associated * with a a {@link PCollectionView}. 
*/ - public static Object readSideInput(PipelineOptions options, - SideInputInfo sideInputInfo, - ExecutionContext executionContext) - throws Exception { + public static Object readSideInput(PipelineOptions options, SideInputInfo sideInputInfo, + ExecutionContext executionContext) throws Exception { Iterable elements = readSideInputSources(options, sideInputInfo.getSources(), executionContext); return readSideInputValue(sideInputInfo.getKind(), elements); } - static Iterable readSideInputSources( - PipelineOptions options, - List sideInputSources, - ExecutionContext executionContext) - throws Exception { + static Iterable readSideInputSources(PipelineOptions options, + List sideInputSources, ExecutionContext executionContext) throws Exception { int numSideInputSources = sideInputSources.size(); if (numSideInputSources == 0) { throw new Exception("expecting at least one side input Source"); @@ -62,25 +58,19 @@ static Iterable readSideInputSources( return readSideInputSource(options, sideInputSources.get(0), executionContext); } else { List> shards = new ArrayList<>(); - for (com.google.api.services.dataflow.model.Source sideInputSource - : sideInputSources) { + for (Source sideInputSource : sideInputSources) { shards.add(readSideInputSource(options, sideInputSource, executionContext)); } return new ShardedIterable<>(shards); } } - static Iterable readSideInputSource( - PipelineOptions options, - com.google.api.services.dataflow.model.Source sideInputSource, - ExecutionContext executionContext) - throws Exception { - return new SourceIterable<>( - SourceFactory.create(options, sideInputSource, executionContext)); + static Iterable readSideInputSource(PipelineOptions options, Source sideInputSource, + ExecutionContext executionContext) throws Exception { + return new ReaderIterable<>(ReaderFactory.create(options, sideInputSource, executionContext)); } - static Object readSideInputValue(Map sideInputKind, - Iterable elements) + static Object readSideInputValue(Map sideInputKind, Iterable elements) throws Exception { String className = getString(sideInputKind, PropertyNames.OBJECT_TYPE_NAME); if (SINGLETON_KIND.equals(className)) { @@ -91,8 +81,7 @@ static Object readSideInputValue(Map sideInputKind, return elem; } } - throw new Exception( - "expecting a singleton side input to have a single value"); + throw new Exception("expecting a singleton side input to have a single value"); } else if (COLLECTION_KIND.equals(className)) { return elements; @@ -106,27 +95,27 @@ static Object readSideInputValue(Map sideInputKind, ///////////////////////////////////////////////////////////////////////////// - static class SourceIterable implements Iterable { - final Source source; + static class ReaderIterable implements Iterable { + final Reader reader; - public SourceIterable(Source source) { - this.source = source; + public ReaderIterable(Reader reader) { + this.reader = reader; } @Override public Iterator iterator() { try { - return new SourceIterator<>(source.iterator()); + return new ReaderIterator<>(reader.iterator()); } catch (Exception exn) { throw new RuntimeException(exn); } } } - static class SourceIterator implements Iterator { - final Source.SourceIterator iterator; + static class ReaderIterator implements Iterator { + final Reader.ReaderIterator iterator; - public SourceIterator(Source.SourceIterator iterator) { + public ReaderIterator(Reader.ReaderIterator iterator) { this.iterator = iterator; } diff --git 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java index 1e0c8aa234918..0f7ce18c480f5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java @@ -23,12 +23,13 @@ import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.api.services.dataflow.model.Position; +import com.google.api.services.dataflow.model.Source; import com.google.api.services.dataflow.model.SourceMetadata; import com.google.api.services.dataflow.model.SourceOperationRequest; import com.google.api.services.dataflow.model.SourceOperationResponse; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import java.util.HashMap; import java.util.Map; @@ -36,97 +37,81 @@ import javax.annotation.Nullable; /** - * Utilities for representing Source-specific objects + * Utilities for representing input-specific objects * using Dataflow model protos. */ public class SourceTranslationUtils { - public static Source.Progress cloudProgressToSourceProgress( + public static Reader.Progress cloudProgressToReaderProgress( @Nullable ApproximateProgress cloudProgress) { - return cloudProgress == null ? null - : new DataflowSourceProgress(cloudProgress); + return cloudProgress == null ? null : new DataflowReaderProgress(cloudProgress); } - public static Source.Position cloudPositionToSourcePosition( - @Nullable Position cloudPosition) { - return cloudPosition == null ? null - : new DataflowSourcePosition(cloudPosition); + public static Reader.Position cloudPositionToReaderPosition(@Nullable Position cloudPosition) { + return cloudPosition == null ? null : new DataflowReaderPosition(cloudPosition); } - public static CustomSourceFormat.SourceOperationRequest - cloudSourceOperationRequestToSourceOperationRequest( - @Nullable SourceOperationRequest request) { - return request == null ? null - : new DataflowSourceOperationRequest(request); + public static CustomSourceFormat.OperationRequest + cloudSourceOperationRequestToSourceOperationRequest( + @Nullable SourceOperationRequest request) { + return request == null ? null : new DataflowSourceOperationRequest(request); } - public static CustomSourceFormat.SourceOperationResponse - cloudSourceOperationResponseToSourceOperationResponse( - @Nullable SourceOperationResponse response) { - return response == null ? null - : new DataflowSourceOperationResponse(response); + public static CustomSourceFormat.OperationResponse + cloudSourceOperationResponseToSourceOperationResponse( + @Nullable SourceOperationResponse response) { + return response == null ? null : new DataflowSourceOperationResponse(response); } public static CustomSourceFormat.SourceSpec cloudSourceToSourceSpec( - @Nullable com.google.api.services.dataflow.model.Source cloudSource) { - return cloudSource == null ? null - : new DataflowSourceSpec(cloudSource); + @Nullable Source cloudSource) { + return cloudSource == null ? null : new DataflowSourceSpec(cloudSource); } public static ApproximateProgress sourceProgressToCloudProgress( - @Nullable Source.Progress sourceProgress) { - return sourceProgress == null ? 
null - : ((DataflowSourceProgress) sourceProgress).cloudProgress; + @Nullable Reader.Progress sourceProgress) { + return sourceProgress == null ? null : ((DataflowReaderProgress) sourceProgress).cloudProgress; } - public static Position sourcePositionToCloudPosition( - @Nullable Source.Position sourcePosition) { - return sourcePosition == null ? null - : ((DataflowSourcePosition) sourcePosition).cloudPosition; + public static Position sourcePositionToCloudPosition(@Nullable Reader.Position sourcePosition) { + return sourcePosition == null ? null : ((DataflowReaderPosition) sourcePosition).cloudPosition; } - public static SourceOperationRequest - sourceOperationRequestToCloudSourceOperationRequest( - @Nullable CustomSourceFormat.SourceOperationRequest request) { - return (request == null) ? null - : ((DataflowSourceOperationRequest) request).cloudRequest; + public static SourceOperationRequest sourceOperationRequestToCloudSourceOperationRequest( + @Nullable CustomSourceFormat.OperationRequest request) { + return (request == null) ? null : ((DataflowSourceOperationRequest) request).cloudRequest; } - public static SourceOperationResponse - sourceOperationResponseToCloudSourceOperationResponse( - @Nullable CustomSourceFormat.SourceOperationResponse response) { - return (response == null) ? null - : ((DataflowSourceOperationResponse) response).cloudResponse; + public static SourceOperationResponse sourceOperationResponseToCloudSourceOperationResponse( + @Nullable CustomSourceFormat.OperationResponse response) { + return (response == null) ? null : ((DataflowSourceOperationResponse) response).cloudResponse; } - public static com.google.api.services.dataflow.model.Source sourceSpecToCloudSource( - @Nullable CustomSourceFormat.SourceSpec spec) { - return (spec == null) ? null - : ((DataflowSourceSpec) spec).cloudSource; + public static Source sourceSpecToCloudSource(@Nullable CustomSourceFormat.SourceSpec spec) { + return (spec == null) ? 
null : ((DataflowSourceSpec) spec).cloudSource; } - static class DataflowSourceProgress implements Source.Progress { + static class DataflowReaderProgress implements Reader.Progress { public final ApproximateProgress cloudProgress; - public DataflowSourceProgress(ApproximateProgress cloudProgress) { + public DataflowReaderProgress(ApproximateProgress cloudProgress) { this.cloudProgress = cloudProgress; } } - static class DataflowSourcePosition implements Source.Position { + static class DataflowReaderPosition implements Reader.Position { public final Position cloudPosition; - public DataflowSourcePosition(Position cloudPosition) { + public DataflowReaderPosition(Position cloudPosition) { this.cloudPosition = cloudPosition; } } - static class DataflowSourceOperationRequest implements CustomSourceFormat.SourceOperationRequest { + static class DataflowSourceOperationRequest implements CustomSourceFormat.OperationRequest { public final SourceOperationRequest cloudRequest; public DataflowSourceOperationRequest(SourceOperationRequest cloudRequest) { this.cloudRequest = cloudRequest; } } - static class DataflowSourceOperationResponse - implements CustomSourceFormat.SourceOperationResponse { + static class DataflowSourceOperationResponse implements CustomSourceFormat.OperationResponse { public final SourceOperationResponse cloudResponse; public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { this.cloudResponse = cloudResponse; @@ -134,16 +119,15 @@ public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { } static class DataflowSourceSpec implements CustomSourceFormat.SourceSpec { - public final com.google.api.services.dataflow.model.Source cloudSource; - public DataflowSourceSpec(com.google.api.services.dataflow.model.Source cloudSource) { + public final Source cloudSource; + public DataflowSourceSpec(Source cloudSource) { this.cloudSource = cloudSource; } } // Represents a cloud Source as a dictionary for encoding inside the CUSTOM_SOURCE // property of CloudWorkflowStep.input. - public static Map cloudSourceToDictionary( - com.google.api.services.dataflow.model.Source source) { + public static Map cloudSourceToDictionary(Source source) { // Do not translate encoding - the source's encoding is translated elsewhere // to the step's output info. 
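A sketch of how these wrapper types are meant to round-trip (the progress fields themselves are elided; the snippet is illustrative, not part of the patch):

    ApproximateProgress cloudProgress = new ApproximateProgress();
    Reader.Progress readerProgress =
        SourceTranslationUtils.cloudProgressToReaderProgress(cloudProgress);
    // The worker framework treats readerProgress as opaque; unwrapping returns
    // the very same ApproximateProgress instance that was wrapped above.
    ApproximateProgress reported =
        SourceTranslationUtils.sourceProgressToCloudProgress(readerProgress);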
Map res = new HashMap<>(); @@ -159,28 +143,24 @@ public static Map cloudSourceToDictionary( return res; } - private static Map cloudSourceMetadataToDictionary( - SourceMetadata metadata) { + private static Map cloudSourceMetadataToDictionary(SourceMetadata metadata) { Map res = new HashMap<>(); if (metadata.getProducesSortedKeys() != null) { - addBoolean(res, PropertyNames.CUSTOM_SOURCE_PRODUCES_SORTED_KEYS, - metadata.getProducesSortedKeys()); + addBoolean( + res, PropertyNames.CUSTOM_SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys()); } if (metadata.getEstimatedSizeBytes() != null) { - addLong(res, PropertyNames.CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES, - metadata.getEstimatedSizeBytes()); + addLong( + res, PropertyNames.CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes()); } if (metadata.getInfinite() != null) { - addBoolean(res, PropertyNames.CUSTOM_SOURCE_IS_INFINITE, - metadata.getInfinite()); + addBoolean(res, PropertyNames.CUSTOM_SOURCE_IS_INFINITE, metadata.getInfinite()); } return res; } - public static com.google.api.services.dataflow.model.Source dictionaryToCloudSource( - Map params) throws Exception { - com.google.api.services.dataflow.model.Source res = - new com.google.api.services.dataflow.model.Source(); + public static Source dictionaryToCloudSource(Map params) throws Exception { + Source res = new Source(); res.setSpec(getDictionary(params, PropertyNames.CUSTOM_SOURCE_SPEC)); // CUSTOM_SOURCE_METADATA and CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING do not have to be // translated, because they only make sense in cloud Source objects produced by the user. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java similarity index 73% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java index 5bbcba0e6b91d..f46eca2427356 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java @@ -36,37 +36,27 @@ * * @param the type of the elements read from the source */ -public class TextSource extends FileBasedSource { +public class TextReader extends FileBasedReader { final boolean stripTrailingNewlines; - public TextSource(String filename, - boolean stripTrailingNewlines, - @Nullable Long startPosition, - @Nullable Long endPosition, - Coder coder) { - this(filename, stripTrailingNewlines, - startPosition, endPosition, coder, true); + public TextReader(String filename, boolean stripTrailingNewlines, @Nullable Long startPosition, + @Nullable Long endPosition, Coder coder) { + this(filename, stripTrailingNewlines, startPosition, endPosition, coder, true); } - protected TextSource(String filename, - boolean stripTrailingNewlines, - @Nullable Long startPosition, - @Nullable Long endPosition, - Coder coder, - boolean useDefaultBufferSize) { + protected TextReader(String filename, boolean stripTrailingNewlines, @Nullable Long startPosition, + @Nullable Long endPosition, Coder coder, boolean useDefaultBufferSize) { super(filename, startPosition, endPosition, coder, useDefaultBufferSize); this.stripTrailingNewlines = stripTrailingNewlines; } @Override - protected SourceIterator newSourceIteratorForRangeInFile( - IOChannelFactory factory, String oneFile, long startPosition, - @Nullable Long endPosition) - throws 
IOException { + protected ReaderIterator newReaderIteratorForRangeInFile(IOChannelFactory factory, + String oneFile, long startPosition, @Nullable Long endPosition) throws IOException { // Position before the first record, so we can find the record beginning. final long start = startPosition > 0 ? startPosition - 1 : 0; - TextFileIterator iterator = newSourceIteratorForRangeWithStrictStart( + TextFileIterator iterator = newReaderIteratorForRangeWithStrictStart( factory, oneFile, stripTrailingNewlines, start, endPosition); // Skip the initial record if start position was set. @@ -78,22 +68,18 @@ protected SourceIterator newSourceIteratorForRangeInFile( } @Override - protected SourceIterator newSourceIteratorForFiles( + protected ReaderIterator newReaderIteratorForFiles( IOChannelFactory factory, Collection files) throws IOException { if (files.size() == 1) { - return newSourceIteratorForFile( - factory, files.iterator().next(), stripTrailingNewlines); + return newReaderIteratorForFile(factory, files.iterator().next(), stripTrailingNewlines); } - return new TextFileMultiIterator( - factory, files.iterator(), stripTrailingNewlines); + return new TextFileMultiIterator(factory, files.iterator(), stripTrailingNewlines); } - private TextFileIterator newSourceIteratorForFile( - IOChannelFactory factory, String input, boolean stripTrailingNewlines) - throws IOException { - return newSourceIteratorForRangeWithStrictStart( - factory, input, stripTrailingNewlines, 0, null); + private TextFileIterator newReaderIteratorForFile( + IOChannelFactory factory, String input, boolean stripTrailingNewlines) throws IOException { + return newReaderIteratorForRangeWithStrictStart(factory, input, stripTrailingNewlines, 0, null); } /** @@ -102,36 +88,34 @@ private TextFileIterator newSourceIteratorForFile( * middle of a line (instead, the latter half that starts at * startOffset will be returned as the first element). 
*/ - private TextFileIterator newSourceIteratorForRangeWithStrictStart( - IOChannelFactory factory, String input, boolean stripTrailingNewlines, - long startOffset, @Nullable Long endOffset) throws IOException { + private TextFileIterator newReaderIteratorForRangeWithStrictStart(IOChannelFactory factory, + String input, boolean stripTrailingNewlines, long startOffset, @Nullable Long endOffset) + throws IOException { ReadableByteChannel reader = factory.open(input); if (!(reader instanceof SeekableByteChannel)) { - throw new UnsupportedOperationException( - "Unable to seek in stream for " + input); + throw new UnsupportedOperationException("Unable to seek in stream for " + input); } SeekableByteChannel seeker = (SeekableByteChannel) reader; return new TextFileIterator( - new CopyableSeekableByteChannel(seeker), - stripTrailingNewlines, startOffset, endOffset); + new CopyableSeekableByteChannel(seeker), stripTrailingNewlines, startOffset, endOffset); } - class TextFileMultiIterator extends LazyMultiSourceIterator { + class TextFileMultiIterator extends LazyMultiReaderIterator { private final IOChannelFactory factory; private final boolean stripTrailingNewlines; - public TextFileMultiIterator(IOChannelFactory factory, - Iterator inputs, boolean stripTrailingNewlines) { + public TextFileMultiIterator( + IOChannelFactory factory, Iterator inputs, boolean stripTrailingNewlines) { super(inputs); this.factory = factory; this.stripTrailingNewlines = stripTrailingNewlines; } @Override - protected SourceIterator open(String input) throws IOException { - return newSourceIteratorForFile(factory, input, stripTrailingNewlines); + protected ReaderIterator open(String input) throws IOException { + return newReaderIteratorForFile(factory, input, stripTrailingNewlines); } } @@ -139,25 +123,20 @@ class TextFileIterator extends FileBasedIterator { private final boolean stripTrailingNewlines; private ScanState state; - TextFileIterator(CopyableSeekableByteChannel seeker, - boolean stripTrailingNewlines, - long startOffset, - @Nullable Long endOffset) throws IOException { + TextFileIterator(CopyableSeekableByteChannel seeker, boolean stripTrailingNewlines, + long startOffset, @Nullable Long endOffset) throws IOException { this(seeker, stripTrailingNewlines, startOffset, startOffset, endOffset, new ProgressTrackerGroup() { @Override protected void report(Integer lineLength) { notifyElementRead(lineLength.longValue()); } - }.start(), new ScanState(BUF_SIZE, !stripTrailingNewlines)); + }.start(), + new ScanState(BUF_SIZE, !stripTrailingNewlines)); } - private TextFileIterator(CopyableSeekableByteChannel seeker, - boolean stripTrailingNewlines, - long startOffset, - long offset, - @Nullable Long endOffset, - ProgressTracker tracker, + private TextFileIterator(CopyableSeekableByteChannel seeker, boolean stripTrailingNewlines, + long startOffset, long offset, @Nullable Long endOffset, ProgressTracker tracker, ScanState state) throws IOException { super(seeker, startOffset, offset, endOffset, tracker); @@ -166,17 +145,15 @@ private TextFileIterator(CopyableSeekableByteChannel seeker, } private TextFileIterator(TextFileIterator it) throws IOException { - this(it.seeker.copy(), it.stripTrailingNewlines, - /* Correctly adjust the start position of the seeker given - * that it may hold bytes that have been read and now reside - * in the read buffer (that is copied during cloning) */ - it.startOffset + it.state.totalBytesRead, - it.offset, - it.endOffset, it.tracker.copy(), it.state.copy()); + // Correctly adjust the start 
position of the seeker given + // that it may hold bytes that have been read and now reside + // in the read buffer (that is copied during cloning). + this(it.seeker.copy(), it.stripTrailingNewlines, it.startOffset + it.state.totalBytesRead, + it.offset, it.endOffset, it.tracker.copy(), it.state.copy()); } @Override - public SourceIterator copy() throws IOException { + public ReaderIterator copy() throws IOException { return new TextFileIterator(this); } @@ -193,8 +170,7 @@ public SourceIterator copy() throws IOException { * @throws IOException if an I/O error occurs */ @Override - protected ByteArrayOutputStream readElement() - throws IOException { + protected ByteArrayOutputStream readElement() throws IOException { ByteArrayOutputStream buffer = new ByteArrayOutputStream(BUF_SIZE); int charsConsumed = 0; @@ -233,7 +209,7 @@ protected ByteArrayOutputStream readElement() * being scanned. */ private static class ScanState { - private int start; // Valid bytes in buf start at this index + private int start; // Valid bytes in buf start at this index private int pos; // Where the separator is in the buf (if one was found) private int end; // the index of the end of bytes in buf private byte[] buf; @@ -251,16 +227,14 @@ public ScanState(int size, boolean keepNewlines) { } public ScanState copy() { - byte[] bufCopy = new byte[buf.length]; // copy :( + byte[] bufCopy = new byte[buf.length]; // copy :( System.arraycopy(buf, start, bufCopy, start, end - start); return new ScanState( - this.keepNewlines, this.start, this.pos, this.end, - bufCopy, this.lastByteRead, 0); + this.keepNewlines, this.start, this.pos, this.end, bufCopy, this.lastByteRead, 0); } - private ScanState( - boolean keepNewlines, int start, int pos, int end, - byte[] buf, byte lastByteRead, long totalBytesRead) { + private ScanState(boolean keepNewlines, int start, int pos, int end, byte[] buf, + byte lastByteRead, long totalBytesRead) { this.start = start; this.pos = pos; this.end = end; @@ -299,7 +273,7 @@ public int consumeUntilSeparator(ByteArrayOutputStream out) { if (separatorFound()) { int charsConsumed = (pos - start + 1); // The separator is consumed copyToOutputBuffer(out); - start = pos + 1; // skip the separator + start = pos + 1; // skip the separator return charsConsumed; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java similarity index 64% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java index a15c2d505c47f..4ec8973b87a12 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java @@ -29,46 +29,35 @@ import com.google.cloud.dataflow.sdk.util.Serializer; /** - * Creates a TextSource from a CloudObject spec. + * Creates a TextReader from a CloudObject spec. */ -public class TextSourceFactory { +public class TextReaderFactory { // Do not instantiate. 
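As a usage sketch (the file path is made up, and StringUtf8Coder is just one reasonable coder choice), the renamed TextReader reads newline-delimited records, optionally restricted to a byte range:

    TextReader<String> reader = new TextReader<>(
        "gs://my-bucket/input.txt",  // hypothetical input
        true,                        // stripTrailingNewlines
        null, null,                  // no start/end offset: read the whole file
        StringUtf8Coder.of());
    try (Reader.ReaderIterator<String> it = reader.iterator()) {
      while (it.hasNext()) {
        String line = it.next();
        // ...
      }
    }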
- private TextSourceFactory() {} + private TextReaderFactory() {} - public static TextSource create(PipelineOptions options, - CloudObject spec, - Coder coder, - ExecutionContext executionContext) - throws Exception { + public static TextReader create(PipelineOptions options, CloudObject spec, Coder coder, + ExecutionContext executionContext) throws Exception { return create(spec, coder); } - static TextSource create(CloudObject spec, - Coder coder) - throws Exception { + static TextReader create(CloudObject spec, Coder coder) throws Exception { return create(spec, coder, true); } - public static TextSource create(Source spec) - throws Exception { + public static TextReader create(Source spec) throws Exception { return create( - CloudObject.fromSpec(spec.getSpec()), - Serializer.deserialize(spec.getCodec(), Coder.class)); + CloudObject.fromSpec(spec.getSpec()), Serializer.deserialize(spec.getCodec(), Coder.class)); } - static TextSource create(CloudObject spec, - Coder coder, - boolean useDefaultBufferSize) throws Exception { + static TextReader create(CloudObject spec, Coder coder, boolean useDefaultBufferSize) + throws Exception { String filenameOrPattern = getString(spec, PropertyNames.FILENAME, null); if (filenameOrPattern == null) { filenameOrPattern = getString(spec, PropertyNames.FILEPATTERN, null); } - return new TextSource<>( - filenameOrPattern, + return new TextReader<>(filenameOrPattern, getBoolean(spec, PropertyNames.STRIP_TRAILING_NEWLINES, true), getLong(spec, PropertyNames.START_OFFSET, null), - getLong(spec, PropertyNames.END_OFFSET, null), - coder, - useDefaultBufferSize); + getLong(spec, PropertyNames.END_OFFSET, null), coder, useDefaultBufferSize); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReader.java similarity index 75% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReader.java index d7d0cf7cf841e..4291d94cd974d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReader.java @@ -21,9 +21,9 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.common.worker.BatchingShuffleEntryReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntryReader; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import java.io.IOException; import java.util.Iterator; @@ -36,17 +36,14 @@ * * @param the type of the elements read from the source */ -public class UngroupedShuffleSource extends Source { +public class UngroupedShuffleReader extends Reader { final byte[] shuffleReaderConfig; final String startShufflePosition; final String stopShufflePosition; final Coder coder; - public UngroupedShuffleSource(PipelineOptions options, - byte[] shuffleReaderConfig, - @Nullable String startShufflePosition, - @Nullable String stopShufflePosition, - Coder coder) { + public UngroupedShuffleReader(PipelineOptions options, byte[] shuffleReaderConfig, + @Nullable String startShufflePosition, @Nullable String 
stopShufflePosition, Coder coder) { this.shuffleReaderConfig = shuffleReaderConfig; this.startShufflePosition = startShufflePosition; this.stopShufflePosition = stopShufflePosition; @@ -54,26 +51,24 @@ public UngroupedShuffleSource(PipelineOptions options, } @Override - public SourceIterator iterator() throws IOException { + public ReaderIterator iterator() throws IOException { Preconditions.checkArgument(shuffleReaderConfig != null); return iterator(new BatchingShuffleEntryReader( - new ChunkingShuffleBatchReader(new ApplianceShuffleReader( - shuffleReaderConfig)))); + new ChunkingShuffleBatchReader(new ApplianceShuffleReader(shuffleReaderConfig)))); } - SourceIterator iterator(ShuffleEntryReader reader) throws IOException { - return new UngroupedShuffleSourceIterator(reader); + ReaderIterator iterator(ShuffleEntryReader reader) throws IOException { + return new UngroupedShuffleReaderIterator(reader); } /** - * A SourceIterator that reads from a ShuffleEntryReader and extracts + * A ReaderIterator that reads from a ShuffleEntryReader and extracts * just the values. */ - class UngroupedShuffleSourceIterator extends AbstractSourceIterator { + class UngroupedShuffleReaderIterator extends AbstractReaderIterator { Iterator iterator; - UngroupedShuffleSourceIterator(ShuffleEntryReader reader) - throws IOException { + UngroupedShuffleReaderIterator(ShuffleEntryReader reader) throws IOException { this.iterator = reader.read( ByteArrayShufflePosition.fromBase64(startShufflePosition), ByteArrayShufflePosition.fromBase64(stopShufflePosition)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderFactory.java similarity index 70% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderFactory.java index adff71226d6b8..3237bb8349bb2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderFactory.java @@ -26,31 +26,22 @@ import com.google.cloud.dataflow.sdk.util.PropertyNames; /** - * Creates an UngroupedShuffleSource from a CloudObject spec. + * Creates an UngroupedShuffleReader from a CloudObject spec. */ -public class UngroupedShuffleSourceFactory { +public class UngroupedShuffleReaderFactory { // Do not instantiate. 
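The package-private iterator(ShuffleEntryReader) overload above is the seam that keeps the appliance plumbing swappable; a test-style sketch (the fake reader and the surrounding variables are assumed, not provided by the SDK):

    ShuffleEntryReader fakeShuffle = new FakeShuffleEntryReader(entries);  // hypothetical fake
    UngroupedShuffleReader<Integer> shuffleReader = new UngroupedShuffleReader<>(
        options, shuffleReaderConfig, null, null, BigEndianIntegerCoder.of());
    List<Integer> values = new ArrayList<>();
    try (Reader.ReaderIterator<Integer> it = shuffleReader.iterator(fakeShuffle)) {
      while (it.hasNext()) {
        values.add(it.next());
      }
    }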
- private UngroupedShuffleSourceFactory() {} + private UngroupedShuffleReaderFactory() {} - public static UngroupedShuffleSource create( - PipelineOptions options, - CloudObject spec, - Coder coder, - ExecutionContext executionContext) - throws Exception { + public static UngroupedShuffleReader create(PipelineOptions options, CloudObject spec, + Coder coder, ExecutionContext executionContext) throws Exception { return create(options, spec, coder); } - static UngroupedShuffleSource create( - PipelineOptions options, - CloudObject spec, - Coder coder) - throws Exception { - return new UngroupedShuffleSource<>( - options, + static UngroupedShuffleReader create( + PipelineOptions options, CloudObject spec, Coder coder) throws Exception { + return new UngroupedShuffleReader<>(options, decodeBase64(getString(spec, PropertyNames.SHUFFLE_READER_CONFIG)), getString(spec, PropertyNames.START_SHUFFLE_POSITION, null), - getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), - coder); + getString(spec, PropertyNames.END_SHUFFLE_POSITION, null), coder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java index 7d97948af437e..bcb4d343a35a4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java @@ -16,17 +16,16 @@ package com.google.cloud.dataflow.sdk.util; -import com.google.cloud.dataflow.sdk.runners.worker.SourceFactory; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.runners.worker.ReaderFactory; -import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** - * Utilities for working with Source Dataflow API definitions and {@link Source} + * Utilities for working with Source Dataflow API definitions and + * {@link com.google.cloud.dataflow.sdk.util.common.worker.Reader} * objects. */ public class CloudSourceUtils { @@ -35,8 +34,7 @@ public class CloudSourceUtils { * On conflict for a parameter name, values in {@code spec} override values in {@code baseSpecs}, * and later values in {@code baseSpecs} override earlier ones. */ - public static com.google.api.services.dataflow.model.Source - flattenBaseSpecs(com.google.api.services.dataflow.model.Source source) { + public static Source flattenBaseSpecs(Source source) { if (source.getBaseSpecs() == null) { return source; } @@ -46,33 +44,19 @@ public class CloudSourceUtils { } params.putAll(source.getSpec()); - com.google.api.services.dataflow.model.Source result = source.clone(); + Source result = source.clone(); result.setSpec(params); result.setBaseSpecs(null); return result; } - /** Reads all elements from the given {@link Source}. */ - public static List readElemsFromSource(Source source) { - List elems = new ArrayList<>(); - try (Source.SourceIterator it = source.iterator()) { - while (it.hasNext()) { - elems.add(it.next()); - } - } catch (IOException e) { - throw new RuntimeException("Failed to read from source: " + source, e); - } - return elems; - } - /** - * Creates a {@link Source} from the given Dataflow Source API definition and - * reads all elements from it. + * Creates a {@link com.google.cloud.dataflow.sdk.util.common.worker.Reader} + * from the given Dataflow Source API definition and reads all elements from it. 
*/ - public static List readElemsFromSource( - com.google.api.services.dataflow.model.Source source) { + public static List readElemsFromSource(Source source) { try { - return readElemsFromSource(SourceFactory.create(null, source, null)); + return ReaderUtils.readElemsFromReader(ReaderFactory.create(null, source, null)); } catch (Exception e) { throw new RuntimeException("Failed to read from source: " + source.toString(), e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReaderUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReaderUtils.java new file mode 100644 index 0000000000000..4c2f9bf35380a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReaderUtils.java @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Utilities for working with {@link com.google.cloud.dataflow.sdk.util.common.worker.Reader} + * objects. + */ +public class ReaderUtils { + /** + * Reads all elements from the given + * {@link com.google.cloud.dataflow.sdk.util.common.worker.Reader}. + */ + public static List readElemsFromReader(Reader reader) { + List elems = new ArrayList<>(); + try (Reader.ReaderIterator it = reader.iterator()) { + while (it.hasNext()) { + elems.add(it.next()); + } + } catch (IOException e) { + throw new RuntimeException("Failed to read from source: " + reader, e); + } + return elems; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java index 4fc67d60f3c3f..12cdf30ac468a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java @@ -25,8 +25,7 @@ public interface CustomSourceFormat { * Performs an operation on the specification of a source. * See {@code SourceOperationRequest} for details. */ - public SourceOperationResponse performSourceOperation(SourceOperationRequest operation) - throws Exception; + public OperationResponse performSourceOperation(OperationRequest operation) throws Exception; /** * A representation of an operation on the specification of a source, @@ -38,24 +37,21 @@ public SourceOperationResponse performSourceOperation(SourceOperationRequest ope * about the implementation, and so the concrete Source subclasses used * by a tool-specific framework should match. 
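The new ReaderUtils helper makes the drain-everything pattern a one-liner; a sketch (the reader variable stands for any Reader over Strings, such as the TextReader shown earlier):

    Reader<String> reader = ...;  // e.g. a TextReader over a small file
    List<String> lines = ReaderUtils.readElemsFromReader(reader);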
*/ - public interface SourceOperationRequest { - } + public interface OperationRequest {} /** * A representation of the result of a SourceOperationRequest. * - *
<p>
See the comment on {@link SourceOperationRequest} for how instances of this + *
<p>
See the comment on {@link OperationRequest} for how instances of this * interface are used by the rest of the framework. */ - public interface SourceOperationResponse { - } + public interface OperationResponse {} /** * A representation of a specification of a source. * - *
<p>
See the comment on {@link SourceOperationRequest} for how instances of this + *
<p>
See the comment on {@link OperationRequest} for how instances of this * interface are used by the rest of the framework. */ - public interface SourceSpec { - } + public interface SourceSpec {} } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java index 45d5e8c6715e3..793343d6d1bb4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java @@ -28,8 +28,7 @@ * An executor for a map task, defined by a list of Operations. */ public class MapTaskExecutor extends WorkExecutor { - private static final Logger LOG = - LoggerFactory.getLogger(MapTaskExecutor.class); + private static final Logger LOG = LoggerFactory.getLogger(MapTaskExecutor.class); /** The operations in the map task, in execution order. */ public final List operations; @@ -45,9 +44,8 @@ public class MapTaskExecutor extends WorkExecutor { * operations, which may get extended during execution * @param stateSampler a state sampler for tracking where time is being spent */ - public MapTaskExecutor(List operations, - CounterSet counters, - StateSampler stateSampler) { + public MapTaskExecutor( + List operations, CounterSet counters, StateSampler stateSampler) { super(counters); this.operations = operations; this.stateSampler = stateSampler; @@ -62,8 +60,7 @@ public void execute() throws Exception { // Starting a root operation such as a ReadOperation does the work // of processing the input dataset. LOG.debug("starting operations"); - ListIterator iterator = - operations.listIterator(operations.size()); + ListIterator iterator = operations.listIterator(operations.size()); while (iterator.hasPrevious()) { Operation op = iterator.previous(); op.start(); @@ -83,26 +80,24 @@ public void execute() throws Exception { } @Override - public Source.Progress getWorkerProgress() throws Exception { + public Reader.Progress getWorkerProgress() throws Exception { return getReadOperation().getProgress(); } @Override - public Source.Position proposeStopPosition( - Source.Progress proposedStopPosition) throws Exception { + public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) + throws Exception { return getReadOperation().proposeStopPosition(proposedStopPosition); } ReadOperation getReadOperation() throws Exception { if (operations == null || operations.isEmpty()) { - throw new IllegalStateException( - "Map task has no operation."); + throw new IllegalStateException("Map task has no operation."); } Operation readOperation = operations.get(0); if (!(readOperation instanceof ReadOperation)) { - throw new IllegalStateException( - "First operation in the map task is not a ReadOperation."); + throw new IllegalStateException("First operation in the map task is not a ReadOperation."); } return (ReadOperation) readOperation; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index f4caef51d0d1e..acd5d6468b691 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -41,20 +41,20 @@ public class ReadOperation extends Operation { private static final Logger LOG = 
LoggerFactory.getLogger(ReadOperation.class); private static final long DEFAULT_PROGRESS_UPDATE_PERIOD_MS = TimeUnit.SECONDS.toMillis(1); - /** The Source this operation reads from. */ - public final Source source; + /** The Reader this operation reads from. */ + public final Reader reader; /** The total byte counter for all data read by this operation. */ final Counter byteCount; - /** StateSampler state for advancing the SourceIterator. */ + /** StateSampler state for advancing the ReaderIterator. */ private final int readState; /** - * The Source's reader this operation reads from, created by start(). + * The Reader's iterator this operation reads from, created by start(). * Guarded by sourceIteratorLock. */ - volatile Source.SourceIterator sourceIterator = null; + volatile Reader.ReaderIterator readerIterator = null; private final Object sourceIteratorLock = new Object(); /** @@ -64,7 +64,7 @@ public class ReadOperation extends Operation { * wait for a read to complete (which can take an unbounded time, delay a worker progress update, * and cause lease expiration and all sorts of trouble). */ - private AtomicReference progress = new AtomicReference<>(); + private AtomicReference progress = new AtomicReference<>(); /** * On every iteration of the read loop, "progress" is fetched from sourceIterator if requested. @@ -78,21 +78,21 @@ public class ReadOperation extends Operation { private AtomicBoolean isProgressUpdateRequested = new AtomicBoolean(true); - public ReadOperation(String operationName, Source source, OutputReceiver[] receivers, + public ReadOperation(String operationName, Reader reader, OutputReceiver[] receivers, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { super(operationName, receivers, counterPrefix, addCounterMutator, stateSampler); - this.source = source; + this.reader = reader; this.byteCount = addCounterMutator.addCounter( Counter.longs(bytesCounterName(counterPrefix, operationName), SUM)); readState = stateSampler.stateForName(operationName + "-read"); } /** Invoked by tests. */ - ReadOperation(Source source, OutputReceiver outputReceiver, String counterPrefix, + ReadOperation(Reader reader, OutputReceiver outputReceiver, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { - this("ReadOperation", source, new OutputReceiver[] {outputReceiver}, counterPrefix, - addCounterMutator, stateSampler); + this("ReadOperation", reader, new OutputReceiver[] {outputReceiver}, counterPrefix, + addCounterMutator, stateSampler); } /** @@ -107,8 +107,8 @@ protected String bytesCounterName(String counterPrefix, String operationName) { return operationName + "-ByteCount"; } - public Source getSource() { - return source; + public Reader getReader() { + return reader; } @Override @@ -127,12 +127,12 @@ protected void runReadLoop() throws Exception { return; } - source.addObserver(new SourceObserver()); + reader.addObserver(new ReaderObserver()); try (StateSampler.ScopedState process = stateSampler.scopedState(processState)) { assert process != null; synchronized (sourceIteratorLock) { - sourceIterator = source.iterator(); + readerIterator = reader.iterator(); } // TODO: Consider using the ExecutorService from PipelineOptions instead. @@ -157,7 +157,7 @@ public void run() { try { // Force a progress update at the beginning and at the end. 
synchronized (sourceIteratorLock) { - progress.set(sourceIterator.getProgress()); + progress.set(readerIterator.getProgress()); } while (true) { Object value; @@ -166,24 +166,24 @@ public void run() { try (StateSampler.ScopedState read = stateSampler.scopedState(readState)) { assert read != null; synchronized (sourceIteratorLock) { - if (!sourceIterator.hasNext()) { + if (!readerIterator.hasNext()) { break; } - value = sourceIterator.next(); + value = readerIterator.next(); if (isProgressUpdateRequested.getAndSet(false) || progressUpdatePeriodMs == 0) { - progress.set(sourceIterator.getProgress()); + progress.set(readerIterator.getProgress()); } } } receiver.process(value); } synchronized (sourceIteratorLock) { - progress.set(sourceIterator.getProgress()); + progress.set(readerIterator.getProgress()); } } finally { synchronized (sourceIteratorLock) { - sourceIterator.close(); + readerIterator.close(); } if (progressUpdatePeriodMs != 0) { updateRequester.interrupt(); @@ -200,24 +200,24 @@ public void run() { * @return the task progress, or {@code null} if the source iterator has not * been initialized */ - public Source.Progress getProgress() { + public Reader.Progress getProgress() { return progress.get(); } /** - * Relays the request to update the stop position to {@code SourceIterator}. + * Relays the request to update the stop position to {@code ReaderIterator}. * * @param proposedStopPosition the proposed stop position - * @return the new stop position updated in {@code SourceIterator}, or + * @return the new stop position updated in {@code ReaderIterator}, or * {@code null} if the source iterator has not been initialized */ - public Source.Position proposeStopPosition(Source.Progress proposedStopPosition) { + public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) { synchronized (sourceIteratorLock) { - if (sourceIterator == null) { + if (readerIterator == null) { LOG.warn("Iterator has not been initialized, returning null stop position."); return null; } - return sourceIterator.updateStopPosition(proposedStopPosition); + return readerIterator.updateStopPosition(proposedStopPosition); } } @@ -226,10 +226,10 @@ public Source.Position proposeStopPosition(Source.Progress proposedStopPosition) * an element, update() gets called with the byte size of the element, which * gets added up into the ReadOperation's byte counter. */ - private class SourceObserver implements Observer { + private class ReaderObserver implements Observer { @Override public void update(Observable obs, Object obj) { - Preconditions.checkArgument(obs == source, "unexpected observable" + obs); + Preconditions.checkArgument(obs == reader, "unexpected observable" + obs); Preconditions.checkArgument(obj instanceof Long, "unexpected parameter object: " + obj); byteCount.addValue((long) obj); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java similarity index 78% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java index d50b93dc54193..2ecef5b4cc869 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Source.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java @@ -21,26 +21,26 @@ import java.util.Observable; /** - * Abstract base class for Sources. + * Abstract base class for readers. * *

A Source is read from by getting an Iterator-like value and * iterating through it. * * @param the type of the elements read from the source */ -public abstract class Source extends Observable { +public abstract class Reader extends Observable { /** - * Returns a SourceIterator that allows reading from this source. + * Returns a ReaderIterator that allows reading from this source. */ - public abstract SourceIterator iterator() throws IOException; + public abstract ReaderIterator iterator() throws IOException; /** - * A stateful iterator over the data in a Source. + * A stateful iterator over the data in a Reader. */ - public interface SourceIterator extends AutoCloseable { + public interface ReaderIterator extends AutoCloseable { /** * Returns whether the source has any more elements. Some sources, - * such as GroupingShuffleSource, invalidate the return value of + * such as GroupingShuffleReader, invalidate the return value of * the previous next() call during the call to hasNext(). */ public boolean hasNext() throws IOException; @@ -53,13 +53,13 @@ public interface SourceIterator extends AutoCloseable { public T next() throws IOException; /** - * Copies the current SourceIterator. + * Copies the current ReaderIterator. * * @throws UnsupportedOperationException if the particular implementation * does not support copy * @throws IOException if copying the iterator involves IO that fails */ - public SourceIterator copy() throws IOException; + public ReaderIterator copy() throws IOException; @Override public void close() throws IOException; @@ -80,7 +80,7 @@ public interface SourceIterator extends AutoCloseable { * position and returns the actual new stop position. * *

If the source finds the proposed one is not a convenient position to - * stop, it can pick a different stop position. The {@code SourceIterator} + * stop, it can pick a different stop position. The {@code ReaderIterator} * should start returning {@code false} from {@code hasNext()} once it has * passed its stop position. Subsequent stop position updates must be in * non-increasing order within a task. @@ -96,11 +96,10 @@ public interface SourceIterator extends AutoCloseable { public Position updateStopPosition(Progress proposedStopPosition); } - /** An abstract base class for SourceIterator implementations. */ - public abstract static class AbstractSourceIterator - implements SourceIterator { + /** An abstract base class for ReaderIterator implementations. */ + public abstract static class AbstractReaderIterator implements ReaderIterator { @Override - public SourceIterator copy() throws IOException { + public ReaderIterator copy() throws IOException { throw new UnsupportedOperationException(); } @@ -121,32 +120,30 @@ public Position updateStopPosition(Progress proposedStopPosition) { } /** - * A representation of how far a {@code SourceIterator} is through a - * {@code Source}. + * A representation of how far a {@code ReaderIterator} is through a + * {@code Reader}. * *

The common worker framework does not interpret instances of * this interface. But a tool-specific framework can make assumptions - * about the implementation, and so the concrete Source subclasses used + * about the implementation, and so the concrete Reader subclasses used * by a tool-specific framework should match. */ - public interface Progress { - } + public interface Progress {} /** * A representation of a position in an iteration through a - * {@code Source}. + * {@code Reader}. * *

See the comment on {@link Progress} for how instances of this * interface are used by the rest of the framework. */ - public interface Position { - } + public interface Position {} /** * Utility method to notify observers about a new element, which has - * been read by this Source, and its size in bytes. Normally, there + * been read by this Reader, and its size in bytes. Normally, there * is only one observer, which is a ReadOperation that encapsules - * this Source. Derived classes must call this method whenever they + * this Reader. Derived classes must call this method whenever they * read additional data, even if that element may never be returned * from the corresponding source iterator. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java index c4a603f70caad..18ab4762d171d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java @@ -49,8 +49,7 @@ public abstract class WorkExecutor implements AutoCloseable { */ public WorkExecutor(CounterSet outputCounters) { this.outputCounters = outputCounters; - this.os = - (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean(); + this.os = (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean(); } /** @@ -78,7 +77,7 @@ public Collection> getOutputMetrics() { /** * Returns the worker's current progress. */ - public Source.Progress getWorkerProgress() throws Exception { + public Reader.Progress getWorkerProgress() throws Exception { // By default, return null indicating worker progress not available. return null; } @@ -87,8 +86,8 @@ public Source.Progress getWorkerProgress() throws Exception { * Proposes that the worker changes the stop position for the current work. * Returns the new position if accepted, otherwise {@code null}. */ - public Source.Position proposeStopPosition( - Source.Progress proposedStopPosition) throws Exception { + public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) + throws Exception { // By default, returns null indicating that no task splitting happens. return null; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java index c5222eb04a2f1..b50afb4b5f886 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java @@ -49,15 +49,15 @@ public abstract class WorkProgressUpdater { private static final long DEFAULT_LEASE_DURATION_MILLIS = 3 * 60 * 1000; /** The lease renewal RPC latency margin. */ - private static final long LEASE_RENEWAL_LATENCY_MARGIN = Long.valueOf( - System.getProperty("worker_lease_renewal_latency_margin", "5000")); + private static final long LEASE_RENEWAL_LATENCY_MARGIN = + Long.valueOf(System.getProperty("worker_lease_renewal_latency_margin", "5000")); /** * The minimum period between two consecutive progress updates. Ensures the * {@link WorkProgressUpdater} does not generate update storms. 
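[Editorial sketch, not part of the change itself.] To make the ReaderIterator contract above concrete, a toy in-memory Reader is sketched here. It assumes AbstractReaderIterator supplies defaults for copy(), close(), getProgress() and updateStopPosition(), as the surrounding hunks suggest; a real implementation would also notify its observers of element byte sizes, as described in the Javadoc above.

    import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

    import java.io.IOException;
    import java.util.Iterator;
    import java.util.List;

    /** A toy Reader over an in-memory list; hypothetical, for illustration only. */
    class InMemoryReader<T> extends Reader<T> {
      private final List<T> elements;

      InMemoryReader(List<T> elements) {
        this.elements = elements;
      }

      @Override
      public ReaderIterator<T> iterator() throws IOException {
        return new AbstractReaderIterator<T>() {
          private final Iterator<T> it = elements.iterator();

          @Override
          public boolean hasNext() {
            return it.hasNext();
          }

          @Override
          public T next() {
            return it.next();
          }
        };
      }
    }
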
*/ - private static final long MIN_REPORTING_INTERVAL_MILLIS = Long.valueOf( - System.getProperty("minimum_worker_update_interval_millis", "5000")); + private static final long MIN_REPORTING_INTERVAL_MILLIS = + Long.valueOf(System.getProperty("minimum_worker_update_interval_millis", "5000")); /** * The maximum period between two consecutive progress updates. Ensures the @@ -87,15 +87,12 @@ public abstract class WorkProgressUpdater { * we'll send the {@code null} as a stop position update, which is a no-op * for the service. */ - protected Source.Position stopPositionToService; + protected Reader.Position stopPositionToService; public WorkProgressUpdater(WorkExecutor worker) { this.worker = worker; this.executor = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setDaemon(true) - .setNameFormat("WorkProgressUpdater-%d") - .build()); + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WorkProgressUpdater-%d").build()); } /** @@ -104,10 +101,9 @@ public WorkProgressUpdater(WorkExecutor worker) { public void startReportingProgress() { // Send the initial work progress report half-way through the lease // expiration. Subsequent intervals adapt to hints from the service. - long leaseRemainingTime = - leaseRemainingTime(getWorkUnitLeaseExpirationTimestamp()); - progressReportIntervalMs = nextProgressReportInterval( - leaseRemainingTime / 2, leaseRemainingTime); + long leaseRemainingTime = leaseRemainingTime(getWorkUnitLeaseExpirationTimestamp()); + progressReportIntervalMs = + nextProgressReportInterval(leaseRemainingTime / 2, leaseRemainingTime); requestedLeaseDurationMs = DEFAULT_LEASE_DURATION_MILLIS; LOG.info("Started reporting progress for work item: {}", workString()); @@ -131,7 +127,7 @@ public void stopReportingProgress() throws Exception { // We send a final progress report in case there was an unreported stop position update. if (stopPositionToService != null) { LOG.info("Sending final progress update with unreported stop position."); - reportProgressHelper(); // This call can fail with an exception + reportProgressHelper(); // This call can fail with an exception } LOG.info("Stopped reporting progress for work item: {}", workString()); @@ -148,19 +144,19 @@ public void stopReportingProgress() throws Exception { * @param leaseRemainingTime milliseconds left before the work lease expires * @return the time in milliseconds before sending the next progress update */ - protected static long nextProgressReportInterval(long suggestedInterval, - long leaseRemainingTime) { + protected static long nextProgressReportInterval( + long suggestedInterval, long leaseRemainingTime) { // Sanitize input in case we get a negative suggested time interval. suggestedInterval = Math.max(0, suggestedInterval); // Try to send the next progress update before the next lease expiration // allowing some RPC latency margin. - suggestedInterval = Math.min(suggestedInterval, - leaseRemainingTime - LEASE_RENEWAL_LATENCY_MARGIN); + suggestedInterval = + Math.min(suggestedInterval, leaseRemainingTime - LEASE_RENEWAL_LATENCY_MARGIN); // Bound reporting interval to avoid staleness and progress update storms. 
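[Editorial sketch, not part of the change itself.] The return statement that follows applies the clamp just described; a worked example may help. The 5 000 ms values are the defaults visible in this hunk (worker_lease_renewal_latency_margin and minimum_worker_update_interval_millis); the maximum is a placeholder, since MAX_REPORTING_INTERVAL_MILLIS is defined outside this excerpt.

    /** Worked example of the reporting-interval clamp; MAX_MS is a placeholder value. */
    class IntervalClampExample {
      static final long MIN_MS = 5_000;     // minimum_worker_update_interval_millis default
      static final long MARGIN_MS = 5_000;  // worker_lease_renewal_latency_margin default
      static final long MAX_MS = 600_000;   // hypothetical; the real constant is outside this hunk

      static long nextInterval(long suggestedMs, long leaseRemainingMs) {
        long interval = Math.max(0, suggestedMs);                 // sanitize negative suggestions
        interval = Math.min(interval, leaseRemainingMs - MARGIN_MS); // leave room for lease renewal RPC
        return Math.min(Math.max(MIN_MS, interval), MAX_MS);      // bound against storms and staleness
      }

      public static void main(String[] args) {
        // A 1 s suggestion with 60 s of lease left is raised to the 5 s floor.
        System.out.println(nextInterval(1_000, 60_000));   // prints 5000
        // A 45 s suggestion with only 30 s of lease left is capped by the renewal margin.
        System.out.println(nextInterval(45_000, 30_000));  // prints 25000
      }
    }
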
- return Math.min(Math.max(MIN_REPORTING_INTERVAL_MILLIS, suggestedInterval), - MAX_REPORTING_INTERVAL_MILLIS); + return Math.min( + Math.max(MIN_REPORTING_INTERVAL_MILLIS, suggestedInterval), MAX_REPORTING_INTERVAL_MILLIS); } /** @@ -181,7 +177,8 @@ public void run() { reportProgress(); } } - }, progressReportIntervalMs, TimeUnit.MILLISECONDS); + }, + progressReportIntervalMs, TimeUnit.MILLISECONDS); LOG.debug("Next work progress update for work item {} scheduled to occur in {} ms.", workString(), progressReportIntervalMs); } @@ -211,13 +208,13 @@ protected long leaseRemainingTime(long leaseExpirationTimestamp) { LOG.debug("Lease remaining time for {} is 0 ms.", workString()); return 0; } - LOG.debug("Lease remaining time for {} is {} ms.", - workString(), leaseExpirationTimestamp - now); + LOG.debug( + "Lease remaining time for {} is {} ms.", workString(), leaseExpirationTimestamp - now); return leaseExpirationTimestamp - now; } // Visible for testing. - public Source.Position getStopPosition() { + public Reader.Position getStopPosition() { return stopPositionToService; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index fa09bd09106af..e9a6cbde3773f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -42,7 +42,7 @@ * an unbounded {@code PCollection}. * *

Each element in a {@code PCollection} may have an associated implicit - * timestamp. Sources assign timestamps to elements when they create + * timestamp. Readers assign timestamps to elements when they create * {@code PCollection}s, and other {@code PTransform}s propagate these * timestamps from their input to their output. For example, PubsubIO.Read * assigns pubsub message timestamps to elements, and TextIO.Read assigns @@ -150,8 +150,7 @@ public PCollection setOrdered(boolean isOrdered) { if (this.isOrdered != isOrdered) { if (isFinishedSpecifyingInternal()) { throw new IllegalStateException( - "cannot change the orderedness of " + this + - " once it's been used"); + "cannot change the orderedness of " + this + " once it's been used"); } this.isOrdered = isOrdered; } @@ -162,8 +161,7 @@ public PCollection setOrdered(boolean isOrdered) { * Applies the given PTransform to this input PCollection, and * returns the PTransform's Output. */ - public Output apply( - PTransform, Output> t) { + public Output apply(PTransform, Output> t) { return Pipeline.applyTransform(this, t); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReaderTest.java similarity index 74% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReaderTest.java index e6bfffdcb68ab..f9739dc2d6e8e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteReaderTest.java @@ -24,7 +24,7 @@ import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.MimeTypes; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.avro.Schema; import org.apache.avro.file.DataFileWriter; @@ -49,23 +49,21 @@ import javax.annotation.Nullable; /** - * Tests for AvroByteSource. + * Tests for AvroByteReader. */ @RunWith(JUnit4.class) -public class AvroByteSourceTest { +public class AvroByteReaderTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - private void runTestRead(List> elemsList, - Coder coder, - boolean requireExactMatch) + private void runTestRead(List> elemsList, Coder coder, boolean requireExactMatch) throws Exception { File tmpFile = tmpFolder.newFile("file.avro"); String filename = tmpFile.getPath(); // Write the data. 
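[Editorial sketch, not part of the change itself.] For context on the apply() signature reformatted above: a transform is applied to a PCollection and element timestamps propagate to the output without any code in the DoFn. A minimal sketch, assuming the SDK's ParDo and DoFn transforms from elsewhere in this patch series (they are not shown in this excerpt); the test code resumes after it.

    import com.google.cloud.dataflow.sdk.transforms.DoFn;
    import com.google.cloud.dataflow.sdk.transforms.ParDo;
    import com.google.cloud.dataflow.sdk.values.PCollection;

    class WordLengthsSketch {
      /** Timestamps attached by the reader carry over from {@code words} to the returned collection. */
      static PCollection<Integer> wordLengths(PCollection<String> words) {
        return words.apply(ParDo.of(new DoFn<String, Integer>() {
          @Override
          public void processElement(ProcessContext c) {
            c.output(c.element().length());
          }
        }));
      }
    }
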
- OutputStream outStream = Channels.newOutputStream( - IOChannelUtils.create(filename, MimeTypes.BINARY)); + OutputStream outStream = + Channels.newOutputStream(IOChannelUtils.create(filename, MimeTypes.BINARY)); Schema schema = Schema.create(Schema.Type.BYTES); DatumWriter datumWriter = new GenericDatumWriter<>(schema); DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); @@ -126,19 +124,14 @@ private void runTestRead(List> elemsList, Assert.assertEquals(expectedSizes, actualSizes); } - private List readElems(String filename, - @Nullable Long startOffset, - @Nullable Long endOffset, - Coder coder, - List actualSizes) - throws Exception { - AvroByteSource avroSource = - new AvroByteSource<>(filename, startOffset, endOffset, coder); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(avroSource, actualSizes); + private List readElems(String filename, @Nullable Long startOffset, + @Nullable Long endOffset, Coder coder, List actualSizes) throws Exception { + AvroByteReader avroReader = new AvroByteReader<>(filename, startOffset, endOffset, coder); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(avroReader, actualSizes); List actualElems = new ArrayList<>(); - try (Source.SourceIterator iterator = avroSource.iterator()) { + try (Reader.ReaderIterator iterator = avroReader.iterator()) { while (iterator.hasNext()) { actualElems.add(iterator.next()); } @@ -148,21 +141,18 @@ private List readElems(String filename, @Test public void testRead() throws Exception { - runTestRead(Collections.singletonList(TestUtils.INTS), - BigEndianIntegerCoder.of(), - true /* require exact match */); + runTestRead(Collections.singletonList(TestUtils.INTS), BigEndianIntegerCoder.of(), + true/* require exact match */); } @Test public void testReadEmpty() throws Exception { - runTestRead(Collections.singletonList(TestUtils.NO_INTS), - BigEndianIntegerCoder.of(), - true /* require exact match */); + runTestRead(Collections.singletonList(TestUtils.NO_INTS), BigEndianIntegerCoder.of(), + true/* require exact match */); } - private List> generateInputBlocks(int numBlocks, - int blockSizeBytes, - int averageLineSizeBytes) { + private List> generateInputBlocks( + int numBlocks, int blockSizeBytes, int averageLineSizeBytes) { Random random = new Random(0); List> blocks = new ArrayList<>(numBlocks); for (int blockNum = 0; blockNum < numBlocks; blockNum++) { @@ -183,16 +173,13 @@ private List> generateInputBlocks(int numBlocks, @Test public void testReadSmallRanges() throws Exception { - runTestRead(generateInputBlocks(3, 50, 5), - StringUtf8Coder.of(), - true /* require exact match */); + runTestRead(generateInputBlocks(3, 50, 5), StringUtf8Coder.of(), true/* require exact match */); } @Test public void testReadBigRanges() throws Exception { - runTestRead(generateInputBlocks(10, 128 * 1024, 100), - StringUtf8Coder.of(), - false /* don't require exact match */); + runTestRead(generateInputBlocks(10, 128 * 1024, 100), StringUtf8Coder.of(), + false/* don't require exact match */); } // TODO: sharded filenames diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java index 3c7f29b40a8c1..83366800389e6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java @@ -65,18 +65,16 @@ void 
runTestWriteFile(List elems, Coder coder) throws Exception { // Read back the file. - SeekableByteChannel inChannel = (SeekableByteChannel) - IOChannelUtils.getFactory(filename).open(filename); + SeekableByteChannel inChannel = + (SeekableByteChannel) IOChannelUtils.getFactory(filename).open(filename); - SeekableInput seekableInput = - new AvroSource.SeekableByteChannelInput(inChannel); + SeekableInput seekableInput = new AvroReader.SeekableByteChannelInput(inChannel); Schema schema = Schema.create(Schema.Type.BYTES); DatumReader datumReader = new GenericDatumReader<>(schema); - DataFileReader fileReader = new DataFileReader<>( - seekableInput, datumReader); + DataFileReader fileReader = new DataFileReader<>(seekableInput, datumReader); List actual = new ArrayList<>(); List expectedSizes = new ArrayList<>(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java new file mode 100644 index 0000000000000..a71cc8ea510a2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addLong; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for AvroReaderFactory. 
+ */ +@RunWith(JUnit4.class) +@SuppressWarnings("rawtypes") +public class AvroReaderFactoryTest { + private final String pathToAvroFile = "/path/to/file.avro"; + + Reader runTestCreateAvroReader(String filename, @Nullable Long start, @Nullable Long end, + CloudObject encoding) throws Exception { + CloudObject spec = CloudObject.forClassName("AvroSource"); + addString(spec, "filename", filename); + if (start != null) { + addLong(spec, "start_offset", start); + } + if (end != null) { + addLong(spec, "end_offset", end); + } + + Source cloudSource = new Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + return reader; + } + + @Test + public void testCreatePlainAvroByteReader() throws Exception { + Coder coder = WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Reader reader = runTestCreateAvroReader(pathToAvroFile, null, null, coder.asCloudObject()); + + Assert.assertThat(reader, new IsInstanceOf(AvroByteReader.class)); + AvroByteReader avroReader = (AvroByteReader) reader; + Assert.assertEquals(pathToAvroFile, avroReader.avroReader.filename); + Assert.assertEquals(null, avroReader.avroReader.startPosition); + Assert.assertEquals(null, avroReader.avroReader.endPosition); + Assert.assertEquals(coder, avroReader.coder); + } + + @Test + public void testCreateRichAvroByteReader() throws Exception { + Coder coder = WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Reader reader = runTestCreateAvroReader(pathToAvroFile, 200L, 500L, coder.asCloudObject()); + + Assert.assertThat(reader, new IsInstanceOf(AvroByteReader.class)); + AvroByteReader avroReader = (AvroByteReader) reader; + Assert.assertEquals(pathToAvroFile, avroReader.avroReader.filename); + Assert.assertEquals(200L, (long) avroReader.avroReader.startPosition); + Assert.assertEquals(500L, (long) avroReader.avroReader.endPosition); + Assert.assertEquals(coder, avroReader.coder); + } + + @Test + public void testCreateRichAvroReader() throws Exception { + WindowedValue.WindowedValueCoder coder = + WindowedValue.getValueOnlyCoder(AvroCoder.of(Integer.class)); + Reader reader = runTestCreateAvroReader(pathToAvroFile, 200L, 500L, coder.asCloudObject()); + + Assert.assertThat(reader, new IsInstanceOf(AvroReader.class)); + AvroReader avroReader = (AvroReader) reader; + Assert.assertEquals(pathToAvroFile, avroReader.filename); + Assert.assertEquals(200L, (long) avroReader.startPosition); + Assert.assertEquals(500L, (long) avroReader.endPosition); + Assert.assertEquals(coder.getValueCoder(), avroReader.avroCoder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java similarity index 73% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java index 4855ef92e4d96..4e5a4a71e4c6b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java @@ -24,11 +24,10 @@ import com.google.cloud.dataflow.sdk.util.MimeTypes; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; -import 
com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.io.DatumWriter; - import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -47,23 +46,21 @@ import javax.annotation.Nullable; /** - * Tests for AvroSource. + * Tests for AvroReader. */ @RunWith(JUnit4.class) -public class AvroSourceTest { +public class AvroReaderTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); - private void runTestRead(List> elemsList, - AvroCoder coder, - boolean requireExactMatch) - throws Exception { + private void runTestRead( + List> elemsList, AvroCoder coder, boolean requireExactMatch) throws Exception { File tmpFile = tmpFolder.newFile("file.avro"); String filename = tmpFile.getPath(); // Write the data. - OutputStream outStream = Channels.newOutputStream( - IOChannelUtils.create(filename, MimeTypes.BINARY)); + OutputStream outStream = + Channels.newOutputStream(IOChannelUtils.create(filename, MimeTypes.BINARY)); DatumWriter datumWriter = coder.createDatumWriter(); DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); fileWriter.create(coder.getSchema(), outStream); @@ -122,19 +119,15 @@ private void runTestRead(List> elemsList, Assert.assertEquals(expectedSizes, actualSizes); } - private List readElems(String filename, - @Nullable Long startOffset, - @Nullable Long endOffset, - Coder coder, - List actualSizes) - throws Exception { - AvroSource avroSource = - new AvroSource<>(filename, startOffset, endOffset, WindowedValue.getValueOnlyCoder(coder)); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(avroSource, actualSizes); + private List readElems(String filename, @Nullable Long startOffset, + @Nullable Long endOffset, Coder coder, List actualSizes) throws Exception { + AvroReader avroReader = + new AvroReader<>(filename, startOffset, endOffset, WindowedValue.getValueOnlyCoder(coder)); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(avroReader, actualSizes); List actualElems = new ArrayList<>(); - try (Source.SourceIterator> iterator = avroSource.iterator()) { + try (Reader.ReaderIterator> iterator = avroReader.iterator()) { while (iterator.hasNext()) { actualElems.add(iterator.next().getValue()); } @@ -144,21 +137,18 @@ private List readElems(String filename, @Test public void testRead() throws Exception { - runTestRead(Collections.singletonList(TestUtils.INTS), - AvroCoder.of(Integer.class), - true /* require exact match */); + runTestRead(Collections.singletonList(TestUtils.INTS), AvroCoder.of(Integer.class), + true/* require exact match */); } @Test public void testReadEmpty() throws Exception { - runTestRead(Collections.singletonList(TestUtils.NO_INTS), - AvroCoder.of(Integer.class), - true /* require exact match */); + runTestRead(Collections.singletonList(TestUtils.NO_INTS), AvroCoder.of(Integer.class), + true/* require exact match */); } - private List> generateInputBlocks(int numBlocks, - int blockSizeBytes, - int averageLineSizeBytes) { + private List> generateInputBlocks( + int numBlocks, int blockSizeBytes, int averageLineSizeBytes) { Random random = new Random(0); List> blocks = new ArrayList<>(numBlocks); for (int blockNum = 0; blockNum < numBlocks; blockNum++) { @@ -179,16 +169,14 @@ private List> generateInputBlocks(int numBlocks, @Test public void testReadSmallRanges() throws Exception { - runTestRead(generateInputBlocks(3, 50, 
5), - AvroCoder.of(String.class), - true /* require exact match */); + runTestRead( + generateInputBlocks(3, 50, 5), AvroCoder.of(String.class), true/* require exact match */); } @Test public void testReadBigRanges() throws Exception { - runTestRead(generateInputBlocks(10, 128 * 1024, 100), - AvroCoder.of(String.class), - false /* don't require exact match */); + runTestRead(generateInputBlocks(10, 128 * 1024, 100), AvroCoder.of(String.class), + false/* don't require exact match */); } // TODO: sharded filenames diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java index 5f22d2774f4be..f2199e6781da5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkTest.java @@ -63,16 +63,14 @@ void runTestWriteFile(List elems, AvroCoder coder) throws Exception { // Read back the file. - SeekableByteChannel inChannel = (SeekableByteChannel) - IOChannelUtils.getFactory(filename).open(filename); + SeekableByteChannel inChannel = + (SeekableByteChannel) IOChannelUtils.getFactory(filename).open(filename); - SeekableInput seekableInput = - new AvroSource.SeekableByteChannelInput(inChannel); + SeekableInput seekableInput = new AvroReader.SeekableByteChannelInput(inChannel); DatumReader datumReader = new GenericDatumReader<>(coder.getSchema()); - DataFileReader fileReader = new DataFileReader<>( - seekableInput, datumReader); + DataFileReader fileReader = new DataFileReader<>(seekableInput, datumReader); List actual = new ArrayList<>(); List expectedSizes = new ArrayList<>(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java deleted file mode 100644 index 1db06de456057..0000000000000 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSourceFactoryTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -package com.google.cloud.dataflow.sdk.runners.worker; - -import static com.google.cloud.dataflow.sdk.util.Structs.addLong; -import static com.google.cloud.dataflow.sdk.util.Structs.addString; - -import com.google.cloud.dataflow.sdk.coders.AvroCoder; -import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; -import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; -import com.google.cloud.dataflow.sdk.util.CloudObject; -import com.google.cloud.dataflow.sdk.util.WindowedValue; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; - -import org.hamcrest.core.IsInstanceOf; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -import javax.annotation.Nullable; - -/** - * Tests for AvroSourceFactory. - */ -@RunWith(JUnit4.class) -@SuppressWarnings("rawtypes") -public class AvroSourceFactoryTest { - private final String pathToAvroFile = "/path/to/file.avro"; - - Source runTestCreateAvroSource(String filename, - @Nullable Long start, - @Nullable Long end, - CloudObject encoding) - throws Exception { - CloudObject spec = CloudObject.forClassName("AvroSource"); - addString(spec, "filename", filename); - if (start != null) { - addLong(spec, "start_offset", start); - } - if (end != null) { - addLong(spec, "end_offset", end); - } - - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); - cloudSource.setSpec(spec); - cloudSource.setCodec(encoding); - - Source source = SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); - return source; - } - - @Test - public void testCreatePlainAvroByteSource() throws Exception { - Coder coder = - WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); - Source source = runTestCreateAvroSource( - pathToAvroFile, null, null, coder.asCloudObject()); - - Assert.assertThat(source, new IsInstanceOf(AvroByteSource.class)); - AvroByteSource avroSource = (AvroByteSource) source; - Assert.assertEquals(pathToAvroFile, avroSource.avroSource.filename); - Assert.assertEquals(null, avroSource.avroSource.startPosition); - Assert.assertEquals(null, avroSource.avroSource.endPosition); - Assert.assertEquals(coder, avroSource.coder); - } - - @Test - public void testCreateRichAvroByteSource() throws Exception { - Coder coder = - WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); - Source source = runTestCreateAvroSource( - pathToAvroFile, 200L, 500L, coder.asCloudObject()); - - Assert.assertThat(source, new IsInstanceOf(AvroByteSource.class)); - AvroByteSource avroSource = (AvroByteSource) source; - Assert.assertEquals(pathToAvroFile, avroSource.avroSource.filename); - Assert.assertEquals(200L, (long) avroSource.avroSource.startPosition); - Assert.assertEquals(500L, (long) avroSource.avroSource.endPosition); - Assert.assertEquals(coder, avroSource.coder); - } - - @Test - public void testCreateRichAvroSource() throws Exception { - WindowedValue.WindowedValueCoder coder = - WindowedValue.getValueOnlyCoder(AvroCoder.of(Integer.class)); - Source source = runTestCreateAvroSource( - pathToAvroFile, 200L, 500L, coder.asCloudObject()); - - Assert.assertThat(source, new IsInstanceOf(AvroSource.class)); - AvroSource avroSource = (AvroSource) source; - Assert.assertEquals(pathToAvroFile, avroSource.filename); - Assert.assertEquals(200L, (long) 
avroSource.startPosition); - Assert.assertEquals(500L, (long) avroSource.endPosition); - Assert.assertEquals(coder.getValueCoder(), avroSource.avroCoder); - } -} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactoryTest.java similarity index 53% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactoryTest.java index 0eb95c70205ca..f8a87c85d2c60 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderFactoryTest.java @@ -19,10 +19,11 @@ import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.hamcrest.core.IsInstanceOf; import org.junit.Assert; @@ -31,48 +32,41 @@ import org.junit.runners.JUnit4; /** - * Tests for BigQuerySourceFactory. + * Tests for BigQueryReaderFactory. */ @RunWith(JUnit4.class) -public class BigQuerySourceFactoryTest { - void runTestCreateBigQuerySource(String project, - String dataset, - String table, - CloudObject encoding) - throws Exception { +public class BigQueryReaderFactoryTest { + void runTestCreateBigQueryReader( + String project, String dataset, String table, CloudObject encoding) throws Exception { CloudObject spec = CloudObject.forClassName("BigQuerySource"); addString(spec, "project", project); addString(spec, "dataset", dataset); addString(spec, "table", table); - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(encoding); - Source source = SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); - Assert.assertThat(source, new IsInstanceOf(BigQuerySource.class)); - BigQuerySource bigQuerySource = (BigQuerySource) source; - Assert.assertEquals(project, bigQuerySource.tableRef.getProjectId()); - Assert.assertEquals(dataset, bigQuerySource.tableRef.getDatasetId()); - Assert.assertEquals(table, bigQuerySource.tableRef.getTableId()); + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + Assert.assertThat(reader, new IsInstanceOf(BigQueryReader.class)); + BigQueryReader bigQueryReader = (BigQueryReader) reader; + Assert.assertEquals(project, bigQueryReader.tableRef.getProjectId()); + Assert.assertEquals(dataset, bigQueryReader.tableRef.getDatasetId()); + Assert.assertEquals(table, bigQueryReader.tableRef.getTableId()); } @Test - public void testCreateBigQuerySource() throws Exception { - runTestCreateBigQuerySource( - "someproject", "somedataset", "sometable", - makeCloudEncoding("TableRowJsonCoder")); + public void testCreateBigQueryReader() throws Exception { + runTestCreateBigQueryReader( + "someproject", 
"somedataset", "sometable", makeCloudEncoding("TableRowJsonCoder")); } @Test - public void testCreateBigQuerySourceCoderIgnored() throws Exception { + public void testCreateBigQueryReaderCoderIgnored() throws Exception { // BigQuery sources do not need a coder because the TableRow objects are read directly from // the table using the BigQuery API. - runTestCreateBigQuerySource( - "someproject", "somedataset", "sometable", - makeCloudEncoding("BigEndianIntegerCoder")); + runTestCreateBigQueryReader( + "someproject", "somedataset", "sometable", makeCloudEncoding("BigEndianIntegerCoder")); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java similarity index 69% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java index 2ed4635e8c10b..18248ef3183ad 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQuerySourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java @@ -32,6 +32,7 @@ import com.google.api.services.bigquery.model.TableReference; import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.junit.After; import org.junit.Assert; @@ -48,20 +49,24 @@ import java.util.List; /** - * Tests for BigQuerySource. + * Tests for BigQueryReader. * *

The tests just make sure a basic scenario of reading works because the class itself is a * thin wrapper over {@code BigQueryTableRowIterator}. The tests for the wrapped class have * comprehensive coverage. */ @RunWith(JUnit4.class) -public class BigQuerySourceTest { - - @Mock private Bigquery mockClient; - @Mock private Bigquery.Tables mockTables; - @Mock private Bigquery.Tables.Get mockTablesGet; - @Mock private Bigquery.Tabledata mockTabledata; - @Mock private Bigquery.Tabledata.List mockTabledataList; +public class BigQueryReaderTest { + @Mock + private Bigquery mockClient; + @Mock + private Bigquery.Tables mockTables; + @Mock + private Bigquery.Tables.Get mockTablesGet; + @Mock + private Bigquery.Tabledata mockTabledata; + @Mock + private Bigquery.Tabledata.List mockTabledataList; @Before public void setUp() { @@ -78,12 +83,9 @@ public void tearDown() { } private void onTableGet(Table table) throws IOException { - when(mockClient.tables()) - .thenReturn(mockTables); - when(mockTables.get(anyString(), anyString(), anyString())) - .thenReturn(mockTablesGet); - when(mockTablesGet.execute()) - .thenReturn(table); + when(mockClient.tables()).thenReturn(mockTables); + when(mockTables.get(anyString(), anyString(), anyString())).thenReturn(mockTablesGet); + when(mockTablesGet.execute()).thenReturn(table); } private void verifyTableGet() throws IOException { @@ -93,12 +95,9 @@ private void verifyTableGet() throws IOException { } private void onTableList(TableDataList result) throws IOException { - when(mockClient.tabledata()) - .thenReturn(mockTabledata); - when(mockTabledata.list(anyString(), anyString(), anyString())) - .thenReturn(mockTabledataList); - when(mockTabledataList.execute()) - .thenReturn(result); + when(mockClient.tabledata()).thenReturn(mockTabledata); + when(mockTabledata.list(anyString(), anyString(), anyString())).thenReturn(mockTabledataList); + when(mockTabledataList.execute()).thenReturn(result); } private void verifyTabledataList() throws IOException { @@ -110,25 +109,14 @@ private void verifyTabledataList() throws IOException { } private Table basicTableSchema() { - return new Table() - .setSchema(new TableSchema() - .setFields(Arrays.asList( - new TableFieldSchema() - .setName("name") - .setType("STRING"), - new TableFieldSchema() - .setName("integer") - .setType("INTEGER"), - new TableFieldSchema() - .setName("float") - .setType("FLOAT"), - new TableFieldSchema() - .setName("bool") - .setType("BOOLEAN") - ))); + return new Table().setSchema(new TableSchema().setFields(Arrays.asList( + new TableFieldSchema().setName("name").setType("STRING"), + new TableFieldSchema().setName("integer").setType("INTEGER"), + new TableFieldSchema().setName("float").setType("FLOAT"), + new TableFieldSchema().setName("bool").setType("BOOLEAN")))); } - private TableRow rawRow(Object...args) { + private TableRow rawRow(Object... args) { List cells = new LinkedList<>(); for (Object a : args) { cells.add(new TableCell().setV(a)); @@ -136,9 +124,8 @@ private TableRow rawRow(Object...args) { return new TableRow().setF(cells); } - private TableDataList rawDataList(TableRow...rows) { - return new TableDataList() - .setRows(Arrays.asList(rows)); + private TableDataList rawDataList(TableRow... 
rows) { + return new TableDataList().setRows(Arrays.asList(rows)); } @Test @@ -147,19 +134,14 @@ public void testRead() throws IOException { // BQ API data is always encoded as a string TableDataList dataList = rawDataList( - rawRow("Arthur", "42", "3.14159", "false"), - rawRow("Allison", "79", "2.71828", "true") - ); + rawRow("Arthur", "42", "3.14159", "false"), rawRow("Allison", "79", "2.71828", "true")); onTableList(dataList); - BigQuerySource source = new BigQuerySource( + BigQueryReader reader = new BigQueryReader( mockClient, - new TableReference() - .setProjectId("project") - .setDatasetId("dataset") - .setTableId("table")); + new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table")); - BigQuerySource.SourceIterator iterator = source.iterator(); + Reader.ReaderIterator iterator = reader.iterator(); Assert.assertTrue(iterator.hasNext()); TableRow row = iterator.next(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index 952a51bbce9c6..081e0b8c14ed9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import static com.google.cloud.dataflow.sdk.util.CloudCounterUtils.extractCounter; @@ -52,7 +52,7 @@ import com.google.cloud.dataflow.sdk.util.common.Metric.DoubleMetric; import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; import com.google.cloud.dataflow.sdk.util.common.worker.Operation; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import org.hamcrest.Description; @@ -83,24 +83,23 @@ static class TestMapTaskExecutor extends MapTaskExecutor { ApproximateProgress progress = null; public TestMapTaskExecutor(CounterSet counters) { - super(new ArrayList(), - counters, - new StateSampler("test", counters.getAddCounterMutator())); + super(new ArrayList(), counters, + new StateSampler("test", counters.getAddCounterMutator())); } @Override - public Source.Progress getWorkerProgress() { - return cloudProgressToSourceProgress(progress); + public Reader.Progress getWorkerProgress() { + return cloudProgressToReaderProgress(progress); } @Override - public Source.Position proposeStopPosition( - Source.Progress suggestedStopPoint) { - @Nullable ApproximateProgress progress = sourceProgressToCloudProgress(suggestedStopPoint); + public Reader.Position proposeStopPosition(Reader.Progress suggestedStopPoint) { + @Nullable + 
ApproximateProgress progress = sourceProgressToCloudProgress(suggestedStopPoint); if (progress == null) { return null; } - return cloudPositionToSourcePosition(progress.getPosition()); + return cloudPositionToReaderPosition(progress.getPosition()); } public void setWorkerProgress(ApproximateProgress progress) { @@ -124,8 +123,10 @@ public void setWorkerProgress(ApproximateProgress progress) { private static final Double COUNTER_VALUE2 = Math.PI; private static final String COUNTER_VALUE3 = "value"; - @Rule public final ExpectedException thrown = ExpectedException.none(); - @Mock private DataflowWorker.WorkUnitClient workUnitClient; + @Rule + public final ExpectedException thrown = ExpectedException.none(); + @Mock + private DataflowWorker.WorkUnitClient workUnitClient; private CounterSet counters; private List> metrics; private TestMapTaskExecutor worker; @@ -160,8 +161,7 @@ public Collection> getOutputMetrics() { workItem.setLeaseExpireTime(toCloudTime(new Instant(nowMillis + 1000))); workItem.setReportStatusInterval(toCloudDuration(Duration.millis(500))); - progressUpdater = new DataflowWorkProgressUpdater( - workItem, worker, workUnitClient, options); + progressUpdater = new DataflowWorkProgressUpdater(workItem, worker, workUnitClient, options); } // TODO: Remove sleeps from this test by using a mock sleeper. This @@ -169,18 +169,17 @@ public Collection> getOutputMetrics() { // not use a ScheduledThreadExecutor which relies on real time passing. @Test(timeout = 2000) public void workProgressUpdaterUpdates() throws Exception { - when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))).thenReturn( - generateServiceState(nowMillis + 2000, 1000, null)); + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, null)); setUpCounters(2); setUpMetrics(3); setUpProgress(makeRecordIndexProgress(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after leaseRemainingTime / 2. - verify(workUnitClient, timeout(600)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withCounters(2) - .withMetrics(3) - .withProgress(makeRecordIndexProgress(1L)))); + verify(workUnitClient, timeout(600)) + .reportWorkItemStatus( + argThat(new ExpectedDataflowProgress().withCounters(2).withMetrics(3).withProgress( + makeRecordIndexProgress(1L)))); progressUpdater.stopReportingProgress(); } @@ -192,8 +191,7 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { // us to truncate the task at index 3, and the next two will not ask us to // truncate at all. when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, - makeRecordIndexPosition(3L))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, makeRecordIndexPosition(3L))) .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)) .thenReturn(generateServiceState(nowMillis + 4000, 3000, null)); @@ -203,22 +201,22 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { progressUpdater.startReportingProgress(); // The initial update should be sent after // leaseRemainingTime (1000) / 2 = 500. 
- verify(workUnitClient, timeout(600)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withCounters(3) - .withMetrics(2) - .withProgress(makeRecordIndexProgress(1L)))); + verify(workUnitClient, timeout(600)) + .reportWorkItemStatus( + argThat(new ExpectedDataflowProgress().withCounters(3).withMetrics(2).withProgress( + makeRecordIndexProgress(1L)))); setUpCounters(5); setUpMetrics(6); setUpProgress(makeRecordIndexProgress(2L)); // The second update should be sent after one second (2000 / 2). - verify(workUnitClient, timeout(1100)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withCounters(5) - .withMetrics(6) - .withProgress(makeRecordIndexProgress(2L)) - .withStopPosition(makeRecordIndexPosition(3L)))); + verify(workUnitClient, timeout(1100)) + .reportWorkItemStatus(argThat( + new ExpectedDataflowProgress() + .withCounters(5) + .withMetrics(6) + .withProgress(makeRecordIndexProgress(2L)) + .withStopPosition(makeRecordIndexPosition(3L)))); // After the request is sent, reset stop position cache to null. assertNull(progressUpdater.getStopPosition()); @@ -226,9 +224,9 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { setUpProgress(makeRecordIndexProgress(3L)); // The third update should be sent after one and half seconds (3000 / 2). - verify(workUnitClient, timeout(1600)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withProgress(makeRecordIndexProgress(3L)))); + verify(workUnitClient, timeout(1600)) + .reportWorkItemStatus( + argThat(new ExpectedDataflowProgress().withProgress(makeRecordIndexProgress(3L)))); progressUpdater.stopReportingProgress(); } @@ -237,29 +235,30 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { @Test(timeout = 3000) public void workProgressUpdaterLastUpdate() throws Exception { when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, - makeRecordIndexPosition(2L))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, makeRecordIndexPosition(2L))) .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)); setUpProgress(makeRecordIndexProgress(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after leaseRemainingTime / 2 = 500 msec. Thread.sleep(600); - verify(workUnitClient, timeout(200)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withProgress(makeRecordIndexProgress(1L)))); + verify(workUnitClient, timeout(200)) + .reportWorkItemStatus( + argThat(new ExpectedDataflowProgress().withProgress(makeRecordIndexProgress(1L)))); // The first update should include the new actual stop position. // Verify that the progressUpdater has recorded it. - assertEquals(makeRecordIndexPosition(2L), + assertEquals( + makeRecordIndexPosition(2L), sourcePositionToCloudPosition(progressUpdater.getStopPosition())); setUpProgress(makeRecordIndexProgress(2L)); // The second update should be sent after one second (2000 / 2). - Thread.sleep(200); // not enough time for an update so the latest stop position is not - // acknowledged. + Thread.sleep(200); // not enough time for an update so the latest stop position is not + // acknowledged. 
// Check that the progressUpdater still has a pending stop position to send - assertEquals(makeRecordIndexPosition(2L), + assertEquals( + makeRecordIndexPosition(2L), sourcePositionToCloudPosition(progressUpdater.getStopPosition())); progressUpdater.stopReportingProgress(); // should send the last update @@ -267,9 +266,9 @@ public void workProgressUpdaterLastUpdate() throws Exception { assertNull(progressUpdater.getStopPosition()); // Verify that the last update contained the latest stop position - verify(workUnitClient, timeout(1000)).reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() - .withStopPosition(makeRecordIndexPosition(2L)))); + verify(workUnitClient, timeout(1000)) + .reportWorkItemStatus( + argThat(new ExpectedDataflowProgress().withStopPosition(makeRecordIndexPosition(2L)))); } private void setUpCounters(int n) { @@ -282,13 +281,16 @@ private void setUpCounters(int n) { private static Counter makeCounter(int i) { if (i % 3 == 0) { return Counter.longs(COUNTER_NAME + i, COUNTER_KINDS[0]) - .addValue(COUNTER_VALUE1 + i).addValue(COUNTER_VALUE1 + i * 2); + .addValue(COUNTER_VALUE1 + i) + .addValue(COUNTER_VALUE1 + i * 2); } else if (i % 3 == 1) { return Counter.doubles(COUNTER_NAME + i, COUNTER_KINDS[1]) - .addValue(COUNTER_VALUE2 + i).addValue(COUNTER_VALUE2 + i * 3); + .addValue(COUNTER_VALUE2 + i) + .addValue(COUNTER_VALUE2 + i * 3); } else { return Counter.strings(COUNTER_NAME + i, COUNTER_KINDS[2]) - .addValue(COUNTER_VALUE3 + i).addValue(COUNTER_NAME + i * 5); + .addValue(COUNTER_VALUE3 + i) + .addValue(COUNTER_NAME + i * 5); } } @@ -318,10 +320,8 @@ private ApproximateProgress makeRecordIndexProgress(Long index) { return new ApproximateProgress().setPosition(makeRecordIndexPosition(index)); } - private WorkItemServiceState generateServiceState( - long leaseExpirationTimestamp, int progressReportIntervalMs, - Position suggestedStopPosition) - throws IOException { + private WorkItemServiceState generateServiceState(long leaseExpirationTimestamp, + int progressReportIntervalMs, Position suggestedStopPosition) throws IOException { WorkItemServiceState responseState = new WorkItemServiceState(); responseState.setFactory(Transport.getJsonFactory()); responseState.setLeaseExpireTime(toCloudTime(new Instant(leaseExpirationTimestamp))); @@ -336,10 +336,14 @@ private WorkItemServiceState generateServiceState( } private static final class ExpectedDataflowProgress extends ArgumentMatcher { - @Nullable Integer counterCount; - @Nullable Integer metricCount; - @Nullable ApproximateProgress expectedProgress; - @Nullable Position expectedStopPosition; + @Nullable + Integer counterCount; + @Nullable + Integer metricCount; + @Nullable + ApproximateProgress expectedProgress; + @Nullable + Position expectedStopPosition; public ExpectedDataflowProgress withCounters(Integer counterCount) { this.counterCount = counterCount; @@ -388,9 +392,7 @@ public void describeTo(Description description) { @Override public boolean matches(Object status) { WorkItemStatus st = (WorkItemStatus) status; - return matchCountersAndMetrics(st) - && matchProgress(st) - && matchStopPosition(st); + return matchCountersAndMetrics(st) && matchProgress(st) && matchStopPosition(st); } private boolean matchCountersAndMetrics(WorkItemStatus status) { @@ -405,8 +407,7 @@ private boolean matchCountersAndMetrics(WorkItemStatus status) { } for (int i = 0; i < counterCount; i++) { - if (!sentUpdates.contains( - CounterTestUtils.extractCounterUpdate(makeCounter(i), false))) { + if 
(!sentUpdates.contains(CounterTestUtils.extractCounterUpdate(makeCounter(i), false))) { return false; } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java similarity index 68% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java index 21ecb0af04f2c..ff3abd3dbe426 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java @@ -17,7 +17,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Base64.encodeBase64URLSafeString; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; @@ -35,10 +35,9 @@ import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.Reiterable; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; -import com.google.cloud.dataflow.sdk.util.common.worker.Source.SourceIterator; import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Lists; @@ -58,20 +57,18 @@ import java.util.NoSuchElementException; /** - * Tests for GroupingShuffleSource. + * Tests for GroupingShuffleReader. */ @RunWith(JUnit4.class) -public class GroupingShuffleSourceTest { +public class GroupingShuffleReaderTest { private static final List>> NO_KVS = Collections.emptyList(); private static final Instant timestamp = new Instant(123000); private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); private static final List>> KVS = Arrays.asList( - KV.of(1, Arrays.asList("in 1a", "in 1b")), - KV.of(2, Arrays.asList("in 2a", "in 2b")), - KV.of(3, Arrays.asList("in 3")), - KV.of(4, Arrays.asList("in 4a", "in 4b", "in 4c", "in 4d")), + KV.of(1, Arrays.asList("in 1a", "in 1b")), KV.of(2, Arrays.asList("in 2a", "in 2b")), + KV.of(3, Arrays.asList("in 3")), KV.of(4, Arrays.asList("in 4a", "in 4b", "in 4c", "in 4d")), KV.of(5, Arrays.asList("in 5"))); /** How many of the values with each key are to be read. */ @@ -86,8 +83,9 @@ private enum ValuesToRead { READ_ALL_VALUES } - private void runTestReadShuffleSource(List>> input, - ValuesToRead valuesToRead) + private void runTestReadFromShuffle( + List>> input, + ValuesToRead valuesToRead) throws Exception { Coder>> sinkElemCoder = WindowedValue.getFullCoder( @@ -129,16 +127,16 @@ private void runTestReadShuffleSource(List>> input, Assert.assertEquals(kvCount, records.size()); Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); - // Read from shuffle with GroupingShuffleSource. 
+ // Read from shuffle with GroupingShuffleReader. BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource shuffleSource = - new GroupingShuffleSource<>( + GroupingShuffleReader groupingShuffleReader = + new GroupingShuffleReader<>( PipelineOptionsFactory.create(), null, null, null, sourceElemCoder, context); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(shuffleSource); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(groupingShuffleReader); TestShuffleReader shuffleReader = new TestShuffleReader(); List expectedSizes = new ArrayList<>(); @@ -148,8 +146,8 @@ private void runTestReadShuffleSource(List>> input, } List>> actual = new ArrayList<>(); - try (SourceIterator>>> iter = - shuffleSource.iterator(shuffleReader)) { + try (Reader.ReaderIterator>>> iter = + groupingShuffleReader.iterator(shuffleReader)) { Iterable prevValuesIterable = null; Iterator prevValuesIterator = null; while (iter.hasNext()) { @@ -166,10 +164,10 @@ private void runTestReadShuffleSource(List>> input, List values = new ArrayList<>(); if (valuesToRead.ordinal() > ValuesToRead.SKIP_VALUES.ordinal()) { if (prevValuesIterable != null) { - prevValuesIterable.iterator(); // Verifies that this does not throw. + prevValuesIterable.iterator(); // Verifies that this does not throw. } if (prevValuesIterator != null) { - prevValuesIterator.hasNext(); // Verifies that this does not throw. + prevValuesIterator.hasNext(); // Verifies that this does not throw. } Iterable valuesIterable = elem.getValue(); @@ -179,8 +177,7 @@ private void runTestReadShuffleSource(List>> input, while (valuesIterator.hasNext()) { Assert.assertTrue(valuesIterator.hasNext()); Assert.assertTrue(valuesIterator.hasNext()); - Assert.assertEquals("BatchModeExecutionContext key", - key, context.getKey()); + Assert.assertEquals("BatchModeExecutionContext key", key, context.getKey()); values.add(valuesIterator.next()); if (valuesToRead == ValuesToRead.READ_ONE_VALUE) { break; @@ -196,7 +193,7 @@ private void runTestReadShuffleSource(List>> input, } catch (NoSuchElementException exn) { // As expected. } - valuesIterable.iterator(); // Verifies that this does not throw. + valuesIterable.iterator(); // Verifies that this does not throw. 
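// ---------------------------------------------------------------------------------------------
// [Editorial sketch, not part of the patch] The stop-position tests that follow all use the same
// calling convention: a cloud Position is wrapped in an ApproximateProgress, translated with
// cloudProgressToReaderProgress, and offered to ReaderIterator.updateStopPosition, which returns
// the accepted position or null when the proposal is rejected (at or past the current stop
// position, at or before the last record already returned, or after all input was consumed).
// The helper below is a minimal illustration of that contract, reusing the static imports shown
// in this test file; the helper name tryUpdateStopPosition and the Reader.Position return type
// are assumptions made for illustration only, not code from the SDK.
static Position tryUpdateStopPosition(Reader.ReaderIterator<?> iterator, Position proposed)
    throws Exception {
  Reader.Position accepted = iterator.updateStopPosition(
      cloudProgressToReaderProgress(new ApproximateProgress().setPosition(proposed)));
  // null signals a rejected proposal; only an accepted position is translated back into the
  // cloud Position representation that the service understands.
  return accepted == null ? null : sourcePositionToCloudPosition(accepted);
}
// ---------------------------------------------------------------------------------------------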
} } @@ -235,33 +232,33 @@ private void runTestReadShuffleSource(List>> input, } @Test - public void testReadEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(NO_KVS, ValuesToRead.READ_ALL_VALUES); + public void testReadEmptyShuffleData() throws Exception { + runTestReadFromShuffle(NO_KVS, ValuesToRead.READ_ALL_VALUES); } @Test - public void testReadEmptyShuffleSourceSkippingValues() throws Exception { - runTestReadShuffleSource(NO_KVS, ValuesToRead.SKIP_VALUES); + public void testReadEmptyShuffleDataSkippingValues() throws Exception { + runTestReadFromShuffle(NO_KVS, ValuesToRead.SKIP_VALUES); } @Test - public void testReadNonEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(KVS, ValuesToRead.READ_ALL_VALUES); + public void testReadNonEmptyShuffleData() throws Exception { + runTestReadFromShuffle(KVS, ValuesToRead.READ_ALL_VALUES); } @Test - public void testReadNonEmptyShuffleSourceReadingOneValue() throws Exception { - runTestReadShuffleSource(KVS, ValuesToRead.READ_ONE_VALUE); + public void testReadNonEmptyShuffleDataReadingOneValue() throws Exception { + runTestReadFromShuffle(KVS, ValuesToRead.READ_ONE_VALUE); } @Test - public void testReadNonEmptyShuffleSourceReadingNoValues() throws Exception { - runTestReadShuffleSource(KVS, ValuesToRead.READ_NO_VALUES); + public void testReadNonEmptyShuffleDataReadingNoValues() throws Exception { + runTestReadFromShuffle(KVS, ValuesToRead.READ_NO_VALUES); } @Test - public void testReadNonEmptyShuffleSourceSkippingValues() throws Exception { - runTestReadShuffleSource(KVS, ValuesToRead.SKIP_VALUES); + public void testReadNonEmptyShuffleDataSkippingValues() throws Exception { + runTestReadFromShuffle(KVS, ValuesToRead.SKIP_VALUES); } static byte[] fabricatePosition(int shard, byte[] key) throws Exception { @@ -275,24 +272,17 @@ static byte[] fabricatePosition(int shard, byte[] key) throws Exception { } @Test - public void testReadFromEmptyShuffleSourceAndUpdateStopPosition() - throws Exception { + public void testReadFromEmptyShuffleDataAndUpdateStopPosition() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource shuffleSource = - new GroupingShuffleSource<>( - PipelineOptionsFactory.create(), - null, null, null, - WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of(BigEndianIntegerCoder.of())), - IntervalWindow.getCoder()), - context); + GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( + PipelineOptionsFactory.create(), null, null, null, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); TestShuffleReader shuffleReader = new TestShuffleReader(); - try (Source.SourceIterator>>> iter = - shuffleSource.iterator(shuffleReader)) { - - + try (Reader.ReaderIterator>>> iter = + groupingShuffleReader.iterator(shuffleReader)) { // Can update the stop position, the source range spans all interval Position proposedStopPosition = new Position(); String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); @@ -301,25 +291,23 @@ public void testReadFromEmptyShuffleSourceAndUpdateStopPosition() Assert.assertEquals( stop, sourcePositionToCloudPosition( - iter.updateStopPosition( - cloudProgressToSourceProgress( - createApproximateProgress(proposedStopPosition)))) - .getShufflePosition()); + iter.updateStopPosition(cloudProgressToReaderProgress( + 
createApproximateProgress(proposedStopPosition)))).getShufflePosition()); // Cannot update stop position to a position >= the current stop position stop = encodeBase64URLSafeString(fabricatePosition(1, null)); proposedStopPosition.setShufflePosition(stop); - Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress( - createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + iter.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); } } @Test - public void testReadFromShuffleSourceAndFailToUpdateStopPosition() - throws Exception { + public void testReadFromShuffleDataAndFailToUpdateStopPosition() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); final int kFirstShard = 0; @@ -327,37 +315,32 @@ public void testReadFromShuffleSourceAndFailToUpdateStopPosition() final int kNumRecords = 2; for (int i = 0; i < kNumRecords; ++i) { byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); - shuffleReader.addEntry(new ShuffleEntry( - fabricatePosition(kFirstShard, key), key, null, key)); + shuffleReader.addEntry(new ShuffleEntry(fabricatePosition(kFirstShard, key), key, null, key)); } // Note that TestShuffleReader start/end positions are in the // space of keys not the positions (TODO: should probably always // use positions instead). - String stop = encodeBase64URLSafeString( - fabricatePosition(kNumRecords, null)); - GroupingShuffleSource shuffleSource = - new GroupingShuffleSource<>( - PipelineOptionsFactory.create(), - null, null, stop, - WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of(BigEndianIntegerCoder.of())), - IntervalWindow.getCoder()), - context); - - try (Source.SourceIterator>>> iter = - shuffleSource.iterator(shuffleReader)) { + String stop = encodeBase64URLSafeString(fabricatePosition(kNumRecords, null)); + GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( + PipelineOptionsFactory.create(), null, null, stop, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); + try (Reader.ReaderIterator>>> iter = + groupingShuffleReader.iterator(shuffleReader)) { Position proposedStopPosition = new Position(); proposedStopPosition.setShufflePosition( encodeBase64URLSafeString(fabricatePosition(kNumRecords + 1, null))); // Cannot update the stop position since the value provided is // past the current stop position. - Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + iter.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); int i = 0; for (; iter.hasNext(); ++i) { @@ -369,14 +352,18 @@ public void testReadFromShuffleSourceAndFailToUpdateStopPosition() encodeBase64URLSafeString(fabricatePosition(kFirstShard, key))); // Cannot update stop position since it is identical with // the position of the record that was just returned. 
- Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + iter.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); proposedStopPosition.setShufflePosition( encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); // Cannot update stop position since it comes before current position - Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + iter.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); } } Assert.assertEquals(kNumRecords, i); @@ -384,25 +371,22 @@ public void testReadFromShuffleSourceAndFailToUpdateStopPosition() proposedStopPosition.setShufflePosition( encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); // Cannot update stop position since all input was consumed. - Assert.assertEquals(null, iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + iter.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); } } @Test - public void testReadFromShuffleSourceAndUpdateStopPosition() - throws Exception { + public void testReadFromShuffleAndUpdateStopPosition() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource shuffleSource = - new GroupingShuffleSource<>( - PipelineOptionsFactory.create(), - null, null, null, - WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of(BigEndianIntegerCoder.of())), - IntervalWindow.getCoder()), - context); + GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( + PipelineOptionsFactory.create(), null, null, null, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); TestShuffleReader shuffleReader = new TestShuffleReader(); final int kNumRecords = 10; @@ -413,30 +397,27 @@ public void testReadFromShuffleSourceAndUpdateStopPosition() // (hence groups of values for the same key are singletons) // therefore each record comes with a unique position constructed. 
for (int i = 0; i < kNumRecords; ++i) { - byte[] keyByte = CoderUtils.encodeToByteArray( - BigEndianIntegerCoder.of(), i); - ShuffleEntry entry = new ShuffleEntry( - fabricatePosition(kFirstShard, keyByte), keyByte, null, keyByte); + byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); + ShuffleEntry entry = + new ShuffleEntry(fabricatePosition(kFirstShard, keyByte), keyByte, null, keyByte); shuffleReader.addEntry(entry); } for (int i = kNumRecords; i < 2 * kNumRecords; ++i) { - byte[] keyByte = CoderUtils.encodeToByteArray( - BigEndianIntegerCoder.of(), i); + byte[] keyByte = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); - ShuffleEntry entry = new ShuffleEntry( - fabricatePosition(kSecondShard, keyByte), keyByte, null, keyByte); + ShuffleEntry entry = + new ShuffleEntry(fabricatePosition(kSecondShard, keyByte), keyByte, null, keyByte); shuffleReader.addEntry(entry); } int i = 0; - try (Source.SourceIterator>>> iter = - shuffleSource.iterator(shuffleReader)) { - + try (Reader.ReaderIterator>>> iter = + groupingShuffleReader.iterator(shuffleReader)) { Position proposedStopPosition = new Position(); Assert.assertNull(iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); // Stop at the shard boundary String stop = encodeBase64URLSafeString(fabricatePosition(kSecondShard, null)); @@ -445,9 +426,8 @@ public void testReadFromShuffleSourceAndUpdateStopPosition() Assert.assertEquals( stop, sourcePositionToCloudPosition( - iter.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))) - .getShufflePosition()); + iter.updateStopPosition(cloudProgressToReaderProgress( + createApproximateProgress(proposedStopPosition)))).getShufflePosition()); while (iter.hasNext()) { Assert.assertTrue(iter.hasNext()); @@ -482,16 +462,12 @@ public void testGetApproximateProgress() throws Exception { List positionsList = new ArrayList(); BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource shuffleSource = - new GroupingShuffleSource<>( - PipelineOptionsFactory.create(), - null, null, null, - WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of(BigEndianIntegerCoder.of())), - IntervalWindow.getCoder()), - context); + GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( + PipelineOptionsFactory.create(), null, null, null, + WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(BigEndianIntegerCoder.of())), + IntervalWindow.getCoder()), + context); TestShuffleReader shuffleReader = new TestShuffleReader(); final int kNumRecords = 10; @@ -504,30 +480,33 @@ public void testGetApproximateProgress() throws Exception { shuffleReader.addEntry(entry); } - try (Source.SourceIterator>>> sourceIterator = - shuffleSource.iterator(shuffleReader)) { + try (Reader.ReaderIterator>>> readerIterator = + groupingShuffleReader.iterator(shuffleReader)) { Integer i = 0; - while (sourceIterator.hasNext()) { - Assert.assertTrue(sourceIterator.hasNext()); - ApproximateProgress progress = sourceProgressToCloudProgress(sourceIterator.getProgress()); + while (readerIterator.hasNext()) { + Assert.assertTrue(readerIterator.hasNext()); + ApproximateProgress progress = sourceProgressToCloudProgress(readerIterator.getProgress()); Assert.assertNotNull(progress.getPosition().getShufflePosition()); // 
Compare returned position with the expected position. - Assert.assertEquals(ByteArrayShufflePosition.of(positionsList.get(i)).encodeBase64(), + Assert.assertEquals( + ByteArrayShufflePosition.of(positionsList.get(i)).encodeBase64(), progress.getPosition().getShufflePosition()); - WindowedValue>> elem = sourceIterator.next(); + WindowedValue>> elem = readerIterator.next(); Assert.assertEquals(i, elem.getValue().getKey()); i++; } - Assert.assertFalse(sourceIterator.hasNext()); + Assert.assertFalse(readerIterator.hasNext()); // Cannot update stop position since all input was consumed. Position proposedStopPosition = new Position(); String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); proposedStopPosition.setShufflePosition(stop); - Assert.assertEquals(null, sourceIterator.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + Assert.assertEquals( + null, + readerIterator.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactoryTest.java similarity index 50% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactoryTest.java index 82d2c82e99761..c10792d6cc37a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderFactoryTest.java @@ -16,10 +16,11 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.InMemorySourceTest.encodedElements; +import static com.google.cloud.dataflow.sdk.runners.worker.InMemoryReaderTest.encodedElements; import static com.google.cloud.dataflow.sdk.util.Structs.addLong; import static com.google.cloud.dataflow.sdk.util.Structs.addStringList; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -27,7 +28,7 @@ import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.hamcrest.core.IsInstanceOf; import org.junit.Assert; @@ -39,16 +40,12 @@ import java.util.List; /** - * Tests for InMemorySourceFactory. + * Tests for InMemoryReaderFactory. 
*/ @RunWith(JUnit4.class) -public class InMemorySourceFactoryTest { - static com.google.api.services.dataflow.model.Source createInMemoryCloudSource( - List elements, - Long start, - Long end, - Coder coder) - throws Exception { +public class InMemoryReaderFactoryTest { + static Source createInMemoryCloudSource( + List elements, Long start, Long end, Coder coder) throws Exception { List encodedElements = encodedElements(elements, coder); CloudObject spec = CloudObject.forClassName("InMemorySource"); @@ -61,50 +58,36 @@ static com.google.api.services.dataflow.model.Source createInMemoryCloudSour addLong(spec, PropertyNames.END_INDEX, end); } - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(coder.asCloudObject()); return cloudSource; } - void runTestCreateInMemorySource(List elements, - Long start, - Long end, - int expectedStart, - int expectedEnd, - Coder coder) - throws Exception { - com.google.api.services.dataflow.model.Source cloudSource = - createInMemoryCloudSource(elements, start, end, coder); + void runTestCreateInMemoryReader(List elements, Long start, Long end, int expectedStart, + int expectedEnd, Coder coder) throws Exception { + Source cloudSource = createInMemoryCloudSource(elements, start, end, coder); - Source source = SourceFactory.create(PipelineOptionsFactory.create(), cloudSource, - new BatchModeExecutionContext()); - Assert.assertThat(source, new IsInstanceOf(InMemorySource.class)); - InMemorySource inMemorySource = (InMemorySource) source; - Assert.assertEquals(encodedElements(elements, coder), - inMemorySource.encodedElements); - Assert.assertEquals(expectedStart, inMemorySource.startIndex); - Assert.assertEquals(expectedEnd, inMemorySource.endIndex); - Assert.assertEquals(coder, inMemorySource.coder); + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + Assert.assertThat(reader, new IsInstanceOf(InMemoryReader.class)); + InMemoryReader inMemoryReader = (InMemoryReader) reader; + Assert.assertEquals(encodedElements(elements, coder), inMemoryReader.encodedElements); + Assert.assertEquals(expectedStart, inMemoryReader.startIndex); + Assert.assertEquals(expectedEnd, inMemoryReader.endIndex); + Assert.assertEquals(coder, inMemoryReader.coder); } @Test - public void testCreatePlainInMemorySource() throws Exception { - runTestCreateInMemorySource( - Arrays.asList("hi", "there", "bob"), - null, null, - 0, 3, - StringUtf8Coder.of()); + public void testCreatePlainInMemoryReader() throws Exception { + runTestCreateInMemoryReader( + Arrays.asList("hi", "there", "bob"), null, null, 0, 3, StringUtf8Coder.of()); } @Test - public void testCreateRichInMemorySource() throws Exception { - runTestCreateInMemorySource( - Arrays.asList(33, 44, 55, 66, 77, 88), - 1L, 3L, - 1, 3, - BigEndianIntegerCoder.of()); + public void testCreateRichInMemoryReader() throws Exception { + runTestCreateInMemoryReader( + Arrays.asList(33, 44, 55, 66, 77, 88), 1L, 3L, 1, 3, BigEndianIntegerCoder.of()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java new file mode 100644 index 0000000000000..0068e5e275172 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java @@ -0,0 +1,190 @@ +/* + * Copyright 
(C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; +import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Tests for InMemoryReader. 
+ */ +@RunWith(JUnit4.class) +public class InMemoryReaderTest { + static List encodedElements(List elements, Coder coder) throws Exception { + List encodedElements = new ArrayList<>(); + for (T element : elements) { + byte[] encodedElement = encodeToByteArray(coder, element); + String encodedElementString = byteArrayToJsonString(encodedElement); + encodedElements.add(encodedElementString); + } + return encodedElements; + } + + void runTestReadInMemory(List elements, Long startIndex, Long endIndex, + List expectedElements, List expectedSizes, Coder coder) throws Exception { + InMemoryReader inMemoryReader = + new InMemoryReader<>(encodedElements(elements, coder), startIndex, endIndex, coder); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(inMemoryReader); + List actualElements = new ArrayList<>(); + try (Reader.ReaderIterator iterator = inMemoryReader.iterator()) { + for (long i = inMemoryReader.startIndex; iterator.hasNext(); i++) { + Assert.assertEquals( + new ApproximateProgress().setPosition(makeIndexPosition(i)), + sourceProgressToCloudProgress(iterator.getProgress())); + actualElements.add(iterator.next()); + } + } + Assert.assertEquals(expectedElements, actualElements); + Assert.assertEquals(expectedSizes, observer.getActualSizes()); + } + + @Test + public void testReadAllElements() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), null, null, + Arrays.asList(33, 44, 55, 66, 77, 88), Arrays.asList(4, 4, 4, 4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStart() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), 2L, null, + Arrays.asList(55, 66, 77, 88), Arrays.asList(4, 4, 4, 4), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsToEnd() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), null, 3L, Arrays.asList(33, 44, 55), + Arrays.asList(4, 4, 4), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartToEnd() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), 2L, 5L, Arrays.asList(55, 66, 77), + Arrays.asList(4, 4, 4), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsOffEnd() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), null, 30L, + Arrays.asList(33, 44, 55, 66, 77, 88), Arrays.asList(4, 4, 4, 4, 4, 4), + BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartPastEnd() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), 20L, null, Arrays.asList(), + Arrays.asList(), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadElementsFromStartToEndEmptyRange() throws Exception { + runTestReadInMemory(Arrays.asList(33, 44, 55, 66, 77, 88), 2L, 2L, Arrays.asList(), + Arrays.asList(), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadNoElements() throws Exception { + runTestReadInMemory(Arrays.asList(), null, null, Arrays.asList(), + Arrays.asList(), BigEndianIntegerCoder.of()); + } + + @Test + public void testReadNoElementsFromStartToEndEmptyRange() throws Exception { + runTestReadInMemory(Arrays.asList(), 0L, 0L, Arrays.asList(), + Arrays.asList(), BigEndianIntegerCoder.of()); + } + + @Test + public void testUpdatePosition() throws Exception { + List elements = Arrays.asList(33, 44, 55, 66, 77, 88); + final long start = 1L; + final long stop = 3L; + final long end = 4L; + + Coder coder = 
BigEndianIntegerCoder.of(); + InMemoryReader inMemoryReader = + new InMemoryReader<>(encodedElements(elements, coder), start, end, coder); + + // Illegal proposed stop position. + try (Reader.ReaderIterator iterator = inMemoryReader.iterator()) { + Assert.assertNull( + iterator.updateStopPosition(cloudProgressToReaderProgress(new ApproximateProgress()))); + Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(makeIndexPosition(null))))); + } + + // Successful update. + try (InMemoryReader.InMemoryReaderIterator iterator = + (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { + Assert.assertEquals( + makeIndexPosition(stop), + sourcePositionToCloudPosition(iterator.updateStopPosition(cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(makeIndexPosition(stop)))))); + Assert.assertEquals(stop, iterator.endPosition); + Assert.assertEquals(44, iterator.next().intValue()); + Assert.assertEquals(55, iterator.next().intValue()); + Assert.assertFalse(iterator.hasNext()); + } + + // Proposed stop position is before the current position, no update. + try (InMemoryReader.InMemoryReaderIterator iterator = + (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { + Assert.assertEquals(44, iterator.next().intValue()); + Assert.assertEquals(55, iterator.next().intValue()); + Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(makeIndexPosition(stop))))); + Assert.assertEquals((int) end, iterator.endPosition); + Assert.assertTrue(iterator.hasNext()); + } + + // Proposed stop position is after the current stop (end) position, no update. + try (InMemoryReader.InMemoryReaderIterator iterator = + (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { + Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(makeIndexPosition(end + 1))))); + Assert.assertEquals((int) end, iterator.endPosition); + } + } + + private Position makeIndexPosition(Long index) { + Position position = new Position(); + if (index != null) { + position.setRecordIndex(index); + } + return position; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java deleted file mode 100644 index d7574c517b4e8..0000000000000 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemorySourceTest.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -package com.google.cloud.dataflow.sdk.runners.worker; - -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; -import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; -import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; - -import com.google.api.services.dataflow.model.ApproximateProgress; -import com.google.api.services.dataflow.model.Position; -import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; -import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; - -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * Tests for InMemorySource. - */ -@RunWith(JUnit4.class) -public class InMemorySourceTest { - static List encodedElements(List elements, Coder coder) - throws Exception { - List encodedElements = new ArrayList<>(); - for (T element : elements) { - byte[] encodedElement = encodeToByteArray(coder, element); - String encodedElementString = byteArrayToJsonString(encodedElement); - encodedElements.add(encodedElementString); - } - return encodedElements; - } - - void runTestReadInMemorySource(List elements, - Long startIndex, - Long endIndex, - List expectedElements, - List expectedSizes, - Coder coder) - throws Exception { - InMemorySource inMemorySource = new InMemorySource<>( - encodedElements(elements, coder), startIndex, endIndex, coder); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(inMemorySource); - List actualElements = new ArrayList<>(); - try (Source.SourceIterator iterator = inMemorySource.iterator()) { - for (long i = inMemorySource.startIndex; iterator.hasNext(); i++) { - Assert.assertEquals( - new ApproximateProgress().setPosition(makeIndexPosition(i)), - sourceProgressToCloudProgress(iterator.getProgress())); - actualElements.add(iterator.next()); - } - } - Assert.assertEquals(expectedElements, actualElements); - Assert.assertEquals(expectedSizes, observer.getActualSizes()); - } - - @Test - public void testReadAllElements() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - null, - null, - Arrays.asList(33, 44, 55, 66, 77, 88), - Arrays.asList(4, 4, 4, 4, 4, 4), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadElementsFromStart() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - 2L, - null, - Arrays.asList(55, 66, 77, 88), - Arrays.asList(4, 4, 4, 4), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadElementsToEnd() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - null, - 3L, - Arrays.asList(33, 44, 55), - Arrays.asList(4, 4, 4), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadElementsFromStartToEnd() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - 2L, - 5L, - Arrays.asList(55, 66, 77), - Arrays.asList(4, 4, 4), - BigEndianIntegerCoder.of()); - } - - @Test - public void 
testReadElementsOffEnd() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - null, - 30L, - Arrays.asList(33, 44, 55, 66, 77, 88), - Arrays.asList(4, 4, 4, 4, 4, 4), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadElementsFromStartPastEnd() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - 20L, - null, - Arrays.asList(), - Arrays.asList(), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadElementsFromStartToEndEmptyRange() throws Exception { - runTestReadInMemorySource(Arrays.asList(33, 44, 55, 66, 77, 88), - 2L, - 2L, - Arrays.asList(), - Arrays.asList(), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadNoElements() throws Exception { - runTestReadInMemorySource(Arrays.asList(), - null, - null, - Arrays.asList(), - Arrays.asList(), - BigEndianIntegerCoder.of()); - } - - @Test - public void testReadNoElementsFromStartToEndEmptyRange() throws Exception { - runTestReadInMemorySource(Arrays.asList(), - 0L, - 0L, - Arrays.asList(), - Arrays.asList(), - BigEndianIntegerCoder.of()); - } - - @Test - public void testUpdatePosition() throws Exception { - List elements = Arrays.asList(33, 44, 55, 66, 77, 88); - final long start = 1L; - final long stop = 3L; - final long end = 4L; - - Coder coder = BigEndianIntegerCoder.of(); - InMemorySource inMemorySource = new InMemorySource<>( - encodedElements(elements, coder), start, end, coder); - - // Illegal proposed stop position. - try (Source.SourceIterator iterator = inMemorySource.iterator()) { - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress(new ApproximateProgress()))); - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress( - new ApproximateProgress().setPosition(makeIndexPosition(null))))); - } - - // Successful update. - try (InMemorySource.InMemorySourceIterator iterator = - (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { - Assert.assertEquals( - makeIndexPosition(stop), - sourcePositionToCloudPosition( - iterator.updateStopPosition( - cloudProgressToSourceProgress( - new ApproximateProgress().setPosition(makeIndexPosition(stop)))))); - Assert.assertEquals(stop, iterator.endPosition); - Assert.assertEquals(44, iterator.next().intValue()); - Assert.assertEquals(55, iterator.next().intValue()); - Assert.assertFalse(iterator.hasNext()); - } - - // Proposed stop position is before the current position, no update. - try (InMemorySource.InMemorySourceIterator iterator = - (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { - Assert.assertEquals(44, iterator.next().intValue()); - Assert.assertEquals(55, iterator.next().intValue()); - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress( - new ApproximateProgress().setPosition(makeIndexPosition(stop))))); - Assert.assertEquals((int) end, iterator.endPosition); - Assert.assertTrue(iterator.hasNext()); - } - - // Proposed stop position is after the current stop (end) position, no update. 
- try (InMemorySource.InMemorySourceIterator iterator = - (InMemorySource.InMemorySourceIterator) inMemorySource.iterator()) { - Assert.assertNull( - iterator.updateStopPosition( - cloudProgressToSourceProgress( - new ApproximateProgress().setPosition(makeIndexPosition(end + 1))))); - Assert.assertEquals((int) end, iterator.endPosition); - } - } - - private Position makeIndexPosition(Long index) { - Position position = new Position(); - if (index != null) { - position.setRecordIndex(index); - } - return position; - } -} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index 2060865759331..445755130db5d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -33,13 +33,14 @@ import com.google.api.services.dataflow.model.ParallelInstruction; import com.google.api.services.dataflow.model.PartialGroupByKeyInstruction; import com.google.api.services.dataflow.model.ReadInstruction; +import com.google.api.services.dataflow.model.Source; import com.google.api.services.dataflow.model.WriteInstruction; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.ReaderFactoryTest.TestReader; +import com.google.cloud.dataflow.sdk.runners.worker.ReaderFactoryTest.TestReaderFactory; import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSink; import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSinkFactory; -import com.google.cloud.dataflow.sdk.runners.worker.SourceFactoryTest.TestSource; -import com.google.cloud.dataflow.sdk.runners.worker.SourceFactoryTest.TestSourceFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; @@ -80,93 +81,67 @@ public class MapTaskExecutorFactoryTest { @Test public void testCreateMapTaskExecutor() throws Exception { - List instructions = - Arrays.asList( - createReadInstruction("Read"), - createParDoInstruction(0, 0, "DoFn1"), - createParDoInstruction(0, 0, "DoFn2"), - createFlattenInstruction(1, 0, 2, 0, "Flatten"), - createWriteInstruction(3, 0, "Write")); + List instructions = Arrays.asList(createReadInstruction("Read"), + createParDoInstruction(0, 0, "DoFn1"), createParDoInstruction(0, 0, "DoFn2"), + createFlattenInstruction(1, 0, 2, 0, "Flatten"), createWriteInstruction(3, 0, "Write")); MapTask mapTask = new MapTask(); mapTask.setStageName("test"); mapTask.setInstructions(instructions); CounterSet counterSet = null; - try (MapTaskExecutor executor = - MapTaskExecutorFactory.create( - PipelineOptionsFactory.create(), - mapTask, - new BatchModeExecutionContext())) { - + try ( + MapTaskExecutor executor = MapTaskExecutorFactory.create( + PipelineOptionsFactory.create(), mapTask, new BatchModeExecutionContext())) { @SuppressWarnings("unchecked") List operations = (List) executor.operations; assertThat( operations, - CoreMatchers.hasItems( - new IsInstanceOf(ReadOperation.class), - new IsInstanceOf(ParDoOperation.class), - new IsInstanceOf(ParDoOperation.class), - new IsInstanceOf(FlattenOperation.class), - new IsInstanceOf(WriteOperation.class))); + 
CoreMatchers.hasItems(new IsInstanceOf(ReadOperation.class), + new IsInstanceOf(ParDoOperation.class), new IsInstanceOf(ParDoOperation.class), + new IsInstanceOf(FlattenOperation.class), new IsInstanceOf(WriteOperation.class))); counterSet = executor.getOutputCounters(); } assertEquals( - new CounterSet( - Counter.longs("read_output_name-ElementCount", SUM) - .resetToValue(0L), - Counter.longs("read_output_name-MeanByteCount", MEAN) - .resetToValue(0, 0L), + new CounterSet(Counter.longs("read_output_name-ElementCount", SUM).resetToValue(0L), + Counter.longs("read_output_name-MeanByteCount", MEAN).resetToValue(0, 0L), Counter.longs("Read-ByteCount", SUM).resetToValue(0L), - Counter.longs("test-Read-start-msecs", SUM) - .resetToValue(0L), - Counter.longs("test-Read-read-msecs", SUM) - .resetToValue(0L), - Counter.longs("test-Read-process-msecs", SUM) - .resetToValue(0L), - Counter.longs("test-Read-finish-msecs", SUM) - .resetToValue(0L), - Counter.longs("DoFn1_output-ElementCount", SUM) - .resetToValue(0L), - Counter.longs("DoFn1_output-MeanByteCount", MEAN) - .resetToValue(0, 0L), + Counter.longs("test-Read-start-msecs", SUM).resetToValue(0L), + Counter.longs("test-Read-read-msecs", SUM).resetToValue(0L), + Counter.longs("test-Read-process-msecs", SUM).resetToValue(0L), + Counter.longs("test-Read-finish-msecs", SUM).resetToValue(0L), + Counter.longs("DoFn1_output-ElementCount", SUM).resetToValue(0L), + Counter.longs("DoFn1_output-MeanByteCount", MEAN).resetToValue(0, 0L), Counter.longs("test-DoFn1-start-msecs", SUM).resetToValue(0L), Counter.longs("test-DoFn1-process-msecs", SUM).resetToValue(0L), Counter.longs("test-DoFn1-finish-msecs", SUM).resetToValue(0L), - Counter.longs("DoFn2_output-ElementCount", SUM) - .resetToValue(0L), - Counter.longs("DoFn2_output-MeanByteCount", MEAN) - .resetToValue(0, 0L), + Counter.longs("DoFn2_output-ElementCount", SUM).resetToValue(0L), + Counter.longs("DoFn2_output-MeanByteCount", MEAN).resetToValue(0, 0L), Counter.longs("test-DoFn2-start-msecs", SUM).resetToValue(0L), Counter.longs("test-DoFn2-process-msecs", SUM).resetToValue(0L), Counter.longs("test-DoFn2-finish-msecs", SUM).resetToValue(0L), - Counter.longs("flatten_output_name-ElementCount", SUM) - .resetToValue(0L), - Counter.longs("flatten_output_name-MeanByteCount", MEAN) - .resetToValue(0, 0L), + Counter.longs("flatten_output_name-ElementCount", SUM).resetToValue(0L), + Counter.longs("flatten_output_name-MeanByteCount", MEAN).resetToValue(0, 0L), Counter.longs("test-Flatten-start-msecs", SUM).resetToValue(0L), Counter.longs("test-Flatten-process-msecs", SUM).resetToValue(0L), Counter.longs("test-Flatten-finish-msecs", SUM).resetToValue(0L), - Counter.longs("Write-ByteCount", SUM) - .resetToValue(0L), + Counter.longs("Write-ByteCount", SUM).resetToValue(0L), Counter.longs("test-Write-start-msecs", SUM).resetToValue(0L), Counter.longs("test-Write-process-msecs", SUM).resetToValue(0L), Counter.longs("test-Write-finish-msecs", SUM).resetToValue(0L), Counter.longs("test-other-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-other-msecs")).getAggregate(false))), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-other-msecs")).getAggregate(false))), counterSet); } @Test public void testExecutionContextPlumbing() throws Exception { List instructions = - Arrays.asList( - createReadInstruction("Read"), - createParDoInstruction(0, 0, "DoFn1"), - createParDoInstruction(1, 0, "DoFn2"), - createWriteInstruction(2, 0, "Write")); + 
Arrays.asList(createReadInstruction("Read"), createParDoInstruction(0, 0, "DoFn1"), + createParDoInstruction(1, 0, "DoFn2"), createWriteInstruction(2, 0, "Write")); MapTask mapTask = new MapTask(); mapTask.setInstructions(instructions); @@ -174,24 +149,21 @@ public void testExecutionContextPlumbing() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); try (MapTaskExecutor executor = - MapTaskExecutorFactory.create( - PipelineOptionsFactory.create(), mapTask, context)) { + MapTaskExecutorFactory.create(PipelineOptionsFactory.create(), mapTask, context)) { executor.execute(); } List stepNames = new ArrayList<>(); - for (ExecutionContext.StepContext stepContext - : context.getAllStepContexts()) { + for (ExecutionContext.StepContext stepContext : context.getAllStepContexts()) { stepNames.add(stepContext.getStepName()); } assertThat(stepNames, CoreMatchers.hasItems("DoFn1", "DoFn2")); } static ParallelInstruction createReadInstruction(String name) { - CloudObject spec = CloudObject.forClass(TestSourceFactory.class); + CloudObject spec = CloudObject.forClass(TestReaderFactory.class); - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(CloudObject.forClass(StringUtf8Coder.class)); @@ -214,34 +186,25 @@ static ParallelInstruction createReadInstruction(String name) { public void testCreateReadOperation() throws Exception { CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, - counterSet.getAddCounterMutator()); - Operation operation = MapTaskExecutorFactory.createOperation( - PipelineOptionsFactory.create(), - createReadInstruction("Read"), - new BatchModeExecutionContext(), - Collections.emptyList(), - counterPrefix, - counterSet.getAddCounterMutator(), + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation(PipelineOptionsFactory.create(), + createReadInstruction("Read"), new BatchModeExecutionContext(), + Collections.emptyList(), counterPrefix, counterSet.getAddCounterMutator(), stateSampler); assertThat(operation, new IsInstanceOf(ReadOperation.class)); ReadOperation readOperation = (ReadOperation) operation; assertEquals(readOperation.receivers.length, 1); assertEquals(readOperation.receivers[0].getReceiverCount(), 0); - assertEquals(readOperation.initializationState, - Operation.InitializationState.UNSTARTED); - assertThat(readOperation.source, new IsInstanceOf(TestSource.class)); + assertEquals(readOperation.initializationState, Operation.InitializationState.UNSTARTED); + assertThat(readOperation.reader, new IsInstanceOf(TestReader.class)); assertEquals( new CounterSet( - Counter.longs("test-Read-start-msecs", SUM) - .resetToValue(0L), - Counter.longs("read_output_name-MeanByteCount", MEAN) - .resetToValue(0, 0L), + Counter.longs("test-Read-start-msecs", SUM).resetToValue(0L), + Counter.longs("read_output_name-MeanByteCount", MEAN).resetToValue(0, 0L), Counter.longs("Read-ByteCount", SUM).resetToValue(0L), - Counter.longs("test-Read-finish-msecs", SUM) - .resetToValue(0L), + Counter.longs("test-Read-finish-msecs", SUM).resetToValue(0L), Counter.longs("test-Read-read-msecs", SUM), Counter.longs("test-Read-process-msecs", SUM), Counter.longs("read_output_name-ElementCount", SUM).resetToValue(0L)), @@ -249,9 +212,7 @@ public 
void testCreateReadOperation() throws Exception { } static ParallelInstruction createWriteInstruction( - int producerIndex, - int producerOutputNum, - String systemName) { + int producerIndex, int producerOutputNum, String systemName) { InstructionInput cloudInput = new InstructionInput(); cloudInput.setProducerInstructionIndex(producerIndex); cloudInput.setOutputNum(producerOutputNum); @@ -276,10 +237,8 @@ static ParallelInstruction createWriteInstruction( @Test public void testCreateWriteOperation() throws Exception { - List priorOperations = Arrays.asList(new Operation[]{ - new TestOperation(3), - new TestOperation(5), - new TestOperation(1) }); + List priorOperations = Arrays.asList( + new Operation[] {new TestOperation(3), new TestOperation(5), new TestOperation(1)}); int producerIndex = 1; int producerOutputNum = 2; @@ -289,55 +248,42 @@ public void testCreateWriteOperation() throws Exception { CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, - counterSet.getAddCounterMutator()); - Operation operation = MapTaskExecutorFactory.createOperation( - PipelineOptionsFactory.create(), - instruction, - new BatchModeExecutionContext(), - priorOperations, - counterPrefix, - counterSet.getAddCounterMutator(), - stateSampler); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation(PipelineOptionsFactory.create(), + instruction, new BatchModeExecutionContext(), priorOperations, counterPrefix, + counterSet.getAddCounterMutator(), stateSampler); assertThat(operation, new IsInstanceOf(WriteOperation.class)); WriteOperation writeOperation = (WriteOperation) operation; assertEquals(writeOperation.receivers.length, 0); - assertEquals(writeOperation.initializationState, - Operation.InitializationState.UNSTARTED); - assertThat(writeOperation.sink, - new IsInstanceOf(TestSink.class)); + assertEquals(writeOperation.initializationState, Operation.InitializationState.UNSTARTED); + assertThat(writeOperation.sink, new IsInstanceOf(TestSink.class)); assertSame( writeOperation, - priorOperations.get(producerIndex).receivers[producerOutputNum] - .getOnlyReceiver()); + priorOperations.get(producerIndex).receivers[producerOutputNum].getOnlyReceiver()); assertEquals( - new CounterSet( - Counter.longs("WriteOperation-ByteCount", SUM) - .resetToValue(0L), + new CounterSet(Counter.longs("WriteOperation-ByteCount", SUM).resetToValue(0L), Counter.longs("test-WriteOperation-start-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-WriteOperation-start-msecs")).getAggregate(false)), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-start-msecs")).getAggregate(false)), Counter.longs("test-WriteOperation-process-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-WriteOperation-process-msecs")).getAggregate(false)), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-process-msecs")).getAggregate(false)), Counter.longs("test-WriteOperation-finish-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-WriteOperation-finish-msecs")).getAggregate(false))), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-WriteOperation-finish-msecs")).getAggregate(false))), counterSet); } static class TestDoFn extends DoFn { @Override - public void processElement(ProcessContext c) { } + public void 
processElement(ProcessContext c) {} } static ParallelInstruction createParDoInstruction( - int producerIndex, - int producerOutputNum, - String systemName) { + int producerIndex, int producerOutputNum, String systemName) { InstructionInput cloudInput = new InstructionInput(); cloudInput.setProducerInstructionIndex(producerIndex); cloudInput.setOutputNum(producerOutputNum); @@ -345,8 +291,7 @@ static ParallelInstruction createParDoInstruction( TestDoFn fn = new TestDoFn(); String serializedFn = - StringUtils.byteArrayToJsonString( - SerializableUtils.serializeToByteArray(fn)); + StringUtils.byteArrayToJsonString(SerializableUtils.serializeToByteArray(fn)); CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); addString(cloudUserFn, PropertyNames.SERIALIZED_FN, serializedFn); @@ -369,10 +314,8 @@ static ParallelInstruction createParDoInstruction( @Test public void testCreateParDoOperation() throws Exception { - List priorOperations = Arrays.asList(new Operation[]{ - new TestOperation(3), - new TestOperation(5), - new TestOperation(1) }); + List priorOperations = Arrays.asList( + new Operation[] {new TestOperation(3), new TestOperation(5), new TestOperation(1)}); int producerIndex = 1; int producerOutputNum = 2; @@ -383,62 +326,46 @@ public void testCreateParDoOperation() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, - counterSet.getAddCounterMutator()); - Operation operation = MapTaskExecutorFactory.createOperation( - PipelineOptionsFactory.create(), - instruction, - context, - priorOperations, - counterPrefix, - counterSet.getAddCounterMutator(), stateSampler); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation(PipelineOptionsFactory.create(), + instruction, context, priorOperations, counterPrefix, counterSet.getAddCounterMutator(), + stateSampler); assertThat(operation, new IsInstanceOf(ParDoOperation.class)); ParDoOperation parDoOperation = (ParDoOperation) operation; assertEquals(parDoOperation.receivers.length, 1); assertEquals(parDoOperation.receivers[0].getReceiverCount(), 0); - assertEquals(parDoOperation.initializationState, - Operation.InitializationState.UNSTARTED); - assertThat(parDoOperation.fn, - new IsInstanceOf(NormalParDoFn.class)); + assertEquals(parDoOperation.initializationState, Operation.InitializationState.UNSTARTED); + assertThat(parDoOperation.fn, new IsInstanceOf(NormalParDoFn.class)); NormalParDoFn normalParDoFn = (NormalParDoFn) parDoOperation.fn; - assertThat(normalParDoFn.fn, - new IsInstanceOf(TestDoFn.class)); + assertThat(normalParDoFn.fn, new IsInstanceOf(TestDoFn.class)); assertSame( parDoOperation, - priorOperations.get(producerIndex).receivers[producerOutputNum] - .getOnlyReceiver()); + priorOperations.get(producerIndex).receivers[producerOutputNum].getOnlyReceiver()); assertEquals(context, normalParDoFn.executionContext); } static ParallelInstruction createPartialGroupByKeyInstruction( - int producerIndex, - int producerOutputNum) { + int producerIndex, int producerOutputNum) { InstructionInput cloudInput = new InstructionInput(); cloudInput.setProducerInstructionIndex(producerIndex); cloudInput.setOutputNum(producerOutputNum); - PartialGroupByKeyInstruction pgbkInstruction = - new PartialGroupByKeyInstruction(); + PartialGroupByKeyInstruction pgbkInstruction = new 
PartialGroupByKeyInstruction(); pgbkInstruction.setInput(cloudInput); - pgbkInstruction.setInputElementCodec( - makeCloudEncoding(FullWindowedValueCoder.class.getName(), - makeCloudEncoding("KvCoder", - makeCloudEncoding("StringUtf8Coder"), - makeCloudEncoding("BigEndianIntegerCoder")), - IntervalWindow.getCoder().asCloudObject())); + pgbkInstruction.setInputElementCodec(makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding("KvCoder", makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("BigEndianIntegerCoder")), + IntervalWindow.getCoder().asCloudObject())); InstructionOutput output = new InstructionOutput(); output.setName("pgbk_output_name"); - output.setCodec(makeCloudEncoding( - "KvCoder", - makeCloudEncoding("StringUtf8Coder"), - makeCloudEncoding( - "IterableCoder", - makeCloudEncoding("BigEndianIntegerCoder")))); + output.setCodec(makeCloudEncoding("KvCoder", makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("IterableCoder", makeCloudEncoding("BigEndianIntegerCoder")))); ParallelInstruction instruction = new ParallelInstruction(); instruction.setPartialGroupByKey(pgbkInstruction); @@ -449,10 +376,8 @@ static ParallelInstruction createPartialGroupByKeyInstruction( @Test public void testCreatePartialGroupByKeyOperation() throws Exception { - List priorOperations = Arrays.asList(new Operation[]{ - new TestOperation(3), - new TestOperation(5), - new TestOperation(1) }); + List priorOperations = Arrays.asList( + new Operation[] {new TestOperation(3), new TestOperation(5), new TestOperation(1)}); int producerIndex = 1; int producerOutputNum = 2; @@ -462,37 +387,24 @@ public void testCreatePartialGroupByKeyOperation() throws Exception { CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, - counterSet.getAddCounterMutator()); - Operation operation = MapTaskExecutorFactory.createOperation( - PipelineOptionsFactory.create(), - instruction, - new BatchModeExecutionContext(), - priorOperations, - counterPrefix, - counterSet.getAddCounterMutator(), - stateSampler); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation(PipelineOptionsFactory.create(), + instruction, new BatchModeExecutionContext(), priorOperations, counterPrefix, + counterSet.getAddCounterMutator(), stateSampler); assertThat(operation, instanceOf(PartialGroupByKeyOperation.class)); - PartialGroupByKeyOperation pgbkOperation = - (PartialGroupByKeyOperation) operation; + PartialGroupByKeyOperation pgbkOperation = (PartialGroupByKeyOperation) operation; assertEquals(pgbkOperation.receivers.length, 1); assertEquals(pgbkOperation.receivers[0].getReceiverCount(), 0); - assertEquals(pgbkOperation.initializationState, - Operation.InitializationState.UNSTARTED); + assertEquals(pgbkOperation.initializationState, Operation.InitializationState.UNSTARTED); assertSame( pgbkOperation, - priorOperations.get(producerIndex).receivers[producerOutputNum] - .getOnlyReceiver()); + priorOperations.get(producerIndex).receivers[producerOutputNum].getOnlyReceiver()); } - static ParallelInstruction createFlattenInstruction( - int producerIndex1, - int producerOutputNum1, - int producerIndex2, - int producerOutputNum2, - String systemName) { + static ParallelInstruction createFlattenInstruction(int producerIndex1, int producerOutputNum1, + int producerIndex2, int producerOutputNum2, String systemName) { List cloudInputs = new 
ArrayList<>(); InstructionInput cloudInput1 = new InstructionInput(); @@ -522,47 +434,35 @@ static ParallelInstruction createFlattenInstruction( @Test public void testCreateFlattenOperation() throws Exception { - List priorOperations = Arrays.asList(new Operation[]{ - new TestOperation(3), - new TestOperation(5), - new TestOperation(1) }); + List priorOperations = Arrays.asList( + new Operation[] {new TestOperation(3), new TestOperation(5), new TestOperation(1)}); int producerIndex1 = 1; int producerOutputNum1 = 2; int producerIndex2 = 0; int producerOutputNum2 = 1; - ParallelInstruction instruction = - createFlattenInstruction(producerIndex1, producerOutputNum1, - producerIndex2, producerOutputNum2, "Flatten"); + ParallelInstruction instruction = createFlattenInstruction( + producerIndex1, producerOutputNum1, producerIndex2, producerOutputNum2, "Flatten"); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, - counterSet.getAddCounterMutator()); - Operation operation = MapTaskExecutorFactory.createOperation( - PipelineOptionsFactory.create(), - instruction, - new BatchModeExecutionContext(), - priorOperations, - counterPrefix, - counterSet.getAddCounterMutator(), - stateSampler); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); + Operation operation = MapTaskExecutorFactory.createOperation(PipelineOptionsFactory.create(), + instruction, new BatchModeExecutionContext(), priorOperations, counterPrefix, + counterSet.getAddCounterMutator(), stateSampler); assertThat(operation, new IsInstanceOf(FlattenOperation.class)); FlattenOperation flattenOperation = (FlattenOperation) operation; assertEquals(flattenOperation.receivers.length, 1); assertEquals(flattenOperation.receivers[0].getReceiverCount(), 0); - assertEquals(flattenOperation.initializationState, - Operation.InitializationState.UNSTARTED); + assertEquals(flattenOperation.initializationState, Operation.InitializationState.UNSTARTED); assertSame( flattenOperation, - priorOperations.get(producerIndex1).receivers[producerOutputNum1] - .getOnlyReceiver()); + priorOperations.get(producerIndex1).receivers[producerOutputNum1].getOnlyReceiver()); assertSame( flattenOperation, - priorOperations.get(producerIndex2).receivers[producerOutputNum2] - .getOnlyReceiver()); + priorOperations.get(producerIndex2).receivers[producerOutputNum2].getOnlyReceiver()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderTest.java similarity index 78% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderTest.java index 2b88456011dc4..11d7c0616cf94 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/PartitioningShuffleReaderTest.java @@ -24,9 +24,9 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import 
com.google.cloud.dataflow.sdk.util.common.worker.Sink; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Lists; @@ -43,10 +43,10 @@ import java.util.NoSuchElementException; /** - * Tests for PartitioningShuffleSource. + * Tests for PartitioningShuffleReader. */ @RunWith(JUnit4.class) -public class PartitioningShuffleSourceTest { +public class PartitioningShuffleReaderTest { private static final List>> NO_KVS = Collections.emptyList(); private static final Instant timestamp = new Instant(123000); @@ -64,23 +64,20 @@ public class PartitioningShuffleSourceTest { WindowedValue.of(KV.of(4, "in 4d"), timestamp, Lists.newArrayList(window)), WindowedValue.of(KV.of(5, "in 5"), timestamp, Lists.newArrayList(window))); - private void runTestReadShuffleSource(List>> expected) + private void runTestReadFromShuffle(List>> expected) throws Exception { Coder>> elemCoder = WindowedValue.getFullCoder( - KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), - IntervalWindow.getCoder()); + KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder()); // Write to shuffle with PARTITION_KEYS ShuffleSink. ShuffleSink> shuffleSink = new ShuffleSink<>( - PipelineOptionsFactory.create(), - null, ShuffleSink.ShuffleKind.PARTITION_KEYS, - elemCoder); + PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.PARTITION_KEYS, elemCoder); TestShuffleWriter shuffleWriter = new TestShuffleWriter(); List actualSizes = new ArrayList<>(); try (Sink.SinkWriter>> shuffleSinkWriter = - shuffleSink.writer(shuffleWriter)) { + shuffleSink.writer(shuffleWriter)) { for (WindowedValue> value : expected) { actualSizes.add(shuffleSinkWriter.add(value)); } @@ -89,14 +86,12 @@ private void runTestReadShuffleSource(List>> e Assert.assertEquals(expected.size(), records.size()); Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); - // Read from shuffle with PartitioningShuffleSource. - PartitioningShuffleSource shuffleSource = - new PartitioningShuffleSource<>( - PipelineOptionsFactory.create(), - null, null, null, - elemCoder); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(shuffleSource); + // Read from shuffle with PartitioningShuffleReader. 
+ PartitioningShuffleReader partitioningShuffleReader = + new PartitioningShuffleReader<>( + PipelineOptionsFactory.create(), null, null, null, elemCoder); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(partitioningShuffleReader); TestShuffleReader shuffleReader = new TestShuffleReader(); List expectedSizes = new ArrayList<>(); @@ -106,8 +101,8 @@ private void runTestReadShuffleSource(List>> e } List>> actual = new ArrayList<>(); - try (Source.SourceIterator>> iter = - shuffleSource.iterator(shuffleReader)) { + try (Reader.ReaderIterator>> iter = + partitioningShuffleReader.iterator(shuffleReader)) { while (iter.hasNext()) { Assert.assertTrue(iter.hasNext()); actual.add(iter.next()); @@ -126,12 +121,12 @@ private void runTestReadShuffleSource(List>> e } @Test - public void testReadEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(NO_KVS); + public void testReadEmptyShuffleData() throws Exception { + runTestReadFromShuffle(NO_KVS); } @Test - public void testReadNonEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(KVS); + public void testReadNonEmptyShuffleData() throws Exception { + runTestReadFromShuffle(KVS); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactoryTest.java similarity index 52% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactoryTest.java index 4b4665b55869d..1a3ab6a8f6a2a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactoryTest.java @@ -19,13 +19,14 @@ import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.ExecutionContext; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.hamcrest.CoreMatchers; import org.hamcrest.core.IsInstanceOf; @@ -37,29 +38,29 @@ import java.util.NoSuchElementException; /** - * Tests for SourceFactory. + * Tests for ReaderFactory. */ @RunWith(JUnit4.class) -public class SourceFactoryTest { - static class TestSourceFactory { - public static TestSource create(PipelineOptions options, - CloudObject o, - Coder coder, - ExecutionContext executionContext) { - return new TestSource(); +public class ReaderFactoryTest { + static class TestReaderFactory { + public static TestReader create(PipelineOptions options, CloudObject o, Coder coder, + ExecutionContext executionContext) { + return new TestReader(); } } - static class TestSource extends Source { + static class TestReader extends Reader { @Override - public SourceIterator iterator() { - return new TestSourceIterator(); + public ReaderIterator iterator() { + return new TestReaderIterator(); } /** A source iterator that produces no values, for testing. 
*/ - class TestSourceIterator extends AbstractSourceIterator { + class TestReaderIterator extends AbstractReaderIterator { @Override - public boolean hasNext() { return false; } + public boolean hasNext() { + return false; + } @Override public Integer next() { @@ -67,58 +68,49 @@ public Integer next() { } @Override - public void close() { - } + public void close() {} } } @Test - public void testCreatePredefinedSource() throws Exception { + public void testCreatePredefinedReader() throws Exception { CloudObject spec = CloudObject.forClassName("TextSource"); addString(spec, "filename", "/path/to/file.txt"); - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(makeCloudEncoding("StringUtf8Coder")); - Source source = SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); - Assert.assertThat(source, new IsInstanceOf(TextSource.class)); + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + Assert.assertThat(reader, new IsInstanceOf(TextReader.class)); } @Test - public void testCreateUserDefinedSource() throws Exception { - CloudObject spec = CloudObject.forClass(TestSourceFactory.class); + public void testCreateUserDefinedReader() throws Exception { + CloudObject spec = CloudObject.forClass(TestReaderFactory.class); - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(makeCloudEncoding("BigEndianIntegerCoder")); - Source source = SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); - Assert.assertThat(source, new IsInstanceOf(TestSource.class)); + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + Assert.assertThat(reader, new IsInstanceOf(TestReader.class)); } @Test - public void testCreateUnknownSource() throws Exception { + public void testCreateUnknownReader() throws Exception { CloudObject spec = CloudObject.forClassName("UnknownSource"); - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(makeCloudEncoding("StringUtf8Coder")); try { - SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); + ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); Assert.fail("should have thrown an exception"); } catch (Exception exn) { - Assert.assertThat(exn.toString(), - CoreMatchers.containsString( - "unable to create a source")); + Assert.assertThat(exn.toString(), CoreMatchers.containsString("unable to create a source")); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReaderFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReaderFactoryTest.java new file mode 100644 index 0000000000000..ab1fa9145b3cd --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleReaderFactoryTest.java @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.client.util.Base64.encodeBase64String; +import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import org.hamcrest.core.IsInstanceOf; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import javax.annotation.Nullable; + +/** + * Tests for UngroupedShuffleReaderFactory, GroupingShuffleReaderFactory, + * and PartitioningShuffleReaderFactory. 
+ */ +@RunWith(JUnit4.class) +@SuppressWarnings({"rawtypes", "unchecked"}) +public class ShuffleReaderFactoryTest { + T runTestCreateShuffleReader(byte[] shuffleReaderConfig, + @Nullable String start, @Nullable String end, CloudObject encoding, + BatchModeExecutionContext context, Class shuffleReaderClass, String shuffleSourceAlias) + throws Exception { + CloudObject spec = CloudObject.forClassName(shuffleSourceAlias); + addString(spec, "shuffle_reader_config", encodeBase64String(shuffleReaderConfig)); + if (start != null) { + addString(spec, "start_shuffle_position", start); + } + if (end != null) { + addString(spec, "end_shuffle_position", end); + } + + Source cloudSource = new Source(); + cloudSource.setSpec(spec); + cloudSource.setCodec(encoding); + + Reader reader = ReaderFactory.create(PipelineOptionsFactory.create(), cloudSource, context); + Assert.assertThat(reader, new IsInstanceOf(shuffleReaderClass)); + T shuffleSource = (T) reader; + return shuffleSource; + } + + void runTestCreateUngroupedShuffleReader(byte[] shuffleReaderConfig, @Nullable String start, + @Nullable String end, CloudObject encoding, Coder coder) throws Exception { + UngroupedShuffleReader ungroupedShuffleReader = runTestCreateShuffleReader(shuffleReaderConfig, + start, end, encoding, new BatchModeExecutionContext(), UngroupedShuffleReader.class, + "UngroupedShuffleSource"); + Assert.assertArrayEquals(shuffleReaderConfig, ungroupedShuffleReader.shuffleReaderConfig); + Assert.assertEquals(start, ungroupedShuffleReader.startShufflePosition); + Assert.assertEquals(end, ungroupedShuffleReader.stopShufflePosition); + + Assert.assertEquals(coder, ungroupedShuffleReader.coder); + } + + void runTestCreateGroupingShuffleReader(byte[] shuffleReaderConfig, @Nullable String start, + @Nullable String end, CloudObject encoding, Coder keyCoder, Coder valueCoder) + throws Exception { + BatchModeExecutionContext context = new BatchModeExecutionContext(); + GroupingShuffleReader groupingShuffleReader = runTestCreateShuffleReader( + shuffleReaderConfig, start, end, encoding, context, GroupingShuffleReader.class, + "GroupingShuffleSource"); + Assert.assertArrayEquals(shuffleReaderConfig, groupingShuffleReader.shuffleReaderConfig); + Assert.assertEquals(start, groupingShuffleReader.startShufflePosition); + Assert.assertEquals(end, groupingShuffleReader.stopShufflePosition); + + Assert.assertEquals(keyCoder, groupingShuffleReader.keyCoder); + Assert.assertEquals(valueCoder, groupingShuffleReader.valueCoder); + Assert.assertEquals(context, groupingShuffleReader.executionContext); + } + + void runTestCreatePartitioningShuffleReader(byte[] shuffleReaderConfig, @Nullable String start, + @Nullable String end, CloudObject encoding, Coder keyCoder, Coder windowedValueCoder) + throws Exception { + PartitioningShuffleReader partitioningShuffleReader = + runTestCreateShuffleReader(shuffleReaderConfig, start, end, encoding, + new BatchModeExecutionContext(), PartitioningShuffleReader.class, + "PartitioningShuffleSource"); + Assert.assertArrayEquals(shuffleReaderConfig, partitioningShuffleReader.shuffleReaderConfig); + Assert.assertEquals(start, partitioningShuffleReader.startShufflePosition); + Assert.assertEquals(end, partitioningShuffleReader.stopShufflePosition); + + Assert.assertEquals(keyCoder, partitioningShuffleReader.keyCoder); + Assert.assertEquals(windowedValueCoder, partitioningShuffleReader.windowedValueCoder); + } + + @Test + public void testCreatePlainUngroupedShuffleReader() throws Exception { + 
runTestCreateUngroupedShuffleReader(new byte[] {(byte) 0xE1}, null, null, + makeCloudEncoding("StringUtf8Coder"), StringUtf8Coder.of()); + } + + @Test + public void testCreateRichUngroupedShuffleReader() throws Exception { + runTestCreateUngroupedShuffleReader(new byte[] {(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding("BigEndianIntegerCoder"), BigEndianIntegerCoder.of()); + } + + @Test + public void testCreatePlainGroupingShuffleReader() throws Exception { + runTestCreateGroupingShuffleReader( + new byte[] {(byte) 0xE1}, null, null, + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding("KvCoder", makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding("IterableCoder", makeCloudEncoding("StringUtf8Coder"))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), StringUtf8Coder.of()); + } + + @Test + public void testCreateRichGroupingShuffleReader() throws Exception { + runTestCreateGroupingShuffleReader( + new byte[] {(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding( + "IterableCoder", + makeCloudEncoding("KvCoder", makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("VoidCoder")))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), KvCoder.of(StringUtf8Coder.of(), VoidCoder.of())); + } + + @Test + public void testCreatePlainPartitioningShuffleReader() throws Exception { + runTestCreatePartitioningShuffleReader( + new byte[] {(byte) 0xE1}, null, null, + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding("KvCoder", makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding("StringUtf8Coder")), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + FullWindowedValueCoder.of(StringUtf8Coder.of(), IntervalWindow.getCoder())); + } + + @Test + public void testCreateRichPartitioningShuffleReader() throws Exception { + runTestCreatePartitioningShuffleReader( + new byte[] {(byte) 0xE2}, "aaa", "zzz", + makeCloudEncoding( + FullWindowedValueCoder.class.getName(), + makeCloudEncoding( + "KvCoder", makeCloudEncoding("BigEndianIntegerCoder"), + makeCloudEncoding("KvCoder", makeCloudEncoding("StringUtf8Coder"), + makeCloudEncoding("VoidCoder"))), + IntervalWindow.getCoder().asCloudObject()), + BigEndianIntegerCoder.of(), + FullWindowedValueCoder.of( + KvCoder.of(StringUtf8Coder.of(), VoidCoder.of()), IntervalWindow.getCoder())); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java deleted file mode 100644 index 1803b06d7fd1e..0000000000000 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSourceFactoryTest.java +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -package com.google.cloud.dataflow.sdk.runners.worker; - -import static com.google.api.client.util.Base64.encodeBase64String; -import static com.google.cloud.dataflow.sdk.util.CoderUtils.makeCloudEncoding; -import static com.google.cloud.dataflow.sdk.util.Structs.addString; - -import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; -import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.coders.KvCoder; -import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; -import com.google.cloud.dataflow.sdk.coders.VoidCoder; -import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; -import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; -import com.google.cloud.dataflow.sdk.util.CloudObject; -import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; - -import org.hamcrest.core.IsInstanceOf; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -import javax.annotation.Nullable; - -/** - * Tests for UngroupedShuffleSourceFactory, GroupingShuffleSourceFactory, - * and PartitioningShuffleSourceFactory. - */ -@RunWith(JUnit4.class) -@SuppressWarnings({"rawtypes", "unchecked"}) -public class ShuffleSourceFactoryTest { - - T runTestCreateShuffleSource(byte[] shuffleReaderConfig, - @Nullable String start, - @Nullable String end, - CloudObject encoding, - BatchModeExecutionContext context, - Class shuffleSourceClass) - throws Exception { - CloudObject spec = CloudObject.forClassName(shuffleSourceClass.getSimpleName()); - addString(spec, "shuffle_reader_config", encodeBase64String(shuffleReaderConfig)); - if (start != null) { - addString(spec, "start_shuffle_position", start); - } - if (end != null) { - addString(spec, "end_shuffle_position", end); - } - - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); - cloudSource.setSpec(spec); - cloudSource.setCodec(encoding); - - Source source = SourceFactory.create( - PipelineOptionsFactory.create(), cloudSource, context); - Assert.assertThat(source, new IsInstanceOf(shuffleSourceClass)); - T shuffleSource = (T) source; - return shuffleSource; - } - - void runTestCreateUngroupedShuffleSource(byte[] shuffleReaderConfig, - @Nullable String start, - @Nullable String end, - CloudObject encoding, - Coder coder) throws Exception { - UngroupedShuffleSource shuffleSource = - runTestCreateShuffleSource(shuffleReaderConfig, - start, - end, - encoding, - new BatchModeExecutionContext(), - UngroupedShuffleSource.class); - Assert.assertArrayEquals(shuffleReaderConfig, - shuffleSource.shuffleReaderConfig); - Assert.assertEquals(start, shuffleSource.startShufflePosition); - Assert.assertEquals(end, shuffleSource.stopShufflePosition); - - Assert.assertEquals(coder, shuffleSource.coder); - } - - void runTestCreateGroupingShuffleSource(byte[] shuffleReaderConfig, - @Nullable String start, - @Nullable String end, - CloudObject encoding, - Coder keyCoder, - Coder valueCoder) throws Exception { - BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleSource shuffleSource = - runTestCreateShuffleSource(shuffleReaderConfig, - start, - end, - encoding, - context, - GroupingShuffleSource.class); - Assert.assertArrayEquals(shuffleReaderConfig, - shuffleSource.shuffleReaderConfig); - 
Assert.assertEquals(start, shuffleSource.startShufflePosition); - Assert.assertEquals(end, shuffleSource.stopShufflePosition); - - Assert.assertEquals(keyCoder, shuffleSource.keyCoder); - Assert.assertEquals(valueCoder, shuffleSource.valueCoder); - Assert.assertEquals(context, shuffleSource.executionContext); - } - - void runTestCreatePartitioningShuffleSource(byte[] shuffleReaderConfig, - @Nullable String start, - @Nullable String end, - CloudObject encoding, - Coder keyCoder, - Coder windowedValueCoder) throws Exception { - PartitioningShuffleSource shuffleSource = - runTestCreateShuffleSource(shuffleReaderConfig, - start, - end, - encoding, - new BatchModeExecutionContext(), - PartitioningShuffleSource.class); - Assert.assertArrayEquals(shuffleReaderConfig, - shuffleSource.shuffleReaderConfig); - Assert.assertEquals(start, shuffleSource.startShufflePosition); - Assert.assertEquals(end, shuffleSource.stopShufflePosition); - - Assert.assertEquals(keyCoder, shuffleSource.keyCoder); - Assert.assertEquals(windowedValueCoder, shuffleSource.windowedValueCoder); - } - - @Test - public void testCreatePlainUngroupedShuffleSource() throws Exception { - runTestCreateUngroupedShuffleSource( - new byte[]{(byte) 0xE1}, null, null, - makeCloudEncoding("StringUtf8Coder"), - StringUtf8Coder.of()); - } - - @Test - public void testCreateRichUngroupedShuffleSource() throws Exception { - runTestCreateUngroupedShuffleSource( - new byte[]{(byte) 0xE2}, "aaa", "zzz", - makeCloudEncoding("BigEndianIntegerCoder"), - BigEndianIntegerCoder.of()); - } - - @Test - public void testCreatePlainGroupingShuffleSource() throws Exception { - runTestCreateGroupingShuffleSource( - new byte[]{(byte) 0xE1}, null, null, - makeCloudEncoding( - FullWindowedValueCoder.class.getName(), - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("BigEndianIntegerCoder"), - makeCloudEncoding( - "IterableCoder", - makeCloudEncoding("StringUtf8Coder"))), - IntervalWindow.getCoder().asCloudObject()), - BigEndianIntegerCoder.of(), - StringUtf8Coder.of()); - } - - @Test - public void testCreateRichGroupingShuffleSource() throws Exception { - runTestCreateGroupingShuffleSource( - new byte[]{(byte) 0xE2}, "aaa", "zzz", - makeCloudEncoding( - FullWindowedValueCoder.class.getName(), - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("BigEndianIntegerCoder"), - makeCloudEncoding( - "IterableCoder", - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("StringUtf8Coder"), - makeCloudEncoding("VoidCoder")))), - IntervalWindow.getCoder().asCloudObject()), - BigEndianIntegerCoder.of(), - KvCoder.of(StringUtf8Coder.of(), VoidCoder.of())); - } - - @Test - public void testCreatePlainPartitioningShuffleSource() throws Exception { - runTestCreatePartitioningShuffleSource( - new byte[]{(byte) 0xE1}, null, null, - makeCloudEncoding( - FullWindowedValueCoder.class.getName(), - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("BigEndianIntegerCoder"), - makeCloudEncoding("StringUtf8Coder")), - IntervalWindow.getCoder().asCloudObject()), - BigEndianIntegerCoder.of(), - FullWindowedValueCoder.of(StringUtf8Coder.of(), IntervalWindow.getCoder())); - } - - @Test - public void testCreateRichPartitioningShuffleSource() throws Exception { - runTestCreatePartitioningShuffleSource( - new byte[]{(byte) 0xE2}, "aaa", "zzz", - makeCloudEncoding( - FullWindowedValueCoder.class.getName(), - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("BigEndianIntegerCoder"), - makeCloudEncoding( - "KvCoder", - makeCloudEncoding("StringUtf8Coder"), - 
makeCloudEncoding("VoidCoder"))), - IntervalWindow.getCoder().asCloudObject()), - BigEndianIntegerCoder.of(), - FullWindowedValueCoder.of(KvCoder.of(StringUtf8Coder.of(), VoidCoder.of()), - IntervalWindow.getCoder())); - } -} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java index ea879335ec02a..343e172715ad3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertThat; import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; @@ -41,28 +42,23 @@ */ @RunWith(JUnit4.class) public class SideInputUtilsTest { - SideInputInfo createSingletonSideInputInfo( - com.google.api.services.dataflow.model.Source sideInputSource) { + SideInputInfo createSingletonSideInputInfo(Source sideInputSource) { SideInputInfo sideInputInfo = new SideInputInfo(); sideInputInfo.setSources(Arrays.asList(sideInputSource)); sideInputInfo.setKind(CloudObject.forClassName("singleton")); return sideInputInfo; } - SideInputInfo createCollectionSideInputInfo( - com.google.api.services.dataflow.model.Source... sideInputSources) { + SideInputInfo createCollectionSideInputInfo(Source... sideInputSources) { SideInputInfo sideInputInfo = new SideInputInfo(); sideInputInfo.setSources(Arrays.asList(sideInputSources)); sideInputInfo.setKind(CloudObject.forClassName("collection")); return sideInputInfo; } - com.google.api.services.dataflow.model.Source createSideInputSource(Integer... ints) - throws Exception { - return InMemorySourceFactoryTest.createInMemoryCloudSource( - Arrays.asList(ints), - null, null, - BigEndianIntegerCoder.of()); + Source createSideInputSource(Integer... ints) throws Exception { + return InMemoryReaderFactoryTest.createInMemoryCloudSource( + Arrays.asList(ints), null, null, BigEndianIntegerCoder.of()); } void assertThatContains(Object actual, Object... expected) { @@ -77,52 +73,41 @@ void assertThatContains(Object actual, Object... 
expected) { @Test public void testReadSingletonSideInput() throws Exception { - SideInputInfo sideInputInfo = - createSingletonSideInputInfo(createSideInputSource(42)); + SideInputInfo sideInputInfo = createSingletonSideInputInfo(createSideInputSource(42)); - assertEquals(42, - SideInputUtils.readSideInput(PipelineOptionsFactory.create(), - sideInputInfo, - new BatchModeExecutionContext())); + assertEquals( + 42, + SideInputUtils.readSideInput( + PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext())); } @Test public void testReadEmptyCollectionSideInput() throws Exception { - SideInputInfo sideInputInfo = - createCollectionSideInputInfo(createSideInputSource()); + SideInputInfo sideInputInfo = createCollectionSideInputInfo(createSideInputSource()); - assertThatContains( - SideInputUtils.readSideInput(PipelineOptionsFactory.create(), - sideInputInfo, - new BatchModeExecutionContext())); + assertThatContains(SideInputUtils.readSideInput( + PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext())); } @Test public void testReadCollectionSideInput() throws Exception { - SideInputInfo sideInputInfo = - createCollectionSideInputInfo(createSideInputSource(3, 4, 5, 6)); + SideInputInfo sideInputInfo = createCollectionSideInputInfo(createSideInputSource(3, 4, 5, 6)); assertThatContains( - SideInputUtils.readSideInput(PipelineOptionsFactory.create(), - sideInputInfo, - new BatchModeExecutionContext()), + SideInputUtils.readSideInput( + PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()), 3, 4, 5, 6); } @Test public void testReadCollectionShardedSideInput() throws Exception { SideInputInfo sideInputInfo = - createCollectionSideInputInfo( - createSideInputSource(3), - createSideInputSource(), - createSideInputSource(4, 5), - createSideInputSource(6), - createSideInputSource()); + createCollectionSideInputInfo(createSideInputSource(3), createSideInputSource(), + createSideInputSource(4, 5), createSideInputSource(6), createSideInputSource()); assertThatContains( - SideInputUtils.readSideInput(PipelineOptionsFactory.create(), - sideInputInfo, - new BatchModeExecutionContext()), + SideInputUtils.readSideInput( + PipelineOptionsFactory.create(), sideInputInfo, new BatchModeExecutionContext()), 3, 4, 5, 6); } @@ -131,15 +116,13 @@ public void testReadSingletonSideInputValue() throws Exception { CloudObject sideInputKind = CloudObject.forClassName("singleton"); Object elem = "hi"; List elems = Arrays.asList(elem); - assertEquals(elem, - SideInputUtils.readSideInputValue(sideInputKind, elems)); + assertEquals(elem, SideInputUtils.readSideInputValue(sideInputKind, elems)); } @Test public void testReadCollectionSideInputValue() throws Exception { CloudObject sideInputKind = CloudObject.forClassName("collection"); List elems = Arrays.asList("hi", "there", "bob"); - assertEquals(elems, - SideInputUtils.readSideInputValue(sideInputKind, elems)); + assertEquals(elems, SideInputUtils.readSideInputValue(sideInputKind, elems)); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java similarity index 56% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java index 05d8a721fefb3..160eace961a72 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java @@ -21,13 +21,14 @@ import static com.google.cloud.dataflow.sdk.util.Structs.addLong; import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.hamcrest.core.IsInstanceOf; import org.junit.Assert; @@ -38,16 +39,12 @@ import javax.annotation.Nullable; /** - * Tests for TextSourceFactory. + * Tests for TextReaderFactory. */ @RunWith(JUnit4.class) -public class TextSourceFactoryTest { - void runTestCreateTextSource(String filename, - @Nullable Boolean stripTrailingNewlines, - @Nullable Long start, - @Nullable Long end, - CloudObject encoding, - Coder coder) +public class TextReaderFactoryTest { + void runTestCreateTextReader(String filename, @Nullable Boolean stripTrailingNewlines, + @Nullable Long start, @Nullable Long end, CloudObject encoding, Coder coder) throws Exception { CloudObject spec = CloudObject.forClassName("TextSource"); addString(spec, "filename", filename); @@ -61,38 +58,32 @@ void runTestCreateTextSource(String filename, addLong(spec, "end_offset", end); } - com.google.api.services.dataflow.model.Source cloudSource = - new com.google.api.services.dataflow.model.Source(); + Source cloudSource = new Source(); cloudSource.setSpec(spec); cloudSource.setCodec(encoding); - Source source = SourceFactory.create(PipelineOptionsFactory.create(), - cloudSource, - new BatchModeExecutionContext()); - Assert.assertThat(source, new IsInstanceOf(TextSource.class)); - TextSource textSource = (TextSource) source; - Assert.assertEquals(filename, textSource.filename); + Reader reader = ReaderFactory.create( + PipelineOptionsFactory.create(), cloudSource, new BatchModeExecutionContext()); + Assert.assertThat(reader, new IsInstanceOf(TextReader.class)); + TextReader textReader = (TextReader) reader; + Assert.assertEquals(filename, textReader.filename); Assert.assertEquals( stripTrailingNewlines == null ? 
true : stripTrailingNewlines, - textSource.stripTrailingNewlines); - Assert.assertEquals(start, textSource.startPosition); - Assert.assertEquals(end, textSource.endPosition); - Assert.assertEquals(coder, textSource.coder); + textReader.stripTrailingNewlines); + Assert.assertEquals(start, textReader.startPosition); + Assert.assertEquals(end, textReader.endPosition); + Assert.assertEquals(coder, textReader.coder); } @Test - public void testCreatePlainTextSource() throws Exception { - runTestCreateTextSource( - "/path/to/file.txt", null, null, null, - makeCloudEncoding("StringUtf8Coder"), - StringUtf8Coder.of()); + public void testCreatePlainTextReader() throws Exception { + runTestCreateTextReader("/path/to/file.txt", null, null, null, + makeCloudEncoding("StringUtf8Coder"), StringUtf8Coder.of()); } @Test - public void testCreateRichTextSource() throws Exception { - runTestCreateTextSource( - "gs://bucket/path/to/file2.txt", false, 200L, 500L, - makeCloudEncoding("TextualIntegerCoder"), - TextualIntegerCoder.of()); + public void testCreateRichTextReader() throws Exception { + runTestCreateTextReader("gs://bucket/path/to/file2.txt", false, 200L, 500L, + makeCloudEncoding("TextualIntegerCoder"), TextualIntegerCoder.of()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java similarity index 57% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index 87f8379479ad8..4ebd671361eda 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -16,7 +16,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import static org.hamcrest.Matchers.greaterThan; @@ -27,7 +27,7 @@ import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.junit.Assert; import org.junit.Rule; @@ -46,13 +46,11 @@ import java.util.List; /** - * Tests for TextSource. + * Tests for TextReader. 
*/ @RunWith(JUnit4.class) -public class TextSourceTest { - private static final String[] fileContent = {"First line\n", - "Second line\r\n", - "Third line"}; +public class TextReaderTest { + private static final String[] fileContent = {"First line\n", "Second line\r\n", "Third line"}; private static final long TOTAL_BYTES_COUNT; static { @@ -63,7 +61,8 @@ public class TextSourceTest { TOTAL_BYTES_COUNT = sumLen; } - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); private File initTestFile() throws IOException { File tmpFile = tmpFolder.newFile(); @@ -78,9 +77,9 @@ private File initTestFile() throws IOException { @Test public void testReadEmptyFile() throws Exception { - TextSource textSource = new TextSource<>( - "/dev/null", true, null, null, StringUtf8Coder.of()); - try (Source.SourceIterator iterator = textSource.iterator()) { + TextReader textReader = + new TextReader<>("/dev/null", true, null, null, StringUtf8Coder.of()); + try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertFalse(iterator.hasNext()); } } @@ -96,13 +95,13 @@ public void testStrippedNewlines() throws Exception { public void testStrippedNewlinesAtEndOfReadBuffer() throws Exception { boolean stripNewLines = true; StringBuilder payload = new StringBuilder(); - for (int i = 0; i < TextSource.BUF_SIZE - 2; ++i) { + for (int i = 0; i < TextReader.BUF_SIZE - 2; ++i) { payload.append('a'); } String[] lines = {payload.toString(), payload.toString()}; - testStringPayload(lines , "\r", stripNewLines); - testStringPayload(lines , "\r\n", stripNewLines); - testStringPayload(lines , "\n", stripNewLines); + testStringPayload(lines, "\r", stripNewLines); + testStringPayload(lines, "\r\n", stripNewLines); + testStringPayload(lines, "\n", stripNewLines); } @Test @@ -116,13 +115,13 @@ public void testUnstrippedNewlines() throws Exception { public void testUnstrippedNewlinesAtEndOfReadBuffer() throws Exception { boolean stripNewLines = false; StringBuilder payload = new StringBuilder(); - for (int i = 0; i < TextSource.BUF_SIZE - 2; ++i) { + for (int i = 0; i < TextReader.BUF_SIZE - 2; ++i) { payload.append('a'); } String[] lines = {payload.toString(), payload.toString()}; - testStringPayload(lines , "\r", stripNewLines); - testStringPayload(lines , "\r\n", stripNewLines); - testStringPayload(lines , "\n", stripNewLines); + testStringPayload(lines, "\r", stripNewLines); + testStringPayload(lines, "\r\n", stripNewLines); + testStringPayload(lines, "\n", stripNewLines); } @Test @@ -130,12 +129,12 @@ public void testStartPosition() throws Exception { File tmpFile = initTestFile(); { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertEquals("Second line\r\n", iterator.next()); Assert.assertEquals("Third line", iterator.next()); Assert.assertFalse(iterator.hasNext()); @@ -146,12 +145,12 @@ public void testStartPosition() throws Exception { } { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, 20L, null, 
StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertEquals("Third line", iterator.next()); Assert.assertFalse(iterator.hasNext()); // The first '5' in the array represents the reading of a portion of the second @@ -161,12 +160,12 @@ public void testStartPosition() throws Exception { } { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertEquals("First line", iterator.next()); Assert.assertEquals("Second line", iterator.next()); Assert.assertFalse(iterator.hasNext()); @@ -175,12 +174,12 @@ public void testStartPosition() throws Exception { } { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertEquals("Second line", iterator.next()); Assert.assertFalse(iterator.hasNext()); // The first '11' in the array represents the reading of the entire first @@ -196,22 +195,21 @@ public void testUtf8Handling() throws Exception { FileOutputStream output = new FileOutputStream(tmpFile); // first line: €\n // second line: ¢\n - output.write(new byte[]{(byte) 0xE2, (byte) 0x82, (byte) 0xAC, '\n', - (byte) 0xC2, (byte) 0xA2, '\n'}); + output.write( + new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0xAC, '\n', (byte) 0xC2, (byte) 0xA2, '\n'}); output.close(); { // 3L is after the first line if counting codepoints, but within // the first line if counting chars. So correct behavior is to return // just one line, since offsets are in chars, not codepoints. 
- TextSource textSource = new TextSource<>( - tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); - - try (Source.SourceIterator iterator = textSource.iterator()) { - Assert.assertArrayEquals("€".getBytes("UTF-8"), - iterator.next().getBytes("UTF-8")); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); + + try (Reader.ReaderIterator iterator = textReader.iterator()) { + Assert.assertArrayEquals("€".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); Assert.assertFalse(iterator.hasNext()); Assert.assertEquals(Arrays.asList(4), observer.getActualSizes()); } @@ -220,14 +218,13 @@ public void testUtf8Handling() throws Exception { { // Starting location is mid-way into a codepoint. // Ensures we don't fail when skipping over an incomplete codepoint. - TextSource textSource = new TextSource<>( - tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); - - try (Source.SourceIterator iterator = textSource.iterator()) { - Assert.assertArrayEquals("¢".getBytes("UTF-8"), - iterator.next().getBytes("UTF-8")); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); + + try (Reader.ReaderIterator iterator = textReader.iterator()) { + Assert.assertArrayEquals("¢".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); Assert.assertFalse(iterator.hasNext()); // The first '3' in the array represents the reading of a portion of the first // line, which had to be read to find the beginning of the second line. 
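For readers following the Source-to-Reader rename in this file, here is a minimal sketch (not part of the patch) of how the renamed TextReader is driven, mirroring the pattern used throughout TextReaderTest. The file path is hypothetical, and the generic type parameters are reconstructed from the coder in use, since angle-bracketed type arguments do not survive in the hunks above.

package com.google.cloud.dataflow.sdk.runners.worker;

import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

class TextReaderSketch {
  public static void main(String[] args) throws Exception {
    // Constructor arguments, as used by the tests:
    // (filename, stripTrailingNewlines, startOffset, endOffset, coder).
    // A non-null start offset is a byte offset into the file; starting
    // mid-line skips ahead to the next complete line, which is the behavior
    // testStartPosition verifies above.
    TextReader<String> reader =
        new TextReader<>("/tmp/example.txt", true, 11L, null, StringUtf8Coder.of());
    try (Reader.ReaderIterator<String> it = reader.iterator()) {
      while (it.hasNext()) {
        System.out.println(it.next()); // one line per call, trailing newline stripped
      }
    }
  }
}

The sketch lives in the same package as the tests because, like them, it relies on package-level access to the worker classes; the iteration pattern (iterator() in try-with-resources, hasNext()/next()) is exactly what the renamed Reader.ReaderIterator interface provides.
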
@@ -236,20 +233,10 @@ public void testUtf8Handling() throws Exception { } } - private void testNewlineHandling(String separator, boolean stripNewlines) - throws Exception { + private void testNewlineHandling(String separator, boolean stripNewlines) throws Exception { File tmpFile = tmpFolder.newFile(); - PrintStream writer = - new PrintStream( - new FileOutputStream(tmpFile)); - List expected = Arrays.asList( - "", - " hi there ", - "bob", - "", - " ", - "--zowie!--", - ""); + PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); + List expected = Arrays.asList("", " hi there ", "bob", "", " ", "--zowie!--", ""); List expectedSizes = new ArrayList<>(); for (String line : expected) { writer.print(line); @@ -258,13 +245,13 @@ private void testNewlineHandling(String separator, boolean stripNewlines) } writer.close(); - TextSource textSource = new TextSource<>( - tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); List actual = new ArrayList<>(); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } @@ -283,14 +270,11 @@ private void testNewlineHandling(String separator, boolean stripNewlines) Assert.assertEquals(expectedSizes, observer.getActualSizes()); } - private void testStringPayload( - String[] lines, String separator, boolean stripNewlines) + private void testStringPayload(String[] lines, String separator, boolean stripNewlines) throws Exception { File tmpFile = tmpFolder.newFile(); List expected = new ArrayList<>(); - PrintStream writer = - new PrintStream( - new FileOutputStream(tmpFile)); + PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); for (String line : lines) { writer.print(line); writer.print(separator); @@ -298,31 +282,29 @@ private void testStringPayload( } writer.close(); - TextSource textSource = new TextSource<>( - tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); List actual = new ArrayList<>(); - try (Source.SourceIterator iterator = textSource.iterator()) { - while (iterator.hasNext()) { - actual.add(iterator.next()); - } + try (Reader.ReaderIterator iterator = textReader.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); } + } Assert.assertEquals(expected, actual); } @Test - public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() - throws Exception { + public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() throws Exception { String line = "a\n"; boolean stripNewlines = false; File tmpFile = tmpFolder.newFile(); List expected = new ArrayList<>(); PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); // Write 5x the size of the buffer and 10 extra trailing bytes - for (long bytesWritten = 0; - bytesWritten < TextSource.BUF_SIZE * 3 + 10; ) { + for (long 
bytesWritten = 0; bytesWritten < TextReader.BUF_SIZE * 3 + 10;) { writer.print(line); expected.add(line); bytesWritten += line.length(); @@ -330,12 +312,11 @@ public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() writer.close(); Long fileSize = tmpFile.length(); - TextSource textSource = new TextSource<>( - tmpFile.getPath(), stripNewlines, - null, fileSize, StringUtf8Coder.of()); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), stripNewlines, null, fileSize, StringUtf8Coder.of()); List actual = new ArrayList<>(); - Source.SourceIterator iterator = textSource.iterator(); + Reader.ReaderIterator iterator = textReader.iterator(); while (iterator.hasNext()) { actual.add(iterator.next()); iterator = iterator.copy(); @@ -346,27 +327,24 @@ public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() @Test public void testNonStringCoders() throws Exception { File tmpFile = tmpFolder.newFile(); - PrintStream writer = - new PrintStream( - new FileOutputStream(tmpFile)); + PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); List expected = TestUtils.INTS; List expectedSizes = new ArrayList<>(); for (Integer elem : expected) { - byte[] encodedElem = - CoderUtils.encodeToByteArray(TextualIntegerCoder.of(), elem); + byte[] encodedElem = CoderUtils.encodeToByteArray(TextualIntegerCoder.of(), elem); writer.print(elem); writer.print("\n"); expectedSizes.add(1 + encodedElem.length); } writer.close(); - TextSource textSource = new TextSource<>( - tmpFile.getPath(), true, null, null, TextualIntegerCoder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, null, null, TextualIntegerCoder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); List actual = new ArrayList<>(); - try (Source.SourceIterator iterator = textSource.iterator()) { + try (Reader.ReaderIterator iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } @@ -379,34 +357,29 @@ public void testNonStringCoders() throws Exception { @Test public void testGetApproximatePosition() throws Exception { File tmpFile = initTestFile(); - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of()); - - try (Source.SourceIterator iterator = textSource.iterator()) { - ApproximateProgress progress = - sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(0L, - progress.getPosition().getByteOffset().longValue()); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of()); + + try (Reader.ReaderIterator iterator = textReader.iterator()) { + ApproximateProgress progress = sourceProgressToCloudProgress(iterator.getProgress()); + Assert.assertEquals(0L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(11L, - progress.getPosition().getByteOffset().longValue()); + Assert.assertEquals(11L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(24L, - progress.getPosition().getByteOffset().longValue()); + Assert.assertEquals(24L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = sourceProgressToCloudProgress(iterator.getProgress()); - 
Assert.assertEquals(34L, - progress.getPosition().getByteOffset().longValue()); + Assert.assertEquals(34L, progress.getPosition().getByteOffset().longValue()); Assert.assertFalse(iterator.hasNext()); } } @Test public void testUpdateStopPosition() throws Exception { - final long end = 10L; // in the first line - final long stop = 14L; // in the middle of the second line + final long end = 10L; // in the first line + final long stop = 14L; // in the middle of the second line File tmpFile = initTestFile(); com.google.api.services.dataflow.model.Position proposedStopPosition = @@ -414,91 +387,88 @@ public void testUpdateStopPosition() throws Exception { // Illegal proposed stop position, no update. { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, null, null, - StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); - - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); - } + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); + + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { + Assert.assertNull(iterator.updateStopPosition( + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + } } proposedStopPosition.setByteOffset(stop); // Successful update. { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, null, null, - StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); - - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); + + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { Assert.assertNull(iterator.getEndOffset()); Assert.assertEquals( stop, sourcePositionToCloudPosition( iterator.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))) - .getByteOffset().longValue()); + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))) + .getByteOffset() + .longValue()); Assert.assertEquals(stop, iterator.getEndOffset().longValue()); Assert.assertEquals(fileContent[0], iterator.next()); Assert.assertEquals(fileContent[1], iterator.next()); Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals(Arrays.asList(fileContent[0].length(), - fileContent[1].length()), - observer.getActualSizes()); + Assert.assertEquals( + Arrays.asList(fileContent[0].length(), fileContent[1].length()), + observer.getActualSizes()); } } // Proposed stop position is before the current position, no update. 
{ - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, null, null, - StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); - - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); + + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { Assert.assertEquals(fileContent[0], iterator.next()); Assert.assertEquals(fileContent[1], iterator.next()); - Assert.assertThat(sourceProgressToCloudProgress(iterator.getProgress()) - .getPosition().getByteOffset(), + Assert.assertThat( + sourceProgressToCloudProgress(iterator.getProgress()).getPosition().getByteOffset(), greaterThan(stop)); Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); Assert.assertNull(iterator.getEndOffset()); Assert.assertTrue(iterator.hasNext()); Assert.assertEquals(fileContent[2], iterator.next()); - Assert.assertEquals(Arrays.asList(fileContent[0].length(), - fileContent[1].length(), - fileContent[2].length()), - observer.getActualSizes()); + Assert.assertEquals( + Arrays.asList( + fileContent[0].length(), fileContent[1].length(), fileContent[2].length()), + observer.getActualSizes()); } } // Proposed stop position is after the current stop (end) position, no update. { - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, null, end, StringUtf8Coder.of()); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(textSource); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, null, end, StringUtf8Coder.of()); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(textReader); - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { Assert.assertEquals(fileContent[0], iterator.next()); Assert.assertNull(iterator.updateStopPosition( - cloudProgressToSourceProgress(createApproximateProgress(proposedStopPosition)))); + cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); Assert.assertEquals(end, iterator.getEndOffset().longValue()); Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals(Arrays.asList(fileContent[0].length()), - observer.getActualSizes()); + Assert.assertEquals(Arrays.asList(fileContent[0].length()), observer.getActualSizes()); } } } @@ -513,8 +483,7 @@ public void testUpdateStopPositionExhaustive() throws Exception { for (long start = 0; start < TOTAL_BYTES_COUNT - 1; start++) { for (long end = start + 1; end < TOTAL_BYTES_COUNT; end++) { for (long stop = start; stop <= end; stop++) { - stopPositionTestInternal(start, end, - stop, tmpFile); + stopPositionTestInternal(start, end, stop, tmpFile); } } } @@ -525,21 +494,18 @@ public void testUpdateStopPositionExhaustive() throws Exception { } } - private void stopPositionTestInternal(Long startOffset, - Long endOffset, - Long stopOffset, - File tmpFile) throws Exception { + private void stopPositionTestInternal( + Long 
startOffset, Long endOffset, Long stopOffset, File tmpFile) throws Exception { String readWithoutSplit; String readWithSplit1, readWithSplit2; StringBuilder accumulatedRead = new StringBuilder(); // Read from source without split attempts. - TextSource textSource = new TextSource<>( - tmpFile.getPath(), false, startOffset, endOffset, - StringUtf8Coder.of()); + TextReader textReader = + new TextReader<>(tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of()); - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } @@ -547,13 +513,12 @@ private void stopPositionTestInternal(Long startOffset, } // Read the first half of the split. - textSource = new TextSource<>( - tmpFile.getPath(), false, startOffset, stopOffset, - StringUtf8Coder.of()); + textReader = + new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, StringUtf8Coder.of()); accumulatedRead = new StringBuilder(); - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } @@ -561,13 +526,12 @@ private void stopPositionTestInternal(Long startOffset, } // Read the second half of the split. - textSource = new TextSource<>( - tmpFile.getPath(), false, stopOffset, endOffset, - StringUtf8Coder.of()); + textReader = + new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, StringUtf8Coder.of()); accumulatedRead = new StringBuilder(); - try (TextSource.TextFileIterator iterator = - (TextSource.TextFileIterator) textSource.iterator()) { + try (TextReader.TextFileIterator iterator = + (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { accumulatedRead.append((String) iterator.next()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderTest.java similarity index 81% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderTest.java index 75d9803ed7eed..5406f4156a140 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedShuffleReaderTest.java @@ -23,9 +23,9 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ShuffleEntry; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; import com.google.common.collect.Lists; import org.joda.time.Instant; @@ -39,14 +39,14 @@ import java.util.NoSuchElementException; /** - * Tests for UngroupedShuffleSource. + * Tests for UngroupedShuffleReader. 
*/ @RunWith(JUnit4.class) -public class UngroupedShuffleSourceTest { +public class UngroupedShuffleReaderTest { private static final Instant timestamp = new Instant(123000); private static final IntervalWindow window = new IntervalWindow(timestamp, timestamp.plus(1000)); - void runTestReadShuffleSource(List expected) throws Exception { + void runTestReadFromShuffle(List expected) throws Exception { Coder> elemCoder = WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), IntervalWindow.getCoder()); @@ -70,14 +70,14 @@ void runTestReadShuffleSource(List expected) throws Exception { Assert.assertEquals(expected.size(), records.size()); Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); - // Read from shuffle with UngroupedShuffleSource. - UngroupedShuffleSource> shuffleSource = - new UngroupedShuffleSource<>( + // Read from shuffle with UngroupedShuffleReader. + UngroupedShuffleReader> ungroupedShuffleReader = + new UngroupedShuffleReader<>( PipelineOptionsFactory.create(), null, null, null, elemCoder); - ExecutorTestUtils.TestSourceObserver observer = - new ExecutorTestUtils.TestSourceObserver(shuffleSource); + ExecutorTestUtils.TestReaderObserver observer = + new ExecutorTestUtils.TestReaderObserver(ungroupedShuffleReader); TestShuffleReader shuffleReader = new TestShuffleReader(); List expectedSizes = new ArrayList<>(); @@ -87,8 +87,8 @@ void runTestReadShuffleSource(List expected) throws Exception { } List actual = new ArrayList<>(); - try (Source.SourceIterator> iter = - shuffleSource.iterator(shuffleReader)) { + try (Reader.ReaderIterator> iter = + ungroupedShuffleReader.iterator(shuffleReader)) { while (iter.hasNext()) { Assert.assertTrue(iter.hasNext()); Assert.assertTrue(iter.hasNext()); @@ -112,12 +112,12 @@ void runTestReadShuffleSource(List expected) throws Exception { } @Test - public void testReadEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(TestUtils.NO_INTS); + public void testReadEmptyShuffleData() throws Exception { + runTestReadFromShuffle(TestUtils.NO_INTS); } @Test - public void testReadNonEmptyShuffleSource() throws Exception { - runTestReadShuffleSource(TestUtils.INTS); + public void testReadNonEmptyShuffleData() throws Exception { + runTestReadFromShuffle(TestUtils.INTS); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java index 89ca01ef724d2..b3cebad3e21e5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java @@ -17,8 +17,8 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; -import com.google.cloud.dataflow.sdk.runners.worker.TextSource; -import com.google.cloud.dataflow.sdk.util.common.worker.Source; +import com.google.cloud.dataflow.sdk.runners.worker.TextReader; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.junit.Assert; import org.junit.Rule; @@ -38,7 +38,6 @@ */ @RunWith(JUnit4.class) public class IOFactoryTest { - @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); @@ -77,13 +76,11 @@ public void testMultiFileRead() throws Exception { output.close(); - TextSource source = new TextSource<>( - tmpFolder.getRoot() + "/file*", - true /* strip newlines */, - null, null, StringUtf8Coder.of()); + TextReader reader = new TextReader<>( + tmpFolder.getRoot() + "/file*", true/* strip newlines */, null, null, 
StringUtf8Coder.of()); Set records = new TreeSet<>(); - try (Source.SourceIterator iterator = source.iterator()) { + try (Reader.ReaderIterator iterator = reader.iterator()) { while (iterator.hasNext()) { records.add(iterator.next()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java index 07b3bdde5dc26..c38bb56ac9304 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ExecutorTestUtils.java @@ -37,7 +37,7 @@ @SuppressWarnings({"rawtypes", "unchecked"}) public class ExecutorTestUtils { // Do not instantiate. - private ExecutorTestUtils() { } + private ExecutorTestUtils() {} /** An Operation with a specified number of outputs. */ public static class TestOperation extends Operation { @@ -51,33 +51,23 @@ public TestOperation(int numOutputs) { TestOperation(int numOutputs, CounterSet counters, String counterPrefix) { this(numOutputs, counterPrefix, counters.getAddCounterMutator(), - new StateSampler(counterPrefix, counters.getAddCounterMutator())); + new StateSampler(counterPrefix, counters.getAddCounterMutator())); } - TestOperation(int numOutputs, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) { + TestOperation(int numOutputs, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { super("TestOperation", - createOutputReceivers(numOutputs, counterPrefix, - addCounterMutator, stateSampler), - counterPrefix, - addCounterMutator, - stateSampler); - } - - private static OutputReceiver[] createOutputReceivers( - int numOutputs, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler) { + createOutputReceivers(numOutputs, counterPrefix, addCounterMutator, stateSampler), + counterPrefix, addCounterMutator, stateSampler); + } + + private static OutputReceiver[] createOutputReceivers(int numOutputs, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { OutputReceiver[] receivers = new OutputReceiver[numOutputs]; for (int i = 0; i < numOutputs; i++) { - receivers[i] = new OutputReceiver( - "out_" + i, - new ElementByteSizeObservableCoder(StringUtf8Coder.of()), - counterPrefix, - addCounterMutator); + receivers[i] = + new OutputReceiver("out_" + i, new ElementByteSizeObservableCoder(StringUtf8Coder.of()), + counterPrefix, addCounterMutator); } return receivers; } @@ -96,10 +86,7 @@ public TestReceiver(Coder coder) { } public TestReceiver(Coder coder, CounterSet counterSet) { - this("test_receiver_out", - new ElementByteSizeObservableCoder(coder), - counterSet, - "test-"); + this("test_receiver_out", new ElementByteSizeObservableCoder(coder), counterSet, "test-"); } public TestReceiver(CounterSet counterSet, String counterPrefix) { @@ -110,27 +97,20 @@ public TestReceiver(String outputName, CounterSet counterSet) { this(outputName, counterSet, "test-"); } - public TestReceiver(String outputName, - CounterSet counterSet, String counterPrefix) { - this(outputName, - new ElementByteSizeObservableCoder(StringUtf8Coder.of()), - counterSet, - counterPrefix); + public TestReceiver(String outputName, CounterSet counterSet, String counterPrefix) { + this(outputName, new ElementByteSizeObservableCoder(StringUtf8Coder.of()), counterSet, + counterPrefix); } - 
public TestReceiver(ElementByteSizeObservable elementByteSizeObservable, - CounterSet counterSet, String counterPrefix) { - this("test_receiver_out", elementByteSizeObservable, - counterSet, counterPrefix); + public TestReceiver(ElementByteSizeObservable elementByteSizeObservable, CounterSet counterSet, + String counterPrefix) { + this("test_receiver_out", elementByteSizeObservable, counterSet, counterPrefix); } - public TestReceiver(String outputName, - ElementByteSizeObservable elementByteSizeObservable, - CounterSet counterSet, String counterPrefix) { - super(outputName, - elementByteSizeObservable, - counterPrefix, - counterSet.getAddCounterMutator()); + public TestReceiver(String outputName, ElementByteSizeObservable elementByteSizeObservable, + CounterSet counterSet, String counterPrefix) { + super( + outputName, elementByteSizeObservable, counterPrefix, counterSet.getAddCounterMutator()); } @Override @@ -145,8 +125,8 @@ protected boolean sampleElement() { } } - /** A {@code Source} that yields a specified set of values. */ - public static class TestSource extends Source { + /** A {@code Reader} that yields a specified set of values. */ + public static class TestReader extends Reader { List inputs = new ArrayList<>(); public void addInput(String... inputs) { @@ -154,20 +134,22 @@ public void addInput(String... inputs) { } @Override - public SourceIterator iterator() { - return new TestSourceIterator(inputs); + public ReaderIterator iterator() { + return new TestReaderIterator(inputs); } - class TestSourceIterator extends AbstractSourceIterator { + class TestReaderIterator extends AbstractReaderIterator { Iterator iter; boolean closed = false; - public TestSourceIterator(List inputs) { + public TestReaderIterator(List inputs) { iter = inputs.iterator(); } @Override - public boolean hasNext() { return iter.hasNext(); } + public boolean hasNext() { + return iter.hasNext(); + } @Override public String next() { @@ -188,18 +170,18 @@ public void close() { * An Observer that stores all sizes into an ArrayList, to compare * against the gold standard during testing. 
*/ - public static class TestSourceObserver implements Observer { - private final Source source; + public static class TestReaderObserver implements Observer { + private final Reader reader; private final List sizes; - public TestSourceObserver(Source source) { - this(source, new ArrayList()); + public TestReaderObserver(Reader reader) { + this(reader, new ArrayList()); } - public TestSourceObserver(Source source, List sizes) { - this.source = source; + public TestReaderObserver(Reader reader, List sizes) { + this.reader = reader; this.sizes = sizes; - source.addObserver(this); + reader.addObserver(this); } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java index fa46e2f235b71..07944893ffebd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.util.common.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToSourcePosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; @@ -26,8 +26,8 @@ import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReader; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; -import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestSource; import org.hamcrest.CoreMatchers; import org.junit.Assert; @@ -50,29 +50,22 @@ static class TestOperation extends Operation { private static CounterSet counterSet = new CounterSet(); private static String counterPrefix = "test-"; - private static StateSampler stateSampler = new StateSampler( - counterPrefix, counterSet.getAddCounterMutator()); + private static StateSampler stateSampler = + new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); TestOperation(String label, List log) { - super(label, - new OutputReceiver[]{}, - counterPrefix, - counterSet.getAddCounterMutator(), - stateSampler); + super(label, new OutputReceiver[] {}, counterPrefix, counterSet.getAddCounterMutator(), + stateSampler); this.label = label; this.log = log; } - TestOperation(String outputName, - String counterPrefix, - CounterSet.AddCounterMutator addCounterMutator, - StateSampler stateSampler, - long outputCount) { - super(outputName, new OutputReceiver[]{}, - counterPrefix, addCounterMutator, stateSampler); + TestOperation(String outputName, String counterPrefix, + CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler, + long outputCount) { + 
super(outputName, new OutputReceiver[] {}, counterPrefix, addCounterMutator, stateSampler); addCounterMutator.addCounter( - Counter.longs(outputName + "-ElementCount", SUM) - .resetToValue(outputCount)); + Counter.longs(outputName + "-ElementCount", SUM).resetToValue(outputCount)); } @Override @@ -92,26 +85,21 @@ public void finish() throws Exception { static class TestReadOperation extends ReadOperation { private ApproximateProgress progress = null; - TestReadOperation(OutputReceiver outputReceiver, - String counterPrefix, - AddCounterMutator addCounterMutator, - StateSampler stateSampler) { - super(new TestSource(), outputReceiver, - counterPrefix, addCounterMutator, stateSampler); + TestReadOperation(OutputReceiver outputReceiver, String counterPrefix, + AddCounterMutator addCounterMutator, StateSampler stateSampler) { + super(new TestReader(), outputReceiver, counterPrefix, addCounterMutator, stateSampler); } @Override - public Source.Progress getProgress() { - return cloudProgressToSourceProgress(progress); + public Reader.Progress getProgress() { + return cloudProgressToReaderProgress(progress); } @Override - public Source.Position proposeStopPosition( - Source.Progress proposedStopPosition) { + public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) { // Fakes the return with the same position as proposed. - return cloudPositionToSourcePosition( - sourceProgressToCloudProgress(proposedStopPosition) - .getPosition()); + return cloudPositionToReaderPosition( + sourceProgressToCloudProgress(proposedStopPosition).getPosition()); } public void setProgress(ApproximateProgress progress) { @@ -123,27 +111,20 @@ public void setProgress(ApproximateProgress progress) { public void testExecuteMapTaskExecutor() throws Exception { List log = new ArrayList<>(); - List operations = Arrays.asList(new Operation[]{ - new TestOperation("o1", log), - new TestOperation("o2", log), - new TestOperation("o3", log)}); + List operations = Arrays.asList(new Operation[] { + new TestOperation("o1", log), new TestOperation("o2", log), new TestOperation("o3", log)}); CounterSet counters = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler( - counterPrefix, counters.getAddCounterMutator()); - MapTaskExecutor executor = - new MapTaskExecutor(operations, counters, stateSampler); + StateSampler stateSampler = new StateSampler(counterPrefix, counters.getAddCounterMutator()); + MapTaskExecutor executor = new MapTaskExecutor(operations, counters, stateSampler); executor.execute(); - Assert.assertThat(log, CoreMatchers.hasItems( - "o3 started", - "o2 started", - "o1 started", - "o1 finished", - "o2 finished", - "o3 finished")); + Assert.assertThat( + log, + CoreMatchers.hasItems( + "o3 started", "o2 started", "o1 started", "o1 finished", "o2 finished", "o3 finished")); executor.close(); } @@ -153,55 +134,51 @@ public void testExecuteMapTaskExecutor() throws Exception { public void testGetOutputCounters() throws Exception { CounterSet counters = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler( - counterPrefix, counters.getAddCounterMutator()); - List operations = Arrays.asList(new Operation[]{ - new TestOperation( - "o1", counterPrefix, counters.getAddCounterMutator(), - stateSampler, 1), - new TestOperation( - "o2", counterPrefix, counters.getAddCounterMutator(), - stateSampler, 2), - new TestOperation( - "o3", counterPrefix, counters.getAddCounterMutator(), - stateSampler, 3)}); + StateSampler 
stateSampler = new StateSampler(counterPrefix, counters.getAddCounterMutator()); + List operations = Arrays.asList(new Operation[] { + new TestOperation("o1", counterPrefix, counters.getAddCounterMutator(), stateSampler, 1), + new TestOperation("o2", counterPrefix, counters.getAddCounterMutator(), stateSampler, 2), + new TestOperation("o3", counterPrefix, counters.getAddCounterMutator(), stateSampler, 3)}); - MapTaskExecutor executor = - new MapTaskExecutor(operations, counters, stateSampler); + MapTaskExecutor executor = new MapTaskExecutor(operations, counters, stateSampler); CounterSet counterSet = executor.getOutputCounters(); Assert.assertEquals( - new CounterSet( - Counter.longs("o1-ElementCount", SUM).resetToValue(1L), + new CounterSet(Counter.longs("o1-ElementCount", SUM).resetToValue(1L), Counter.longs("test-o1-start-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o1-start-msecs")).getAggregate(false)), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-o1-start-msecs")).getAggregate(false)), Counter.longs("test-o1-process-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o1-process-msecs")).getAggregate(false)), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o1-process-msecs")).getAggregate(false)), Counter.longs("test-o1-finish-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o1-finish-msecs")).getAggregate(false)), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-o1-finish-msecs")).getAggregate(false)), Counter.longs("o2-ElementCount", SUM).resetToValue(2L), Counter.longs("test-o2-start-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o2-start-msecs")).getAggregate(false)), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-o2-start-msecs")).getAggregate(false)), Counter.longs("test-o2-process-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o2-process-msecs")).getAggregate(false)), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o2-process-msecs")).getAggregate(false)), Counter.longs("test-o2-finish-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o2-finish-msecs")).getAggregate(false)), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-o2-finish-msecs")).getAggregate(false)), Counter.longs("o3-ElementCount", SUM).resetToValue(3L), Counter.longs("test-o3-start-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o3-start-msecs")).getAggregate(false)), + .resetToValue( + ((Counter) + counterSet.getExistingCounter("test-o3-start-msecs")).getAggregate(false)), Counter.longs("test-o3-process-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o3-process-msecs")).getAggregate(false)), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o3-process-msecs")).getAggregate(false)), Counter.longs("test-o3-finish-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-o3-finish-msecs")).getAggregate(false))), + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-o3-finish-msecs")).getAggregate(false))), counterSet); executor.close(); @@ -211,12 +188,10 @@ public void testGetOutputCounters() throws Exception { public void testGetReadOperation() throws Exception { CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler( - counterPrefix, 
counterSet.getAddCounterMutator()); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); // Test MapTaskExecutor without a single operation. MapTaskExecutor executor = - new MapTaskExecutor(new ArrayList(), - counterSet, stateSampler); + new MapTaskExecutor(new ArrayList(), counterSet, stateSampler); try { ReadOperation readOperation = executor.getReadOperation(); @@ -225,13 +200,10 @@ public void testGetReadOperation() throws Exception { // Exception expected } - List operations = Arrays.asList(new Operation[]{ - new TestOperation("o1", - counterPrefix, counterSet.getAddCounterMutator(), - stateSampler, 1), - new TestOperation("o2", - counterPrefix, counterSet.getAddCounterMutator(), - stateSampler, 2)}); + List operations = Arrays.asList(new Operation[] { + new TestOperation("o1", counterPrefix, counterSet.getAddCounterMutator(), stateSampler, 1), + new TestOperation( + "o2", counterPrefix, counterSet.getAddCounterMutator(), stateSampler, 2)}); // Test MapTaskExecutor without ReadOperation. executor = new MapTaskExecutor(operations, counterSet, stateSampler); @@ -245,10 +217,8 @@ public void testGetReadOperation() throws Exception { executor.close(); TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); - operations = Arrays.asList(new Operation[]{ - new TestReadOperation( - receiver, counterPrefix, counterSet.getAddCounterMutator(), - stateSampler)}); + operations = Arrays.asList(new Operation[] {new TestReadOperation( + receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler)}); executor = new MapTaskExecutor(operations, counterSet, stateSampler); Assert.assertEquals(operations.get(0), executor.getReadOperation()); executor.close(); @@ -258,15 +228,12 @@ public void testGetReadOperation() throws Exception { public void testGetProgressAndRequestSplit() throws Exception { CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler( - counterPrefix, counterSet.getAddCounterMutator()); + StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); - TestReadOperation operation = - new TestReadOperation(receiver, - counterPrefix, counterSet.getAddCounterMutator(), - stateSampler); - MapTaskExecutor executor = new MapTaskExecutor( - Arrays.asList(new Operation[]{operation}), counterSet, stateSampler); + TestReadOperation operation = new TestReadOperation( + receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + MapTaskExecutor executor = + new MapTaskExecutor(Arrays.asList(new Operation[] {operation}), counterSet, stateSampler); operation.setProgress(new ApproximateProgress().setPosition(makePosition(1L))); Assert.assertEquals( @@ -274,10 +241,8 @@ public void testGetProgressAndRequestSplit() throws Exception { sourceProgressToCloudProgress(executor.getWorkerProgress()).getPosition()); Assert.assertEquals( makePosition(1L), - sourcePositionToCloudPosition( - executor.proposeStopPosition( - cloudProgressToSourceProgress( - new ApproximateProgress().setPosition(makePosition(1L)))))); + sourcePositionToCloudPosition(executor.proposeStopPosition(cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(makePosition(1L)))))); executor.close(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index c324a0bfe0010..922e32f1887af 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -16,7 +16,7 @@ package com.google.cloud.dataflow.sdk.util.common.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToSourceProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; @@ -29,8 +29,8 @@ import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReader; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestReceiver; -import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils.TestSource; import org.hamcrest.CoreMatchers; import org.junit.Assert; @@ -53,19 +53,18 @@ public class ReadOperationTest { private static final long ITERATIONS = 3L; /** - * The test Source for testing updating stop position and progress report. + * The test Reader for testing updating stop position and progress report. * The number of read iterations is controlled by ITERATIONS. */ - static class TestTextSource extends Source { + static class TestTextReader extends Reader { @Override - public SourceIterator iterator() { - return new TestTextSourceIterator(); + public ReaderIterator iterator() { + return new TestTextReaderIterator(); } - class TestTextSourceIterator extends AbstractSourceIterator { + class TestTextReaderIterator extends AbstractReaderIterator { long offset = 0L; - List proposedPositions = - new ArrayList<>(); + List proposedPositions = new ArrayList<>(); @Override public boolean hasNext() { @@ -91,7 +90,7 @@ public Progress getProgress() { ApproximateProgress progress = new ApproximateProgress(); progress.setPosition(currentPosition); - return cloudProgressToSourceProgress(progress); + return cloudProgressToReaderProgress(progress); } @Override @@ -105,7 +104,7 @@ public Position updateStopPosition(Progress proposedStopPosition) { /** * The OutputReceiver for testing updating stop position and progress report. - * The offset of the Source (iterator) will be advanced each time this + * The offset of the Reader (iterator) will be advanced each time this * Receiver processes a record. */ static class TestTextReceiver extends OutputReceiver { @@ -131,18 +130,17 @@ public void process(Object outputElem) throws Exception { progresses.add(sourceProgressToCloudProgress(readOperation.getProgress())); // We expect that call to proposeStopPosition is a no-op that does not // update the stop position for every iteration. We will verify it is - // delegated to SourceIterator after ReadOperation finishes. - Assert.assertNull( - readOperation.proposeStopPosition( - cloudProgressToSourceProgress(makeApproximateProgress(proposedStopPosition)))); + // delegated to ReaderIterator after ReadOperation finishes. 
+ Assert.assertNull(readOperation.proposeStopPosition( + cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); } } @Test @SuppressWarnings("unchecked") public void testRunReadOperation() throws Exception { - TestSource source = new TestSource(); - source.addInput("hi", "there", "", "bob"); + TestReader reader = new TestReader(); + reader.addInput("hi", "there", "", "bob"); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; @@ -150,7 +148,7 @@ public void testRunReadOperation() throws Exception { TestReceiver receiver = new TestReceiver(counterSet, counterPrefix); ReadOperation readOperation = new ReadOperation( - source, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + reader, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); readOperation.start(); readOperation.finish(); @@ -158,35 +156,36 @@ public void testRunReadOperation() throws Exception { Assert.assertThat( receiver.outputElems, CoreMatchers.hasItems("hi", "there", "", "bob")); - Assert.assertEquals( - new CounterSet( - Counter.longs("ReadOperation-ByteCount", SUM).resetToValue(2L + 5 + 0 + 3), - Counter.longs("test_receiver_out-ElementCount", SUM).resetToValue(4L), - Counter.longs("test_receiver_out-MeanByteCount", MEAN).resetToValue(4, 10L), - Counter.longs("test-ReadOperation-start-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-ReadOperation-start-msecs")).getAggregate(false)), - Counter.longs("test-ReadOperation-read-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-ReadOperation-read-msecs")).getAggregate(false)), - Counter.longs("test-ReadOperation-process-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-ReadOperation-process-msecs")).getAggregate(false)), - Counter.longs("test-ReadOperation-finish-msecs", SUM) - .resetToValue(((Counter) counterSet.getExistingCounter( - "test-ReadOperation-finish-msecs")).getAggregate(false))), - counterSet); + Assert + .assertEquals( + new CounterSet( + Counter.longs("ReadOperation-ByteCount", SUM).resetToValue(2L + 5 + 0 + 3), + Counter.longs("test_receiver_out-ElementCount", SUM).resetToValue(4L), + Counter.longs("test_receiver_out-MeanByteCount", MEAN).resetToValue(4, 10L), + Counter.longs("test-ReadOperation-start-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-start-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-read-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-read-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-process-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-process-msecs")).getAggregate(false)), + Counter.longs("test-ReadOperation-finish-msecs", SUM) + .resetToValue(((Counter) counterSet.getExistingCounter( + "test-ReadOperation-finish-msecs")).getAggregate(false))), + counterSet); } @Test public void testGetProgressAndProposeStopPosition() throws Exception { - TestTextSource testSource = new TestTextSource(); + TestTextReader testTextReader = new TestTextReader(); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); ReadOperation readOperation = new ReadOperation( - testSource, receiver, counterPrefix, 
counterSet.getAddCounterMutator(), stateSampler); + testTextReader, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); readOperation.setProgressUpdatePeriodMs(0); receiver.setReadOperation(readOperation); @@ -195,37 +194,36 @@ public void testGetProgressAndProposeStopPosition() throws Exception { Assert.assertNull(readOperation.getProgress()); Assert.assertNull(readOperation.proposeStopPosition( - cloudProgressToSourceProgress( - makeApproximateProgress(proposedStopPosition)))); + cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); readOperation.start(); readOperation.finish(); - TestTextSource.TestTextSourceIterator testIterator = - (TestTextSource.TestTextSourceIterator) readOperation.sourceIterator; + TestTextReader.TestTextReaderIterator testIterator = + (TestTextReader.TestTextReaderIterator) readOperation.readerIterator; - Assert.assertEquals(sourceProgressToCloudProgress(testIterator.getProgress()), - sourceProgressToCloudProgress(readOperation.getProgress())); - Assert.assertEquals(sourcePositionToCloudPosition(testIterator.updateStopPosition( - cloudProgressToSourceProgress( - makeApproximateProgress(proposedStopPosition)))), - sourcePositionToCloudPosition(readOperation.proposeStopPosition( - cloudProgressToSourceProgress( - makeApproximateProgress(proposedStopPosition))))); + Assert.assertEquals( + sourceProgressToCloudProgress(testIterator.getProgress()), + sourceProgressToCloudProgress(readOperation.getProgress())); + Assert.assertEquals( + sourcePositionToCloudPosition(testIterator.updateStopPosition( + cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))), + sourcePositionToCloudPosition(readOperation.proposeStopPosition( + cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition))))); // Verifies progress report and stop position updates. 
Assert.assertEquals(testIterator.proposedPositions.size(), ITERATIONS + 2); + Assert.assertThat(testIterator.proposedPositions, everyItem(equalTo(makePosition(3L)))); Assert.assertThat( - testIterator.proposedPositions, everyItem(equalTo(makePosition(3L)))); - Assert.assertThat( - receiver.progresses, contains(makeApproximateProgress(1L), makeApproximateProgress(2L), - makeApproximateProgress(3L))); + receiver.progresses, + contains( + makeApproximateProgress(1L), makeApproximateProgress(2L), makeApproximateProgress(3L))); } @Test public void testGetProgressDoesNotBlock() throws Exception { final BlockingQueue queue = new LinkedBlockingQueue<>(); - final Source.SourceIterator iterator = new Source.AbstractSourceIterator() { + final Reader.ReaderIterator iterator = new Reader.AbstractReaderIterator() { private int itemsReturned = 0; @Override @@ -244,15 +242,15 @@ public Integer next() throws IOException { } @Override - public Source.Progress getProgress() { - return cloudProgressToSourceProgress(new ApproximateProgress().setPosition( + public Reader.Progress getProgress() { + return cloudProgressToReaderProgress(new ApproximateProgress().setPosition( new Position().setRecordIndex((long) itemsReturned))); } }; - Source source = new Source() { + Reader reader = new Reader() { @Override - public SourceIterator iterator() throws IOException { + public ReaderIterator iterator() throws IOException { return iterator; } }; @@ -262,7 +260,7 @@ public SourceIterator iterator() throws IOException { StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); final ReadOperation readOperation = new ReadOperation( - source, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); + reader, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); // Update progress not continuously, but so that it's never more than 1 record stale. readOperation.setProgressUpdatePeriodMs(150); receiver.setReadOperation(readOperation); From 239a98328ad84109c4ad2e43c37f1d499208da19 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 22 Dec 2014 18:55:02 -0800 Subject: [PATCH 0054/1541] Add basic encode/decode tests for most coders. 
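
The added tests share one pattern: build a small list of sample values and hand
each one to the new CoderProperties.coderDecodeEncodeEqual helper, which encodes
the value, decodes the resulting bytes, and asserts the decoded result equals the
original. As a rough sketch of that shape (illustrative only, not the literal
contents of any added file; the String sample values below are invented for
illustration):

    package com.google.cloud.dataflow.sdk.coders;

    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.junit.runners.JUnit4;

    import java.util.Arrays;
    import java.util.List;

    /** Test case for {@link StringUtf8Coder} (sketch only). */
    @RunWith(JUnit4.class)
    public class StringUtf8CoderTest {

      // Hypothetical sample inputs, including the empty string and non-ASCII text.
      private static final List<String> TEST_VALUES = Arrays.asList(
          "", "a", "hello world", "über");

      @Test
      public void testDecodeEncodeEqual() throws Exception {
        Coder<String> coder = StringUtf8Coder.of();
        for (String value : TEST_VALUES) {
          // Round-trips each value through encode/decode and asserts equality.
          CoderProperties.coderDecodeEncodeEqual(coder, value);
        }
      }
    }

Centralizing the round trip in CoderProperties keeps each per-coder test down to
a list of sample values plus a single loop.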
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82692960 --- .../dataflow/sdk/coders/CollectionCoder.java | 2 +- .../dataflow/sdk/coders/IterableCoder.java | 2 +- .../sdk/coders/IterableLikeCoder.java | 22 +-- .../cloud/dataflow/sdk/coders/SetCoder.java | 82 +++--------- .../dataflow/sdk/coders/StringUtf8Coder.java | 2 +- .../sdk/coders/TableRowJsonCoder.java | 3 +- .../sdk/coders/TextualIntegerCoder.java | 2 +- .../dataflow/sdk/coders/VarIntCoder.java | 2 +- .../cloud/dataflow/sdk/coders/VoidCoder.java | 2 +- .../dataflow/sdk/coders/AvroCoderTest.java | 13 +- .../sdk/coders/BigEndianIntegerCoderTest.java | 44 ++++++ .../sdk/coders/BigEndianLongCoderTest.java | 46 +++++++ .../sdk/coders/ByteArrayCoderTest.java | 41 ++---- .../dataflow/sdk/coders/CoderProperties.java | 126 ++++++++++++++++-- .../sdk/coders/CollectionCoderTest.java | 51 +++++++ .../dataflow/sdk/coders/CustomCoderTest.java | 10 +- .../dataflow/sdk/coders/EntityCoderTest.java | 67 ++++++++++ .../dataflow/sdk/coders/InstantCoderTest.java | 5 +- .../sdk/coders/IterableCoderTest.java | 18 +++ .../dataflow/sdk/coders/KvCoderTest.java | 67 ++++++++++ .../dataflow/sdk/coders/ListCoderTest.java | 28 +++- .../dataflow/sdk/coders/MapCoderTest.java | 16 +++ .../dataflow/sdk/coders/SetCoderTest.java | 47 +++++++ .../sdk/coders/StringUtf8CoderTest.java | 45 +++++++ .../sdk/coders/TableRowJsonCoderTest.java | 61 +++++++++ .../sdk/coders/TextualIntegerCoderTest.java | 44 ++++++ .../dataflow/sdk/coders/URICoderTest.java | 19 +-- .../dataflow/sdk/coders/VarIntCoderTest.java | 45 +++++++ .../dataflow/sdk/coders/VarLongCoderTest.java | 46 +++++++ 29 files changed, 801 insertions(+), 157 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CollectionCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/EntityCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/KvCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SetCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringUtf8CoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarIntCoderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarLongCoderTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java index c75f645d344ef..b78c2674176e0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java @@ -63,7 +63,7 @@ public static List getInstanceComponents( return getInstanceComponentsHelper(exampleValue); } - CollectionCoder(Coder elemCoder) { + protected CollectionCoder(Coder elemCoder) { super(elemCoder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java index 
79d6173742a88..61737e298463d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java @@ -65,7 +65,7 @@ public static List getInstanceComponents( return getInstanceComponentsHelper(exampleValue); } - IterableCoder(Coder elemCoder) { + protected IterableCoder(Coder elemCoder) { super(elemCoder); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index 128d23bf21a2c..c9332853e267f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -40,7 +40,7 @@ public abstract class IterableLikeCoder> extends StandardCoder { - public Coder getElemCoder() { return elemCoder; } + public Coder getElemCoder() { return elementCoder; } /** * Builds an instance of the coder's associated {@code Iterable} from a list @@ -53,7 +53,7 @@ public abstract class IterableLikeCoder> ///////////////////////////////////////////////////////////////////////////// // Internal operations below here. - final Coder elemCoder; + private final Coder elementCoder; /** * Returns the first element in this iterable-like if it is non-empty, @@ -68,8 +68,8 @@ List getInstanceComponentsHelper( return null; } - protected IterableLikeCoder(Coder elemCoder) { - this.elemCoder = elemCoder; + protected IterableLikeCoder(Coder elementCoder) { + this.elementCoder = elementCoder; } @Override @@ -86,7 +86,7 @@ public void encode(IT iterable, OutputStream outStream, Context context) Collection collection = (Collection) iterable; dataOutStream.writeInt(collection.size()); for (T elem : collection) { - elemCoder.encode(elem, dataOutStream, nestedContext); + elementCoder.encode(elem, dataOutStream, nestedContext); } } else { // We don't know the size without traversing it. So use a @@ -95,7 +95,7 @@ public void encode(IT iterable, OutputStream outStream, Context context) dataOutStream.writeInt(-1); for (T elem : iterable) { dataOutStream.writeBoolean(true); - elemCoder.encode(elem, dataOutStream, nestedContext); + elementCoder.encode(elem, dataOutStream, nestedContext); } dataOutStream.writeBoolean(false); } @@ -112,7 +112,7 @@ public IT decode(InputStream inStream, Context context) if (size >= 0) { List elements = new ArrayList<>(size); for (int i = 0; i < size; i++) { - elements.add(elemCoder.decode(dataInStream, nestedContext)); + elements.add(elementCoder.decode(dataInStream, nestedContext)); } return decodeToIterable(elements); } else { @@ -120,7 +120,7 @@ public IT decode(InputStream inStream, Context context) // each element. 
List elements = new ArrayList<>(); while (dataInStream.readBoolean()) { - elements.add(elemCoder.decode(dataInStream, nestedContext)); + elements.add(elementCoder.decode(dataInStream, nestedContext)); } return decodeToIterable(elements); } @@ -128,7 +128,7 @@ public IT decode(InputStream inStream, Context context) @Override public List> getCoderArguments() { - return Arrays.asList(elemCoder); + return Arrays.asList(elementCoder); } /** @@ -176,7 +176,7 @@ public void registerByteSizeObserver( Collection collection = (Collection) iterable; observer.update(4L); for (T elem : collection) { - elemCoder.registerByteSizeObserver(elem, observer, nestedContext); + elementCoder.registerByteSizeObserver(elem, observer, nestedContext); } } else { // We don't know the size without traversing it. So use a @@ -185,7 +185,7 @@ public void registerByteSizeObserver( observer.update(4L); for (T elem : iterable) { observer.update(1L); - elemCoder.registerByteSizeObserver(elem, observer, nestedContext); + elementCoder.registerByteSizeObserver(elem, observer, nestedContext); } observer.update(1L); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java index 8b5fca7638a5c..9a65d39a0e403 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java @@ -18,17 +18,10 @@ import com.google.api.client.util.Preconditions; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -39,7 +32,7 @@ * @param the type of the elements of the set */ @SuppressWarnings("serial") -public class SetCoder extends StandardCoder> { +public class SetCoder extends IterableLikeCoder> { /** * Produces a SetCoder with the given elementCoder. @@ -48,6 +41,9 @@ public static SetCoder of(Coder elementCoder) { return new SetCoder<>(elementCoder); } + /** + * Dynamically typed constructor for JSON deserialization. 
+ */ @JsonCreator public static SetCoder of( @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) @@ -57,53 +53,11 @@ public static SetCoder of( return of((Coder) components.get(0)); } - public Coder getElementCoder() { return elementCoder; } - - ///////////////////////////////////////////////////////////////////////////// - - Coder elementCoder; - - SetCoder(Coder elementCoder) { - this.elementCoder = elementCoder; - } - - @Override - public void encode( - Set set, - OutputStream outStream, - Context context) - throws IOException, CoderException { - DataOutputStream dataOutStream = new DataOutputStream(outStream); - dataOutStream.writeInt(set.size()); - for (T element : set) { - elementCoder.encode(element, outStream, context.nested()); - } - dataOutStream.flush(); - } - - @Override - public Set decode(InputStream inStream, Context context) - throws IOException, CoderException { - DataInputStream dataInStream = new DataInputStream(inStream); - int size = dataInStream.readInt(); - Set retval = new HashSet(); - for (int i = 0; i < size; ++i) { - T element = elementCoder.decode(inStream, context.nested()); - retval.add(element); - } - return retval; - } - - @Override - public List> getCoderArguments() { - return Arrays.>asList(elementCoder); - } - /** * Not all sets have a deterministic encoding. * - *
<p>
For example, HashSet comparison does not depend on element order, so - * two HashSet instances may be equal but produce different encodings. + *
<p>
For example, {@code HashSet} comparison does not depend on element order, so + * two {@code HashSet} instances may be equal but produce different encodings. */ @Override public boolean isDeterministic() { @@ -111,15 +65,23 @@ public boolean isDeterministic() { } /** - * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder. + * Returns the first element in this set if it is non-empty, + * otherwise returns {@code null}. */ + public static List getInstanceComponents( + Set exampleValue) { + return getInstanceComponentsHelper(exampleValue); + } + + ///////////////////////////////////////////////////////////////////////////// + // Internal operations below here. + @Override - public void registerByteSizeObserver( - Set set, ElementByteSizeObserver observer, Context context) - throws Exception { - observer.update(4L); - for (T element : set) { - elementCoder.registerByteSizeObserver(element, observer, context.nested()); - } + protected final Set decodeToIterable(List decodedElements) { + return new HashSet(decodedElements); + } + + protected SetCoder(Coder elemCoder) { + super(elemCoder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java index 94db9e4ed3899..48b807b6fcf8c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java @@ -37,12 +37,12 @@ */ @SuppressWarnings("serial") public class StringUtf8Coder extends AtomicCoder { + @JsonCreator public static StringUtf8Coder of() { return INSTANCE; } - ///////////////////////////////////////////////////////////////////////////// private static final StringUtf8Coder INSTANCE = new StringUtf8Coder(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java index f21aaa513eb2f..349ff323c1519 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java @@ -67,8 +67,7 @@ protected long getEncodedElementByteSize(TableRow value, Context context) private static final TableRowJsonCoder INSTANCE = new TableRowJsonCoder(); - private TableRowJsonCoder() { - } + private TableRowJsonCoder() { } /** * TableCell can hold arbitrary Object instances, which makes the encoding diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java index bd01ecbcca7b7..92f343bd4d7e9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java @@ -27,12 +27,12 @@ */ @SuppressWarnings("serial") public class TextualIntegerCoder extends AtomicCoder { + @JsonCreator public static TextualIntegerCoder of() { return new TextualIntegerCoder(); } - ///////////////////////////////////////////////////////////////////////////// private TextualIntegerCoder() {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java index 2ce9ffd632e0f..f357fce890284 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java +++ 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java @@ -33,12 +33,12 @@ */ @SuppressWarnings("serial") public class VarIntCoder extends AtomicCoder { + @JsonCreator public static VarIntCoder of() { return INSTANCE; } - ///////////////////////////////////////////////////////////////////////////// private static final VarIntCoder INSTANCE = diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java index 24d5e061cfc73..e949fdfd03484 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java @@ -26,12 +26,12 @@ */ @SuppressWarnings("serial") public class VoidCoder extends AtomicCoder { + @JsonCreator public static VoidCoder of() { return INSTANCE; } - ///////////////////////////////////////////////////////////////////////////// private static final VoidCoder INSTANCE = new VoidCoder(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index fffbb5839e7ca..e3da71bcdff08 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -24,7 +24,6 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.util.CloudObject; -import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.values.PCollection; import org.apache.avro.Schema; @@ -111,13 +110,10 @@ public void testAvroCoderEncoding() throws Exception { @Test public void testPojoEncoding() throws Exception { - Pojo before = new Pojo("Hello", 42); - + Pojo value = new Pojo("Hello", 42); AvroCoder coder = AvroCoder.of(Pojo.class); - byte[] bytes = CoderUtils.encodeToByteArray(coder, before); - Pojo after = CoderUtils.decodeFromByteArray(coder, bytes); - Assert.assertEquals(before, after); + CoderProperties.coderDecodeEncodeEqual(coder, value); } @Test @@ -140,11 +136,8 @@ public void testGenericRecordEncoding() throws Exception { // Leave favorite_color null AvroCoder coder = AvroCoder.of(GenericRecord.class, schema); - byte[] bytes = CoderUtils.encodeToByteArray(coder, before); - GenericRecord after = CoderUtils.decodeFromByteArray(coder, bytes); - - Assert.assertEquals(before, after); + CoderProperties.coderDecodeEncodeEqual(coder, before); Assert.assertEquals(schema, coder.getSchema()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java new file mode 100644 index 0000000000000..14c4a72f8d7ff --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link BigEndianIntegerCoder}. + */ +@RunWith(JUnit4.class) +public class BigEndianIntegerCoderTest { + + private static final List TEST_VALUES = Arrays.asList( + -11, -3, -1, 0, 1, 5, 13, 29, + Integer.MAX_VALUE, + Integer.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = BigEndianIntegerCoder.of(); + for (Integer value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java new file mode 100644 index 0000000000000..4e94e6bca45a0 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link BigEndianLongCoder}. + */ +@RunWith(JUnit4.class) +public class BigEndianLongCoderTest { + + private static final List TEST_VALUES = Arrays.asList( + -11L, -3L, -1L, 0L, 1L, 5L, 13L, 29L, + Integer.MAX_VALUE + 131L, + Integer.MIN_VALUE - 29L, + Long.MAX_VALUE, + Long.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = BigEndianLongCoder.of(); + for (Long value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java index 8f500a1655994..b8715a66c7057 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java @@ -16,49 +16,26 @@ package com.google.cloud.dataflow.sdk.coders; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; - import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -/** Unit tests for {@link ByteArrayCoder}. */ +/** + * Unit tests for {@link ByteArrayCoder}. 
+ */ @RunWith(JUnit4.class) public class ByteArrayCoderTest { - @Test - public void testOuterContext() throws CoderException, IOException { - byte[] buffer = {0xa, 0xb, 0xc}; - ByteArrayOutputStream os = new ByteArrayOutputStream(); - ByteArrayCoder.of().encode(buffer, os, Coder.Context.OUTER); - byte[] encoded = os.toByteArray(); - - ByteArrayInputStream is = new ByteArrayInputStream(encoded); - byte[] decoded = ByteArrayCoder.of().decode(is, Coder.Context.OUTER); - assertThat(decoded, equalTo(buffer)); - } + private static final byte[][] TEST_VALUES = { + {0xa, 0xb, 0xc}, {}, {}, {0xd, 0xe}, {}}; @Test - public void testNestedContext() throws CoderException, IOException { - byte[][] buffers = {{0xa, 0xb, 0xc}, {}, {}, {0xd, 0xe}, {}}; - - ByteArrayOutputStream os = new ByteArrayOutputStream(); - for (byte[] buffer : buffers) { - ByteArrayCoder.of().encode(buffer, os, Coder.Context.NESTED); - } - byte[] encoded = os.toByteArray(); - - ByteArrayInputStream is = new ByteArrayInputStream(encoded); - for (byte[] buffer : buffers) { - byte[] decoded = ByteArrayCoder.of().decode(is, Coder.Context.NESTED); - assertThat(decoded, equalTo(buffer)); + public void testDecodeEncodeEquals() throws Exception { + ByteArrayCoder coder = ByteArrayCoder.of(); + for (byte[] value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java index ef096eb01c999..89754789cc17f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java @@ -16,14 +16,21 @@ package com.google.cloud.dataflow.sdk.coders; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.emptyIterable; import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.junit.Assume.assumeThat; +import com.google.common.collect.Iterables; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; /** * Properties for use in {@link Coder} tests. These are implemented with junit assertions @@ -31,28 +38,125 @@ */ public class CoderProperties { + /** + * All the contexts, for use in test cases. + */ + public static final List ALL_CONTEXTS = Arrays.asList( + Coder.Context.OUTER, Coder.Context.NESTED); + + /** + * Verifies that for the given {@link Coder}, and values of + * type {@code T}, if the values are equal then the encoded bytes are equal, + * in any {@link Coder.Context}. + */ + public static void coderDeterministic( + Coder coder, T value1, T value2) + throws Exception { + for (Coder.Context context : ALL_CONTEXTS) { + coderDeterministicInContext(coder, context, value1, value2); + } + } + /** * Verifies that for the given {@link Coder}, {@link Coder.Context}, and values of * type {@code T}, if the values are equal then the encoded bytes are equal. 
*/ - public static void coderDeterministic( + public static void coderDeterministicInContext( Coder coder, Coder.Context context, T value1, T value2) throws Exception { assumeThat(value1, equalTo(value2)); - assertArrayEquals(encode(coder, context, value1), encode(coder, context, value2)); + assertThat( + encode(coder, context, value1), + equalTo(encode(coder, context, value2))); } /** - * Verifies that for the given {@link Coder}, {@link Coder.Context}, + * Verifies that for the given {@link Coder}, * and value of type {@code T}, encoding followed by decoding yields an - * equal of type {@code T}. + * equal value of type {@code T}, in any {@link Coder.Context}. */ public static void coderDecodeEncodeEqual( + Coder coder, T value) + throws Exception { + for (Coder.Context context : ALL_CONTEXTS) { + coderDecodeEncodeEqualInContext(coder, context, value); + } + } + + /** + * Verifies that for the given {@link Coder}, {@link Coder.Context}, + * and value of type {@code T}, encoding followed by decoding yields an + * equal value of type {@code T}. + */ + public static void coderDecodeEncodeEqualInContext( Coder coder, Coder.Context context, T value) throws Exception { - assertEquals( - decode(coder, context, encode(coder, context, value)), - value); + assertThat(decodeEncode(coder, context, value), equalTo(value)); + } + + /** + * Verifies that for the given {@link Coder>}, + * and value of type {@code Collection}, encoding followed by decoding yields an + * equal value of type {@code Collection}, in any {@link Coder.Context}. + */ + public static > void coderDecodeEncodeContentsEqual( + Coder coder, IT value) + throws Exception { + for (Coder.Context context : ALL_CONTEXTS) { + coderDecodeEncodeContentsEqualInContext(coder, context, value); + } + } + + /** + * Verifies that for the given {@link Coder>}, + * and value of type {@code Collection}, encoding followed by decoding yields an + * equal value of type {@code Collection}, in the given {@link Coder.Context}. + */ + @SuppressWarnings("unchecked") + public static > void coderDecodeEncodeContentsEqualInContext( + Coder coder, Coder.Context context, CT value) + throws Exception { + // Matchers.containsInAnyOrder() requires at least one element + Collection result = decodeEncode(coder, context, value); + if (value.isEmpty()) { + assertThat(result, emptyIterable()); + } else { + // This is the only Matchers.containInAnyOrder() overload that takes literal values + assertThat(result, containsInAnyOrder((T[]) value.toArray())); + } + } + + /** + * Verifies that for the given {@link Coder>}, + * and value of type {@code Collection}, encoding followed by decoding yields an + * equal value of type {@code Collection}, in any {@link Coder.Context}. + */ + public static > void coderDecodeEncodeContentsInSameOrder( + Coder coder, IT value) + throws Exception { + for (Coder.Context context : ALL_CONTEXTS) { + CoderProperties.coderDecodeEncodeContentsInSameOrderInContext( + coder, context, value); + } + } + + /** + * Verifies that for the given {@link Coder>}, + * and value of type {@code Iterable}, encoding followed by decoding yields an + * equal value of type {@code Collection}, in the given {@link Coder.Context}. 
+ */ + @SuppressWarnings("unchecked") + public static > void coderDecodeEncodeContentsInSameOrderInContext( + Coder coder, Coder.Context context, IT value) + throws Exception { + Iterable result = decodeEncode(coder, context, value); + // Matchers.contains() requires at least one element + if (Iterables.isEmpty(value)) { + assertThat(result, emptyIterable()); + } else { + // This is the only Matchers.contains() overload that takes literal values + assertThat(result, contains((T[]) Iterables.toArray(value, Object.class))); + } } ////////////////////////////////////////////////////////////////////////// @@ -70,4 +174,8 @@ private static T decode( return coder.decode(is, context); } + private static T decodeEncode(Coder coder, Coder.Context context, T value) + throws CoderException, IOException { + return decode(coder, context, encode(coder, context, value)); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CollectionCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CollectionCoderTest.java new file mode 100644 index 0000000000000..ffacbd768a4e4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CollectionCoderTest.java @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; + +/** + * Test case for {@link CollectionCoder}. 
+ */ +@RunWith(JUnit4.class) +public class CollectionCoderTest { + + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptyList(), + Collections.emptySet(), + Collections.singletonList(13), + Arrays.asList(1, 2, 3, 4), + new LinkedList<>(Arrays.asList(7, 6, 5)), + new HashSet<>(Arrays.asList(31, -5, 83))); + + @Test + public void testDecodeEncodeContentsEqual() throws Exception { + Coder> coder = CollectionCoder.of(VarIntCoder.of()); + for (Collection value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeContentsEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java index c71a50843fd02..229bc7d5ae9d7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java @@ -67,18 +67,18 @@ public int hashCode() { } } - @Test public void testEncodeDecode() throws Exception { + @Test + public void testEncodeDecode() throws Exception { MyCustomCoder coder = new MyCustomCoder("key"); - byte[] encoded = CoderUtils.encodeToByteArray(coder, KV.of("key", 3L)); - Assert.assertEquals( - KV.of("key", 3L), CoderUtils.decodeFromByteArray(coder, encoded)); + CoderProperties.coderDecodeEncodeEqual(coder, KV.of("key", 3L)); byte[] encoded2 = CoderUtils.encodeToByteArray(coder, KV.of("ignored", 3L)); Assert.assertEquals( KV.of("key", 3L), CoderUtils.decodeFromByteArray(coder, encoded2)); } - @Test public void testEncodable() throws Exception { + @Test + public void testEncodable() throws Exception { SerializableUtils.ensureSerializable(new MyCustomCoder("key")); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/EntityCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/EntityCoderTest.java new file mode 100644 index 0000000000000..bd388a3343d1e --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/EntityCoderTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static com.google.api.services.datastore.client.DatastoreHelper.makeKey; +import static com.google.api.services.datastore.client.DatastoreHelper.makeProperty; +import static com.google.api.services.datastore.client.DatastoreHelper.makeValue; + +import com.google.api.services.datastore.DatastoreV1.Entity; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link EntityCoder}. + */ +@RunWith(JUnit4.class) +public class EntityCoderTest { + + // Presumably if anything works, everything works, + // as actual serialization is fully delegated to + // autogenerated code from a well-tested library. 
+ private static final List TEST_VALUES = Arrays.asList( + Entity.newBuilder() + .setKey(makeKey("TestKind", "emptyEntity")) + .build(), + Entity.newBuilder() + .setKey(makeKey("TestKind", "testSimpleProperties")) + .addProperty(makeProperty("trueProperty", makeValue(true))) + .addProperty(makeProperty("falseProperty", makeValue(false))) + .addProperty(makeProperty("stringProperty", makeValue("hello"))) + .addProperty(makeProperty("integerProperty", makeValue(3))) + .addProperty(makeProperty("doubleProperty", makeValue(-1.583257))) + .build(), + Entity.newBuilder() + .setKey(makeKey("TestKind", "testNestedEntity")) + .addProperty(makeProperty("entityProperty", + makeValue(Entity.newBuilder() + .addProperty(makeProperty("stringProperty", makeValue("goodbye")))))) + .build()); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = EntityCoder.of(); + for (Entity value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java index dd719004eab1c..31f80c92ae730 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/InstantCoderTest.java @@ -34,6 +34,7 @@ /** Unit tests for {@link InstantCoder}. */ @RunWith(JUnit4.class) public class InstantCoderTest { + private final InstantCoder coder = InstantCoder.of(); private final List timestamps = Arrays.asList(0L, 1L, -1L, -255L, 256L, Long.MIN_VALUE, Long.MAX_VALUE); @@ -41,9 +42,7 @@ public class InstantCoderTest { @Test public void testBasicEncoding() throws Exception { for (long timestamp : timestamps) { - Assert.assertEquals(new Instant(timestamp), - CoderUtils.decodeFromByteArray(coder, - CoderUtils.encodeToByteArray(coder, new Instant(timestamp)))); + CoderProperties.coderDecodeEncodeEqual(coder, new Instant(timestamp)); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java index 993c5d0a5e917..f44b53f828103 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/IterableCoderTest.java @@ -24,11 +24,29 @@ import org.junit.runners.JUnit4; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; import java.util.List; /** Unit tests for {@link IterableCoder}. 
*/ @RunWith(JUnit4.class) public class IterableCoderTest { + + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptyList(), + Collections.singletonList(13), + Arrays.asList(1, 2, 3, 4), + new LinkedList(Arrays.asList(7, 6, 5))); + + @Test + public void testDecodeEncodeContentsInSameOrder() throws Exception { + Coder> coder = IterableCoder.of(VarIntCoder.of()); + for (Iterable value : TEST_VALUES) { + CoderProperties.>coderDecodeEncodeContentsInSameOrder( + coder, value); + } + } + @Test public void testGetInstanceComponentsNonempty() { Iterable iterable = Arrays.asList(2, 58, 99, 5); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/KvCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/KvCoderTest.java new file mode 100644 index 0000000000000..924e960740d63 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/KvCoderTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.ImmutableMap; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Map; + +/** + * Test case for {@link KvCoder}. + */ +@RunWith(JUnit4.class) +public class KvCoderTest { + + private static final Map, Iterable> TEST_DATA = + new ImmutableMap.Builder, Iterable>() + .put(VarIntCoder.of(), + Arrays.asList(-1, 0, 1, 13, Integer.MAX_VALUE, Integer.MIN_VALUE)) + .put(BigEndianLongCoder.of(), + Arrays.asList(-1L, 0L, 1L, 13L, Long.MAX_VALUE, Long.MIN_VALUE)) + .put(StringUtf8Coder.of(), + Arrays.asList("", "hello", "goodbye", "1")) + .put(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), + Arrays.asList(KV.of("", -1), KV.of("hello", 0), KV.of("goodbye", Integer.MAX_VALUE))) + .put(ListCoder.of(VarLongCoder.of()), + Arrays.asList( + Arrays.asList(1L, 2L, 3L), + Collections.emptyList())) + .build(); + + @Test + public void testDecodeEncodeEqual() throws Exception { + for (Map.Entry, Iterable> entry : TEST_DATA.entrySet()) { + // The coder and corresponding values must be the same type. + // If someone messes this up in the above test data, the test + // will fail anyhow (unless the coder magically works on data + // it does not understand). 
+ @SuppressWarnings("unchecked") + Coder coder = (Coder) entry.getKey(); + Iterable values = entry.getValue(); + for (Object value : values) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java index c04d3e16745b8..595cd7811b7a1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ListCoderTest.java @@ -24,13 +24,30 @@ import org.junit.runners.JUnit4; import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedList; import java.util.List; /** Unit tests for {@link ListCoder}. */ @RunWith(JUnit4.class) public class ListCoderTest { + + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptyList(), + Collections.singletonList(43), + Arrays.asList(1, 2, 3, 4), + new LinkedList(Arrays.asList(7, 6, 5))); + + @Test + public void testDecodeEncodeContentsInSameOrder() throws Exception { + Coder> coder = ListCoder.of(VarIntCoder.of()); + for (List value : TEST_VALUES) { + CoderProperties.>coderDecodeEncodeContentsInSameOrder(coder, value); + } + } + @Test - public void testGetInstanceComponentsNonempty() { + public void testGetInstanceComponentsNonempty() throws Exception { List list = Arrays.asList(21, 5, 3, 5); List components = ListCoder.getInstanceComponents(list); assertEquals(1, components.size()); @@ -38,9 +55,16 @@ public void testGetInstanceComponentsNonempty() { } @Test - public void testGetInstanceComponentsEmpty() { + public void testGetInstanceComponentsEmpty() throws Exception { List list = Arrays.asList(); List components = ListCoder.getInstanceComponents(list); assertNull(components); } + + @Test + public void testEmptyList() throws Exception { + List list = Collections.emptyList(); + Coder> coder = ListCoder.of(VarIntCoder.of()); + CoderProperties.>coderDecodeEncodeEqual(coder, list); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java index 30cd0d8e8100b..4132776c8ad01 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/MapCoderTest.java @@ -19,10 +19,14 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import com.google.common.collect.ImmutableMap; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -30,6 +34,18 @@ /** Unit tests for {@link MapCoder}. 
*/ @RunWith(JUnit4.class) public class MapCoderTest { + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptyMap(), + new ImmutableMap.Builder().put(1, "hello").put(-1, "foo").build()); + + @Test + public void testDecodeEncodeContentsInSameOrder() throws Exception { + Coder> coder = MapCoder.of(VarIntCoder.of(), StringUtf8Coder.of()); + for (Map value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } + @Test public void testGetInstanceComponentsNonempty() { Map map = new HashMap<>(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SetCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SetCoderTest.java new file mode 100644 index 0000000000000..db5b42aa4b1d2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/SetCoderTest.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Test case for {@link SetCoder}. + */ +@RunWith(JUnit4.class) +public class SetCoderTest { + + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptySet(), + Collections.singleton(13), + new HashSet<>(Arrays.asList(31, -5, 83))); + + @Test + public void testDecodeEncodeContentsEqual() throws Exception { + Coder> coder = SetCoder.of(VarIntCoder.of()); + for (Set value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeContentsEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringUtf8CoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringUtf8CoderTest.java new file mode 100644 index 0000000000000..9c85836e94324 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringUtf8CoderTest.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link StringUtf8Coder}. 
+ */ +@RunWith(JUnit4.class) +public class StringUtf8CoderTest { + + private static final List TEST_VALUES = Arrays.asList( + "", "a", "13", "hello", + "a longer string with spaces and all that", + "a string with a \n newline", + "スタリング"); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = StringUtf8Coder.of(); + for (String value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoderTest.java new file mode 100644 index 0000000000000..cd185539442a3 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoderTest.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.api.services.bigquery.model.TableRow; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link TableRowJsonCoder}. + */ +@RunWith(JUnit4.class) +public class TableRowJsonCoderTest { + + private static class TableRowBuilder { + private TableRow row; + public TableRowBuilder() { + row = new TableRow(); + } + public TableRowBuilder set(String fieldName, Object value) { + row.set(fieldName, value); + return this; + } + public TableRow build() { + return row; + } + } + + private static final List TEST_VALUES = Arrays.asList( + new TableRowBuilder().build(), + new TableRowBuilder().set("a", "1").build(), + new TableRowBuilder().set("b", 3.14).build(), + new TableRowBuilder().set("a", "1").set("b", true).set("c", "hi").build()); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = TableRowJsonCoder.of(); + for (TableRow value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoderTest.java new file mode 100644 index 0000000000000..a88c4764ef1bc --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoderTest.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link TextualIntegerCoder}. + */ +@RunWith(JUnit4.class) +public class TextualIntegerCoderTest { + + private static final List TEST_VALUES = Arrays.asList( + -11, -3, -1, 0, 1, 5, 13, 29, + Integer.MAX_VALUE, + Integer.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = TextualIntegerCoder.of(); + for (Integer value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java index f464e813bf5d4..dd0d32ec419b9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java @@ -28,8 +28,6 @@ @RunWith(JUnit4.class) public class URICoderTest { - // Test data - private static final List TEST_URI_STRINGS = Arrays.asList( "http://www.example.com", "gs://myproject/mybucket/a/gcs/path", @@ -37,32 +35,19 @@ public class URICoderTest { "file:/path/with/no/authority", "file:///path/with/empty/authority"); - private static final List TEST_CONTEXTS = Arrays.asList( - Coder.Context.OUTER, - Coder.Context.NESTED); - - // Tests - @Test public void testDeterministic() throws Exception { Coder coder = URICoder.of(); - for (String uriString : TEST_URI_STRINGS) { - for (Coder.Context context : TEST_CONTEXTS) { - // Obviously equal, but distinct as objects - CoderProperties.coderDeterministic(coder, context, new URI(uriString), new URI(uriString)); - } + CoderProperties.coderDeterministic(coder, new URI(uriString), new URI(uriString)); } } @Test public void testDecodeEncodeEqual() throws Exception { Coder coder = URICoder.of(); - for (String uriString : TEST_URI_STRINGS) { - for (Coder.Context context : TEST_CONTEXTS) { - CoderProperties.coderDecodeEncodeEqual(coder, context, new URI(uriString)); - } + CoderProperties.coderDecodeEncodeEqual(coder, new URI(uriString)); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarIntCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarIntCoderTest.java new file mode 100644 index 0000000000000..e0ccd1801638a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarIntCoderTest.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link VarIntCoder}. 
+ */ +@RunWith(JUnit4.class) +public class VarIntCoderTest { + + private static final List TEST_VALUES = Arrays.asList( + -11, -3, -1, 0, 1, 5, 13, 29, + Integer.MAX_VALUE, + Integer.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = VarIntCoder.of(); + for (Integer value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarLongCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarLongCoderTest.java new file mode 100644 index 0000000000000..bc85dac79f689 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/VarLongCoderTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link VarLongCoder}. + */ +@RunWith(JUnit4.class) +public class VarLongCoderTest { + + private static final List TEST_VALUES = Arrays.asList( + -11L, -3L, -1L, 0L, 1L, 5L, 13L, 29L, + Integer.MAX_VALUE + 131L, + Integer.MIN_VALUE - 29L, + Long.MAX_VALUE, + Long.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + Coder coder = VarLongCoder.of(); + for (Long value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } +} From 0e4feec9be53e489a6a80427f872edffe2879a9f Mon Sep 17 00:00:00 2001 From: vanya Date: Tue, 23 Dec 2014 16:16:11 -0800 Subject: [PATCH 0055/1541] Add TextIO.Write.{,.Bound}.withoutValidation() method and plumb the validate_sink value. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82750082 --- .../google/cloud/dataflow/sdk/io/TextIO.java | 58 ++++++++++++++++--- .../runners/dataflow/TextIOTranslator.java | 1 + .../dataflow/sdk/util/PropertyNames.java | 1 + .../cloud/dataflow/sdk/io/TextIOTest.java | 9 ++- 4 files changed, 59 insertions(+), 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index 86512be491927..14fa60cf42e5c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -355,6 +355,18 @@ public static Bound withCoder(Coder coder) { return new Bound<>(coder); } + /** + * Returns a TextIO.Write PTransform that has GCS path validation on + * pipeline creation disabled. + * + *
<p>
This can be useful in the case where the GCS output location does + * not exist at the pipeline creation time, but is expected to be available + * at execution time. + */ + public static Bound withoutValidation() { + return new Bound<>(DEFAULT_TEXT_CODER).withoutValidation(); + } + // TODO: appendingNewlines, gzipped, header, footer, etc. /** @@ -382,18 +394,22 @@ public static class Bound extends PTransform, PDone> { /** Shard template string. */ final String shardTemplate; + /** An option to indicate if output validation is desired. Default is true. */ + final boolean validate; + Bound(Coder coder) { - this(null, null, "", coder, 0, ShardNameTemplate.INDEX_OF_MAX); + this(null, null, "", coder, 0, ShardNameTemplate.INDEX_OF_MAX, true); } Bound(String name, String filenamePrefix, String filenameSuffix, Coder coder, - int numShards, String shardTemplate) { + int numShards, String shardTemplate, boolean validate) { super(name); this.coder = coder; this.filenamePrefix = filenamePrefix; this.filenameSuffix = filenameSuffix; this.numShards = numShards; this.shardTemplate = shardTemplate; + this.validate = validate; } /** @@ -401,7 +417,8 @@ public static class Bound extends PTransform, PDone> { * with the given step name. Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + validate); } /** @@ -414,7 +431,8 @@ public Bound named(String name) { */ public Bound to(String filenamePrefix) { validateOutputComponent(filenamePrefix); - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + validate); } /** @@ -427,7 +445,8 @@ public Bound to(String filenamePrefix) { */ public Bound withSuffix(String nameExtension) { validateOutputComponent(nameExtension); - return new Bound<>(name, filenamePrefix, nameExtension, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, nameExtension, coder, numShards, shardTemplate, + validate); } /** @@ -446,7 +465,8 @@ public Bound withSuffix(String nameExtension) { */ public Bound withNumShards(int numShards) { Preconditions.checkArgument(numShards >= 0); - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + validate); } /** @@ -458,7 +478,8 @@ public Bound withNumShards(int numShards) { * @see ShardNameTemplate */ public Bound withShardNameTemplate(String shardTemplate) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + validate); } /** @@ -471,7 +492,7 @@ public Bound withShardNameTemplate(String shardTemplate) { *
<p>
Does not modify this object. */ public Bound withoutSharding() { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, 1, ""); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, 1, "", validate); } /** @@ -483,7 +504,22 @@ public Bound withoutSharding() { * @param the type of the elements of the input PCollection */ public Bound withCoder(Coder coder) { - return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate); + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + validate); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that has GCS output path validation on pipeline creation disabled. + * Does not modify this object. + * + *
<p>
This can be useful in the case where the GCS output location does + * not exist at the pipeline creation time, but is expected to be + * available at execution time. + */ + public Bound withoutValidation() { + return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, + false); } @Override @@ -532,6 +568,10 @@ public Coder getCoder() { return coder; } + public boolean needsValidation() { + return validate; + } + static { DirectPipelineRunner.registerDefaultTransformEvaluator( Bound.class, new DirectPipelineRunner.TransformEvaluator() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index ee70949d5954c..92aaf210e4ec8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -119,6 +119,7 @@ private void translateWriteHelper( context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, transform.getShardNameTemplate()); context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); + context.addInput(PropertyNames.VALIDATE_SINK, transform.needsValidation()); long numShards = transform.getNumShards(); if (numShards > 0) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index c10f07fd98ed2..c7a5a307f1965 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -83,6 +83,7 @@ public class PropertyNames { public static final String USER_FN = "user_fn"; public static final String USER_NAME = "user_name"; public static final String USES_KEYED_STATE = "uses_keyed_state"; + public static final String VALIDATE_SINK = "validate_sink"; public static final String VALIDATE_SOURCE = "validate_source"; public static final String VALUE = "value"; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index 46bdd5944a969..16748cf566d17 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -415,9 +415,16 @@ public void testBadWildcardBrackets() throws Exception { } @Test - public void testWithoutValidationFlag() throws Exception { + public void testReadWithoutValidationFlag() throws Exception { TextIO.Read.Bound read = TextIO.Read.from("gs://bucket/foo*/baz"); assertTrue(read.needsValidation()); assertFalse(read.withoutValidation().needsValidation()); } + + @Test + public void testWriteWithoutValidationFlag() throws Exception { + TextIO.Write.Bound write = TextIO.Write.to("gs://bucket/foo/baz"); + assertTrue(write.needsValidation()); + assertFalse(write.withoutValidation().needsValidation()); + } } From 7325a9a1e73d5583e6e8000fb99aa6eec7c7cc49 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 29 Dec 2014 05:51:00 -0800 Subject: [PATCH 0056/1541] In DataflowAssert.IterableAssert, use appropriate Coder instead of Java serialization for expected results. 
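For illustration only (not part of the patch): a minimal sketch of the reworked
assertions, assuming the default in-process DirectPipelineRunner; the class name,
pipeline setup, and example values below are hypothetical. The relevant behavior
is that expected values are materialized via Create and re-encoded with the
PCollection's own Coder before comparison, rather than passed through Java
serialization, so element types that have a Coder but are not Serializable can
still be checked.

  import com.google.cloud.dataflow.sdk.Pipeline;
  import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
  import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
  import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
  import com.google.cloud.dataflow.sdk.transforms.Create;
  import com.google.cloud.dataflow.sdk.values.PCollection;

  import java.util.Arrays;

  public class DataflowAssertUsageSketch {
    public static void main(String[] args) {
      Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

      // Hypothetical input collection with an explicit element coder.
      PCollection<Integer> numbers = p
          .apply(Create.of(Arrays.asList(3, 1, 4, 1, 5)))
          .setCoder(BigEndianIntegerCoder.of());

      // The expected elements are encoded with the collection's coder
      // (BigEndianIntegerCoder here), not with Java serialization.
      DataflowAssert.that(numbers).containsInAnyOrder(1, 1, 3, 4, 5);

      p.run();
    }
  }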
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82947656 --- .../dataflow/sdk/testing/DataflowAssert.java | 576 ++++++++++++------ .../sdk/testing/DataflowAssertTest.java | 227 +++++++ 2 files changed, 629 insertions(+), 174 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/DataflowAssertTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java index 3debc9ff4f034..3773aab5ea93e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java @@ -16,7 +16,10 @@ package com.google.cloud.dataflow.sdk.testing; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; @@ -24,25 +27,26 @@ import com.google.cloud.dataflow.sdk.transforms.View; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.common.base.Optional; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.Arrays; import java.util.Collection; +import java.util.List; +import java.util.NoSuchElementException; /** * An assertion on the contents of a {@link PCollection} * incorporated into the pipeline. Such an assertion - * can be checked no matter what kind of - * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} is - * used, so it's good for testing using the - * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner}, - * the - * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}, - * etc. + * can be checked no matter what kind of {@link PipelineRunner} is + * used. * *
<p>
Note that the {@code DataflowAssert} call must precede the call - * to {@link com.google.cloud.dataflow.sdk.Pipeline#run}. + * to {@link Pipeline#run}. * *
<p>
Examples of use: *
<pre>
{@code
@@ -65,253 +69,368 @@
  * }
<p>

JUnit and Hamcrest must be linked in by any code that uses DataflowAssert. - * - * @param The type of elements in the input collection. */ -public class DataflowAssert { +public class DataflowAssert { + + // Do not instantiate. + private DataflowAssert() {} + /** - * Constructs an IterableAssert for the elements of the provided - * {@code PCollection}. + * Constructs an {@link IterableAssert} for the elements of the provided + * {@link PCollection PCollection}. */ - public static IterableAssert that(PCollection futureResult) { - return new IterableAssert<>(futureResult.apply(View.asIterable())); + public static IterableAssert that(PCollection actual) { + return new IterableAssert<>(actual.apply(View.asIterable())) + .setCoder(actual.getCoder()); } /** - * Constructs an IterableAssert for the value of the provided - * {@code PCollection>}, which must be a singleton. + * Constructs an {@link IterableAssert} for the value of the provided + * {@link PCollection PCollection>}, which must be a singleton. */ - public static IterableAssert thatSingletonIterable( - PCollection> futureResult) { - return new IterableAssert<>(futureResult.apply(View.>asSingleton())); + public static IterableAssert thatSingletonIterable(PCollection> actual) { + List> maybeElementCoder = actual.getCoder().getCoderArguments(); + Coder tCoder; + try { + tCoder = (Coder) Iterables.getOnlyElement(maybeElementCoder); + } catch (NoSuchElementException | IllegalArgumentException exc) { + throw new IllegalArgumentException( + "DataflowAssert.thatSingltonIterable requires a PCollection>" + + " with a Coder> where getCoderArguments() yields a" + + " single Coder to apply to the elements."); + } + + return new IterableAssert<>(actual.apply(View.>asSingleton())) + .setCoder(tCoder); } /** - * Constructs an IterableAssert for the value of the provided - * {@code PCollectionView, ?>}. + * Constructs an {@link IterableAssert} for the value of the provided + * {@code PCollectionView PCollectionView, ?>}. */ - public static IterableAssert thatIterable( - PCollectionView, ?> futureResult) { - return new IterableAssert<>(futureResult); + public static IterableAssert thatIterable(PCollectionView, ?> actual) { + return new IterableAssert<>(actual); } /** - * An assertion about the contents of a {@link PCollectionView<, ?>}. + * Constructs a {@link SingletonAssert} for the value of the provided + * {@code PCollection PCollection}, which must be a singleton. + */ + public static SingletonAssert thatSingleton(PCollection actual) { + return new SingletonAssert<>(actual.apply(View.asSingleton())) + .setCoder(actual.getCoder()); + } + + //////////////////////////////////////////////////////////// + + /** + * An assertion about the contents of a + * {@link PCollectionView PCollectionView<, ?>}. */ @SuppressWarnings("serial") public static class IterableAssert implements Serializable { - private final PCollectionView, ?> actualResults; - private IterableAssert(PCollectionView, ?> futureResult) { - actualResults = futureResult; + private final PCollectionView, ?> actualView; + private Optional> coder; + + protected IterableAssert(PCollectionView, ?> actualView) { + this.actualView = actualView; + coder = Optional.absent(); } /** - * Applies a SerializableFunction to check the elements of the Iterable. - * - *
<p>
Returns this IterableAssert. + * Sets the coder to use for elements of type {@code T}, as needed + * for internal purposes. */ - public IterableAssert satisfies( - final SerializableFunction, Void> checkerFn) { - - actualResults.getPipeline() - .apply(Create.of((Void) null)) - .setCoder(VoidCoder.of()) - .apply(ParDo - .withSideInputs(actualResults) - .of(new DoFn() { - @Override - public void processElement(ProcessContext c) { - Iterable actualContents = c.sideInput(actualResults); - checkerFn.apply(actualContents); - } - })); - + public IterableAssert setCoder(Coder coder) { + this.coder = Optional.of(coder); return this; } /** - * Checks that the Iterable contains the expected elements, in any - * order. + * Sets the coder to use for elements of type {@code T}, as needed + * for internal purposes. * - *
<p>
Returns this IterableAssert. + *
<p>
Returns this {@code IterableAssert}. */ - public IterableAssert containsInAnyOrder(T... expectedElements) { - return this.satisfies(new AssertContainsInAnyOrder(expectedElements)); + public IterableAssert setCoder(Optional> coder) { + this.coder = coder; + return this; } /** - * Checks that the Iterable contains the expected elements, in any - * order. - * - *
<p>
Returns this IterableAssert. + * Gets the coder, which may yet be absent. */ - public IterableAssert containsInAnyOrder( - Collection expectedElements) { - return this.satisfies(new AssertContainsInAnyOrder(expectedElements)); + public Coder getCoder() { + if (coder.isPresent()) { + return coder.get(); + } else { + throw new IllegalStateException( + "Attempting to access the coder of an IterableAssert" + + " which has not been set yet."); + } } /** - * Checks that the Iterable contains the expected elements, in the - * specified order. + * Applies a {@link SerializableFunction} to check the elements of the {@code Iterable}. * - *
<p>
Returns this IterableAssert. + *
<p>
Returns this {@code IterableAssert}. */ - public IterableAssert containsInOrder(T... expectedElements) { - return this.satisfies(new AssertContainsInOrder(expectedElements)); + public IterableAssert satisfies(SerializableFunction, Void> checkerFn) { + new OneSideInputAssert>(actualView).satisfies(checkerFn); + return this; } /** - * Checks that the Iterable contains the expected elements, in the - * specified order. + * Applies a {@link SerializableFunction} to check the elements of the {@code Iterable}. * - *
<p>
Returns this IterableAssert. + *
<p>
Returns this {@code IterableAssert}. */ - public IterableAssert containsInOrder(Collection expectedElements) { - return this.satisfies(new AssertContainsInOrder(expectedElements)); + public IterableAssert satisfies( + AssertRelation, Iterable> relation, + Iterable expectedElements) { + new TwoSideInputAssert, Iterable>(actualView, + actualView.getPipeline() + .apply(Create.of(expectedElements)) + .setOrdered(true) + .setCoder(getCoder()) + .apply(View.asIterable())) + .satisfies(relation); + return this; } /** - * SerializableFunction that performs an {@code Assert.assertThat()} - * operation using a {@code Matcher} operation that takes an array - * of elements. + * Checks that the {@code Iterable} contains the expected elements, in any + * order. + * + *
<p>
Returns this {@code IterableAssert}. */ - @SuppressWarnings("serial") - static class AssertThatIterable extends AssertThat, T[]> { - AssertThatIterable(T[] expected, - String matcherClassName, - String matcherFactoryMethodName) { - super(expected, Object[].class, - matcherClassName, matcherFactoryMethodName); - } + public IterableAssert containsInAnyOrder(Iterable expectedElements) { + return satisfies(new AssertContainsInAnyOrderRelation(), expectedElements); } /** - * SerializableFunction that verifies that an Iterable contains - * expected items in any order. + * Checks that the {@code Iterable} contains the expected elements, in any + * order. + * + *
<p>
Returns this {@code IterableAssert}. */ - @SuppressWarnings("serial") - static class AssertContainsInAnyOrder extends AssertThatIterable { - AssertContainsInAnyOrder(T... expected) { - super(expected, - "org.hamcrest.collection.IsIterableContainingInAnyOrder", - "containsInAnyOrder"); - } - @SuppressWarnings("unchecked") - AssertContainsInAnyOrder(Collection expected) { - this((T[]) expected.toArray()); - } - } + public IterableAssert containsInAnyOrder(T... expectedElements) { + return satisfies( + new AssertContainsInAnyOrderRelation(), + Arrays.asList(expectedElements)); + }; + /** - * SerializableFunction that verifies that an Iterable contains - * expected items in the provided order. + * Checks that the {@code Iterable} contains the expected elements, in the + * specified order. + * + *
<p>
Returns this {@code IterableAssert}. */ - @SuppressWarnings("serial") - static class AssertContainsInOrder extends AssertThatIterable { - AssertContainsInOrder(T... expected) { - super(expected, - "org.hamcrest.collection.IsIterableContainingInOrder", - "contains"); - } - @SuppressWarnings("unchecked") - AssertContainsInOrder(Collection expected) { - this((T[]) expected.toArray()); - } + public IterableAssert containsInOrder(T... expectedElements) { + return this.satisfies( + new AssertContainsInOrderRelation(), + Arrays.asList(expectedElements)); } - } - - ///////////////////////////////////////////////////////////////////////////// - /** - * Constructs a SingletonAssert for the value of the provided - * {@code PCollection}, which must be a singleton. - */ - public static SingletonAssert thatSingleton(PCollection futureResult) { - return new SingletonAssert<>(futureResult.apply(View.asSingleton())); + /** + * Checks that the {@code Iterable} contains the expected elements, in the + * specified order. + * + *
<p>
Returns this {@code IterableAssert}. + */ + public IterableAssert containsInOrder(Iterable expectedElements) { + return this.satisfies(new AssertContainsInOrderRelation(), expectedElements); + } } /** - * An assertion about a single value. + * An assertion about the single value of type {@code T} + * associated with a {@link PCollectionView PCollectionView}. */ @SuppressWarnings("serial") public static class SingletonAssert implements Serializable { - private final PCollectionView actualResult; - private SingletonAssert(PCollectionView futureResult) { - actualResult = futureResult; + private final PCollectionView actualView; + private Optional> coder; + + protected SingletonAssert(PCollectionView actualView) { + this.actualView = actualView; + coder = Optional.absent(); } /** - * Applies a SerializableFunction to check the value of this - * SingletonAssert's view. - * - *

Returns this SingletonAssert. + * Sets the coder to use for elements of type {@code T}, as needed + * for internal purposes. */ - public SingletonAssert satisfies(final SerializableFunction checkerFn) { - actualResult.getPipeline() - .apply(Create.of((Void) null)) - .setCoder(VoidCoder.of()) - .apply(ParDo - .withSideInputs(actualResult) - .of(new DoFn() { - @Override - public void processElement(ProcessContext c) { - T actualContents = c.sideInput(actualResult); - checkerFn.apply(actualContents); - } - })); - + public SingletonAssert setCoder(Coder coder) { + this.coder = Optional.of(coder); return this; } /** - * Checks that the value of this SingletonAssert's view is equal - * to the expected value. + * Sets the coder to use for elements of type {@code T}, as needed + * for internal purposes. * - *

Returns this SingletonAssert. + *

Returns this {@code SingletonAssert}. */ - public SingletonAssert is(T expectedValue) { - return this.satisfies(new AssertIs(expectedValue)); + public SingletonAssert setCoder(Optional> coder) { + this.coder = coder; + return this; } /** - * SerializableFunction that performs an {@code Assert.assertThat()} - * operation using a {@code Matcher} operation that takes a single element. + * Gets the coder, which may yet be absent. */ - @SuppressWarnings("serial") - static class AssertThatValue extends AssertThat { - AssertThatValue(T expected, - String matcherClassName, - String matcherFactoryMethodName) { - super(expected, Object.class, - matcherClassName, matcherFactoryMethodName); + public Coder getCoder() { + if (coder.isPresent()) { + return coder.get(); + } else { + throw new IllegalStateException( + "Attempting to access the coder of a SingletonAssert" + + " which has not been set yet."); } } /** - * SerializableFunction that verifies that a value is equal to an - * expected value. + * Applies a {@link SerializableFunction} to check the value of this + * {@code SingletonAssert}'s view. + * + *

Returns this {@code SingletonAssert}. + */ + public SingletonAssert satisfies(final SerializableFunction checkerFn) { + new OneSideInputAssert(actualView).satisfies(checkerFn); + return this; + } + + /** + * Applies an {@link AssertRelation} to check the provided relation against the + * value of this assert and the provided expected value. + * + *
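A minimal caller-side sketch of this singleton path, mirroring the testIsEqualTo case added in DataflowAssertTest below and assuming a TestPipeline and the Create transform:

    Pipeline pipeline = TestPipeline.create();
    PCollection<Integer> value = pipeline.apply(Create.of(43));
    // isEqualTo delegates to satisfies(new AssertIsEqualToRelation<Integer>(), 43),
    // routing the expected value through a second singleton side input.
    DataflowAssert.thatSingleton(value).isEqualTo(43);
    pipeline.run();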

Returns this {@code SingletonAssert}. */ - @SuppressWarnings("serial") - public static class AssertIs extends AssertThatValue { - AssertIs(T expected) { - super(expected, "org.hamcrest.core.IsEqual", "equalTo"); - } + public SingletonAssert satisfies( + AssertRelation relation, + T expectedValue) { + new TwoSideInputAssert(actualView, + actualView.getPipeline() + .apply(Create.of(expectedValue)) + .setOrdered(true) + .setCoder(getCoder()) + .apply(View.asSingleton())) + .satisfies(relation); + return this; + } + + + /** + * Checks that the value of this {@code SingletonAssert}'s view is equal + * to the expected value. + * + *

Returns this {@code SingletonAssert}. + */ + public SingletonAssert isEqualTo(T expectedValue) { + return satisfies(new AssertIsEqualToRelation(), expectedValue); + } + + @Deprecated + public SingletonAssert is(T expectedValue) { + return isEqualTo(expectedValue); + } + + } + + //////////////////////////////////////////////////////////////////////// + + /** + * An assertion checker that takes a single {@link PCollectionView PCollectionView} + * and an assertion over {@code A}, and checks it within a dataflow pipeline. + * + *
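A minimal sketch of the serializable-checker path that this helper backs, mirroring the testSerializablePredicate case added below; it assumes a PCollection<Integer> named pcollection and JUnit's Assert:

    DataflowAssert.that(pcollection).satisfies(
        new SerializableFunction<Iterable<Integer>, Void>() {
          @Override
          public Void apply(Iterable<Integer> contents) {
            // The checker, including anything it captures, must be serializable.
            for (Integer n : contents) {
              Assert.assertNotNull(n);
            }
            return null;
          }
        });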

Note that the entire assertion must be serializable. If + * you need to make assertions involving multiple inputs + * that are each not serializable, use TwoSideInputAssert. + * + *

This is generally useful for assertion functions that + * are serializable but whose underlying data may not have a coder. + */ + @SuppressWarnings("serial") + private static class OneSideInputAssert implements Serializable { + + private final PCollectionView actualView; + + public OneSideInputAssert(PCollectionView actualView) { + this.actualView = actualView; + } + + public OneSideInputAssert satisfies( + final SerializableFunction checkerFn) { + actualView.getPipeline() + .apply(Create.of((Void) null)) + .setCoder(VoidCoder.of()) + .apply(ParDo + .withSideInputs(actualView) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + Actual actualContents = c.sideInput(actualView); + checkerFn.apply(actualContents); + } + })); + return this; } } - ///////////////////////////////////////////////////////////////////////////// + /** + * An assertion checker that takes a {@link PCollectionView PCollectionView}, + * a {@link PCollectionView PCollectionView}, a relation + * over {@code A} and {@code B}, and checks that the relation holds + * within a dataflow pipeline. + * + *
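A caller-level sketch of how this relation-based check is reached from the public API, mirroring the testContainsInAnyOrder case added below (TestPipeline and Create assumed); only a Coder, not Java serializability, is required of the expected elements:

    PCollection<Integer> nums = pipeline.apply(Create.of(1, 2, 3, 4));
    // containsInAnyOrder builds an AssertContainsInAnyOrderRelation and ships the
    // expected elements through Create + View.asIterable() as the second side input.
    DataflowAssert.that(nums).containsInAnyOrder(2, 1, 4, 3);
    pipeline.run();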

This is useful when either/both of {@code A} and {@code B} + * are not serializable, but have coders (provided + * by the underlying {@link PCollection}s). + */ + @SuppressWarnings("serial") + private static class TwoSideInputAssert implements Serializable { + private final PCollectionView actualView; + private final PCollectionView expectedView; - // Do not instantiate. - private DataflowAssert() {} + protected TwoSideInputAssert( + PCollectionView actualView, + PCollectionView expectedView) { + this.actualView = actualView; + this.expectedView = expectedView; + } + + public TwoSideInputAssert satisfies( + final AssertRelation relation) { + actualView.getPipeline() + .apply(Create.of((Void) null)) + .setCoder(VoidCoder.of()) + .apply(ParDo + .withSideInputs(actualView, expectedView) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + Actual actualContents = c.sideInput(actualView); + Expected expectedContents = c.sideInput(expectedView); + relation.assertFor(expectedContents).apply(actualContents); + } + })); + return this; + } + } + + ///////////////////////////////////////////////////////////////////////////// /** - * SerializableFunction that performs an {@code Assert.assertThat()} - * operation using a {@code Matcher} operation. + * A {@link SerializableFunction} that performs an + * {@code Assert.assertThat()} operation using a + * {@code Matcher} operation. * - *
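Conceptually, each such function stands in for a direct Hamcrest assertion like the sketch below; only the matcher class and factory-method names are stored, so the Matcher itself never has to be serialized (JUnit's Assert and Hamcrest's IsIterableContainingInAnyOrder assumed):

    List<String> actual = Arrays.asList("b", "a");
    // Equivalent of what AssertContainsInAnyOrder reconstructs reflectively in apply().
    Assert.assertThat(actual,
        IsIterableContainingInAnyOrder.containsInAnyOrder("a", "b"));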

The MatcherFactory should take an {@code Expected} and + *

The {@code MatcherFactory} should take an {@code Expected} and * produce a Matcher to be used to check an {@code Actual} value * against. */ @@ -319,12 +438,12 @@ private DataflowAssert() {} public static class AssertThat implements SerializableFunction { final Expected expected; - final Class expectedClass; + final Class expectedClass; final String matcherClassName; final String matcherFactoryMethodName; AssertThat(Expected expected, - Class expectedClass, + Class expectedClass, String matcherClassName, String matcherFactoryMethodName) { this.expected = expected; @@ -358,11 +477,10 @@ public Void apply(Actual in) { } /** - * SerializableFunction that performs an {@code Assert.assertThat()} - * operation using a {@code Matcher} operation that takes a single element. + * An {@link AssertThat} taking a single element. */ @SuppressWarnings("serial") - static class AssertThatValue extends AssertThat { + private static class AssertThatValue extends AssertThat { AssertThatValue(T expected, String matcherClassName, String matcherFactoryMethodName) { @@ -372,13 +490,123 @@ static class AssertThatValue extends AssertThat { } /** - * SerializableFunction that verifies that a value is equal to an + * An {@link AssertThatValue} that verifies that an actual value is equal to an * expected value. */ @SuppressWarnings("serial") - public static class AssertIs extends AssertThatValue { - public AssertIs(T expected) { + private static class AssertIsEqualTo extends AssertThatValue { + public AssertIsEqualTo(T expected) { super(expected, "org.hamcrest.core.IsEqual", "equalTo"); } } + + /** + * An {@link AssertThat} that operates on an {@code Iterable}. The + * underlying matcher takes a {@code T[]} of expected values, for + * compatibility with the corresponding Hamcrest {@code Matcher}s. + */ + @SuppressWarnings("serial") + private static class AssertThatIterable extends AssertThat, T[]> { + AssertThatIterable(T[] expected, + String matcherClassName, + String matcherFactoryMethodName) { + super(expected, Object[].class, + matcherClassName, matcherFactoryMethodName); + } + } + + /** + * An {@link AssertThatIterable} that verifies that an {@code Iterable} contains + * expected items in any order. + */ + @SuppressWarnings("serial") + private static class AssertContainsInAnyOrder extends AssertThatIterable { + public AssertContainsInAnyOrder(T... expected) { + super(expected, + "org.hamcrest.collection.IsIterableContainingInAnyOrder", + "containsInAnyOrder"); + } + + @SuppressWarnings("unchecked") + public AssertContainsInAnyOrder(Collection expected) { + this((T[]) expected.toArray()); + } + + @SuppressWarnings("unchecked") + public AssertContainsInAnyOrder(Iterable expected) { + this(Lists.newArrayList(expected)); + } + } + + /** + * An {@link AssertThatIterable} that verifies that an {@code Iterable} contains + * the expected items in the provided order. + */ + @SuppressWarnings("serial") + private static class AssertContainsInOrder extends AssertThatIterable { + public AssertContainsInOrder(T... 
expected) { + super(expected, + "org.hamcrest.collection.IsIterableContainingInOrder", + "contains"); + } + + @SuppressWarnings("unchecked") + public AssertContainsInOrder(Collection expected) { + this((T[]) expected.toArray()); + } + + @SuppressWarnings("unchecked") + public AssertContainsInOrder(Iterable expected) { + this(Lists.newArrayList(expected)); + } + } + + //////////////////////////////////////////////////////////// + + /** + * A serializable function implementing a binary predicate + * between types {@code Actual} and {@code Expected}. + */ + public static interface AssertRelation extends Serializable { + public SerializableFunction assertFor(Expected input); + } + + /** + * An {@link AssertRelation} implementing the binary predicate + * that two objects are equal. + */ + private static class AssertIsEqualToRelation + implements AssertRelation { + + @Override + public AssertThat assertFor(T expected) { + return new AssertIsEqualTo(expected); + } + } + + /** + * An {@code AssertRelation} implementing the binary predicate + * that two collections are equal modulo reordering. + */ + private static class AssertContainsInAnyOrderRelation + implements AssertRelation, Iterable> { + + @Override + public SerializableFunction, Void> assertFor(Iterable expectedElements) { + return new AssertContainsInAnyOrder(expectedElements); + } + } + + /** + * A {@code AssertRelation} implementating the binary function + * that two iterables have equal contents, in the same order. + */ + private static class AssertContainsInOrderRelation + implements AssertRelation, Iterable> { + + @Override + public SerializableFunction, Void> assertFor(Iterable expectedElements) { + return new AssertContainsInOrder(expectedElements); + } + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/DataflowAssertTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/DataflowAssertTest.java new file mode 100644 index 0000000000000..11a6384245591 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/DataflowAssertTest.java @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + * Test case for {@link DataflowAssert}. + */ +@RunWith(JUnit4.class) +public class DataflowAssertTest implements Serializable { + + @Rule + public transient ExpectedException thrown = ExpectedException.none(); + + private static class NotSerializableObject { + + @Override + public boolean equals(Object other) { + return (other instanceof NotSerializableObject); + } + + @Override + public int hashCode() { + return 73; + } + } + + private static class NotSerializableObjectCoder extends AtomicCoder { + private NotSerializableObjectCoder() { } + private static final NotSerializableObjectCoder INSTANCE = new NotSerializableObjectCoder(); + + @JsonCreator + public static NotSerializableObjectCoder of() { + return INSTANCE; + } + + @Override + public void encode(NotSerializableObject value, OutputStream outStream, Context context) + throws CoderException, IOException { + } + + @Override + public NotSerializableObject decode(InputStream inStream, Context context) + throws CoderException, IOException { + return new NotSerializableObject(); + } + + @Override + public boolean isDeterministic() { + return true; + } + + @Override + public boolean isRegisterByteSizeObserverCheap(NotSerializableObject value, Context context) { + return true; + } + + @Override + public void registerByteSizeObserver( + NotSerializableObject value, ElementByteSizeObserver observer, Context context) + throws Exception { + observer.update(0L); + } + } + + /** + * A {@link DataflowAssert} about the contents of a {@link PCollection} + * must not require the contents of the {@link PCollection} to be + * serializable. + */ + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testContainsInAnyOrderNotSerializable() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of( + new NotSerializableObject(), + new NotSerializableObject())) + .setCoder(NotSerializableObjectCoder.of()); + + DataflowAssert.that(pcollection).containsInAnyOrder( + new NotSerializableObject(), + new NotSerializableObject()); + + pipeline.run(); + } + + /** + * A {@link DataflowAssert} about the contents of a {@link PCollection} + * is allows to be verified by an arbitrary {@link SerializableFunction}, + * though. 
+ */ + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSerializablePredicate() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of( + new NotSerializableObject(), + new NotSerializableObject())) + .setCoder(NotSerializableObjectCoder.of()); + + DataflowAssert.that(pcollection).satisfies( + new SerializableFunction, Void>() { + @Override + public Void apply(Iterable contents) { + return (Void) null; // no problem! + } + }); + + pipeline.run(); + } + + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testIsEqualTo() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of(43)); + + DataflowAssert.thatSingleton(pcollection).isEqualTo(43); + + pipeline.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testContainsInAnyOrder() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of(1, 2, 3, 4)); + + DataflowAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3); + + pipeline.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testContainsInAnyOrderFalse() throws Exception { + // The actual AssertionError is deep in the stack + // TODO: dig it out + thrown.expect(RuntimeException.class); + + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of(1, 2, 3, 4)); + + DataflowAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3, 7); + + pipeline.run(); + } + + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testContainsInOrder() throws Exception { + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of(1, 2, 3, 4)) + .setOrdered(true); + + DataflowAssert.that(pcollection).containsInOrder(1, 2, 3, 4); + + pipeline.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testContainsInOrderFalse() throws Exception { + // The actual AssertionError is deep in the stack + // TODO: dig it out + thrown.expect(RuntimeException.class); + + Pipeline pipeline = TestPipeline.create(); + + PCollection pcollection = pipeline + .apply(Create.of(1, 2, 3, 4)) + .setOrdered(true); + + DataflowAssert.that(pcollection).containsInOrder(1, 2, 4, 3); + + pipeline.run(); + } +} From 02cc89f5a2fe65380e7bdfbc22f42f29743e8f33 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 29 Dec 2014 11:46:32 -0800 Subject: [PATCH 0057/1541] Add DelegateCoder and StringDelegateCoder. Remove URICoder (now trivial). 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82962937 --- .../google/cloud/dataflow/examples/TfIdf.java | 5 +- .../cloud/dataflow/examples/TfIdfTest.java | 3 + .../dataflow/sdk/coders/CoderRegistry.java | 2 - .../dataflow/sdk/coders/DelegateCoder.java | 103 ++++++++++++++++++ .../sdk/coders/StringDelegateCoder.java | 72 ++++++++++++ .../cloud/dataflow/sdk/coders/URICoder.java | 78 ------------- .../dataflow/sdk/coders/CoderProperties.java | 5 + .../sdk/coders/DelegateCoderTest.java | 73 +++++++++++++ ...Test.java => StringDelegateCoderTest.java} | 33 ++++-- 9 files changed, 284 insertions(+), 90 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java delete mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DelegateCoderTest.java rename sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/{URICoderTest.java => StringDelegateCoderTest.java} (60%) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java index 3f22e27eab264..0349f3614f3be 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java @@ -19,8 +19,8 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; -import com.google.cloud.dataflow.sdk.coders.URICoder; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.options.Default; import com.google.cloud.dataflow.sdk.options.Description; @@ -153,7 +153,7 @@ public ReadDocuments(Iterable uris) { @Override public Coder getDefaultOutputCoder() { - return KvCoder.of(URICoder.of(), StringUtf8Coder.of()); + return KvCoder.of(StringDelegateCoder.of(URI.class), StringUtf8Coder.of()); } @Override @@ -397,6 +397,7 @@ public void processElement(ProcessContext c) { public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline pipeline = Pipeline.create(options); + pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class)); pipeline .apply(new ReadDocuments(listInputDocuments(options))) diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java index 990458f5e059c..b3a115b522362 100644 --- a/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/TfIdfTest.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.examples; import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.Create; @@ -45,6 +46,8 @@ public class TfIdfTest { public void testTfIdf() throws Exception { Pipeline pipeline = TestPipeline.create(); + 
pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class)); + PCollection>> wordToUriAndTfIdf = pipeline .apply(Create.of( KV.of(new URI("x"), "a b c d"), diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java index 670b4e3e320af..d2b61293c1dc6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -35,7 +35,6 @@ import java.lang.reflect.Type; import java.lang.reflect.TypeVariable; import java.lang.reflect.WildcardType; -import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -116,7 +115,6 @@ public void registerStandardCoders() { registerCoder(TableRow.class, TableRowJsonCoder.class); registerCoder(Void.class, VoidCoder.class); registerCoder(byte[].class, ByteArrayCoder.class); - registerCoder(URI.class, URICoder.class); registerCoder(TimestampedValue.class, TimestampedValue.TimestampedValueCoder.class); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java new file mode 100644 index 0000000000000..9cc75167872fd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + * A {@code DelegateCoder} wraps a {@link Coder Coder

} and + * encodes/decodes values of type {@code T}s by converting + * to/from {@code DT} and then encoding/decoding using the underlying + * {@link Coder Coder
}. + * + *
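A usage sketch, mirroring the DelegateCoderTest added later in this patch: a Set<Integer> is coded by converting it to a List<Integer> for an underlying ListCoder:

    Coder<Set<Integer>> setCoder = DelegateCoder.of(
        ListCoder.of(VarIntCoder.of()),
        new DelegateCoder.CodingFunction<Set<Integer>, List<Integer>>() {
          @Override
          public List<Integer> apply(Set<Integer> input) {
            return new ArrayList<>(input);  // convert to the delegate type for encoding
          }
        },
        new DelegateCoder.CodingFunction<List<Integer>, Set<Integer>>() {
          @Override
          public Set<Integer> apply(List<Integer> input) {
            return new HashSet<>(input);  // convert back to the original type on decode
          }
        });

Here java.util.ArrayList and HashSet stand in for the Guava helpers used in the test.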

The conversions from {@code T} to {@code DT} and vice versa + * must be supplied as {@link CodingFunction}, a serializable + * function which may throw any {@code Exception}. If a thrown + * exception is an instance of {@link CoderException} or + * {@link IOException}, it will be re-thrown, otherwise it will be wrapped as + * a {@link CoderException}. + * + * @param The type of objects coded by this Coder. + * @param

The type of objects a {@code T} will be converted to for coding. + */ +public class DelegateCoder extends CustomCoder { + + /** + * A {@code CodingFunction} is a serializable function + * from {@code Input} to {@code Output} that + * may throw any {@code Exception}. + */ + public static interface CodingFunction extends Serializable { + public abstract Output apply(Input input) throws Exception; + } + + public static DelegateCoder of(Coder
coder, + CodingFunction toFn, + CodingFunction fromFn) { + return new DelegateCoder(coder, toFn, fromFn); + } + + @Override + public void encode(T value, OutputStream outStream, Context context) + throws CoderException, IOException { + coder.encode(applyAndWrapExceptions(toFn, value), outStream, context); + } + + @Override + public T decode(InputStream inStream, Context context) throws CoderException, IOException { + return applyAndWrapExceptions(fromFn, coder.decode(inStream, context)); + } + + @Override + public boolean isDeterministic() { + return coder.isDeterministic(); + } + + @Override + public String toString() { + return "DelegateCoder(" + coder + ")"; + } + + ///////////////////////////////////////////////////////////////////////////// + + private Output applyAndWrapExceptions( + CodingFunction fn, + Input input) throws CoderException, IOException { + try { + return fn.apply(input); + } catch (IOException exc) { + throw exc; + } catch (Exception exc) { + throw new CoderException(exc); + } + } + + private final Coder
coder; + private final CodingFunction toFn; + private final CodingFunction fromFn; + + protected DelegateCoder(Coder
coder, + CodingFunction toFn, + CodingFunction fromFn) { + this.coder = coder; + this.fromFn = fromFn; + this.toFn = toFn; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java new file mode 100644 index 0000000000000..82fe806e43205 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import java.lang.reflect.InvocationTargetException; + +/** + * A {@code StringDelegateCoder} wraps a {@link Coder} + * and encodes/decodes values of type {@code T} via string representations. + * + *

To decode, the input byte stream is decoded to + * a {@code String}, and this is passed to the single-arg + * constructor for {@code T}. + * + *

To encode, the input value is converted via {@code toString()}, + * and this string is encoded. + * + *
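A usage sketch for a qualifying class, matching the TfIdf change earlier in this patch, which registers URI this way (a Pipeline named pipeline assumed):

    // URI has a single-String constructor and a toString() that round-trips it.
    Coder<URI> uriCoder = StringDelegateCoder.of(URI.class);
    pipeline.getCoderRegistry().registerCoder(URI.class, uriCoder);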

In order for this to operate correctly for a class {@code Clazz}, + * it must be the case for any instance {@code x} that + * {@code x.equals(new Clazz(x.toString()))}. + * + * @param The type of objects coded. + */ +public class StringDelegateCoder extends DelegateCoder { + + public static StringDelegateCoder of(Class clazz) { + return new StringDelegateCoder(clazz); + } + + @Override + public String toString() { + return "StringDelegateCoder(" + clazz + ")"; + } + + private final Class clazz; + + protected StringDelegateCoder(final Class clazz) { + super(StringUtf8Coder.of(), + new CodingFunction() { + @Override + public String apply(T input) { + return input.toString(); + } + }, + new CodingFunction() { + @Override + public T apply(String input) throws + NoSuchMethodException, + InstantiationException, + IllegalAccessException, + InvocationTargetException { + return clazz.getConstructor(String.class).newInstance(input); + } + }); + + this.clazz = clazz; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java deleted file mode 100644 index eedcddf787e10..0000000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/URICoder.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.google.cloud.dataflow.sdk.coders; - -import com.fasterxml.jackson.annotation.JsonCreator; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.net.URISyntaxException; - -/** - * A {@code URICoder} encodes/decodes {@link URI}s by conversion to/from {@link String}, delegating - * encoding/decoding of the string to {@link StringUtf8Coder}. 
- */ -@SuppressWarnings("serial") -public class URICoder extends AtomicCoder { - - @JsonCreator - public static URICoder of() { - return INSTANCE; - } - - private static final URICoder INSTANCE = new URICoder(); - private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); - - private URICoder() {} - - ///////////////////////////////////////////////////////////////////////////// - - @Override - public void encode(URI value, OutputStream outStream, Context context) - throws IOException { - if (value == null) { - throw new CoderException("cannot encode a null URI"); - } - STRING_CODER.encode(value.toString(), outStream, context); - } - - @Override - public URI decode(InputStream inStream, Context context) - throws IOException { - try { - return new URI(STRING_CODER.decode(inStream, context)); - } catch (URISyntaxException exn) { - throw new CoderException(exn); - } - } - - @Override - public boolean isDeterministic() { - return STRING_CODER.isDeterministic(); - } - - @Override - protected long getEncodedElementByteSize(URI value, Context context) - throws Exception { - if (value == null) { - throw new CoderException("cannot encode a null URI"); - } - return STRING_CODER.getEncodedElementByteSize(value.toString(), context); - } -} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java index 89754789cc17f..5fbaf1f5a59e6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertThat; import static org.junit.Assume.assumeThat; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.common.collect.Iterables; import java.io.ByteArrayInputStream; @@ -159,6 +160,10 @@ public static > void coderDecodeEncodeContentsInSameOr } } + public static void coderSerializable(Coder coder) { + SerializableUtils.ensureSerializable(coder); + } + ////////////////////////////////////////////////////////////////////////// private static byte[] encode( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DelegateCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DelegateCoderTest.java new file mode 100644 index 0000000000000..e6f5cb83b5016 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/DelegateCoderTest.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** Unit tests for {@link DelegateCoder}. 
*/ +@RunWith(JUnit4.class) +public class DelegateCoderTest { + + private static final List> TEST_VALUES = Arrays.>asList( + Collections.emptySet(), + Collections.singleton(13), + new HashSet<>(Arrays.asList(31, -5, 83))); + + private static final Coder> coder = DelegateCoder.of( + ListCoder.of(VarIntCoder.of()), + new DelegateCoder.CodingFunction, List>() { + public List apply(Set input) { + return Lists.newArrayList(input); + } + }, + new DelegateCoder.CodingFunction, Set>() { + public Set apply(List input) { + return Sets.newHashSet(input); + } + }); + + @Test + public void testDeterministic() throws Exception { + for (Set value : TEST_VALUES) { + CoderProperties.coderDeterministic( + coder, value, Sets.newHashSet(value)); + } + } + + @Test + public void testDecodeEncodeEqual() throws Exception { + for (Set value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + } + + @Test + public void testSerializable() throws Exception { + CoderProperties.coderSerializable(coder); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java similarity index 60% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java index dd0d32ec419b9..f930dfafbda68 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/URICoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java @@ -16,6 +16,9 @@ package com.google.cloud.dataflow.sdk.coders; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -24,30 +27,44 @@ import java.util.Arrays; import java.util.List; -/** Unit tests for {@link URICoder}. */ +/** Unit tests for {@link StringDelegateCoder}. */ @RunWith(JUnit4.class) -public class URICoderTest { +public class StringDelegateCoderTest { + + // Test data + + private static final Coder uriCoder = StringDelegateCoder.of(URI.class); private static final List TEST_URI_STRINGS = Arrays.asList( "http://www.example.com", - "gs://myproject/mybucket/a/gcs/path", - "/just/a/path", + "gs://myproject/mybucket/some/gcs/path", + "/just/some/path", "file:/path/with/no/authority", "file:///path/with/empty/authority"); + // Tests + + private static final List TEST_CONTEXTS = Arrays.asList( + Coder.Context.NESTED, + Coder.Context.OUTER); + @Test public void testDeterministic() throws Exception { - Coder coder = URICoder.of(); + assertThat(uriCoder.isDeterministic(), equalTo(true)); for (String uriString : TEST_URI_STRINGS) { - CoderProperties.coderDeterministic(coder, new URI(uriString), new URI(uriString)); + CoderProperties.coderDeterministic(uriCoder, new URI(uriString), new URI(uriString)); } } @Test public void testDecodeEncodeEqual() throws Exception { - Coder coder = URICoder.of(); for (String uriString : TEST_URI_STRINGS) { - CoderProperties.coderDecodeEncodeEqual(coder, new URI(uriString)); + CoderProperties.coderDecodeEncodeEqual(uriCoder, new URI(uriString)); } } + + @Test + public void testSerializable() throws Exception { + CoderProperties.coderSerializable(uriCoder); + } } From c2a39f9d6c9342cae89b377398ee102b996e035f Mon Sep 17 00:00:00 2001 From: vanya Date: Mon, 29 Dec 2014 13:10:33 -0800 Subject: [PATCH 0058/1541] Clean up unused variables in TextIOTest. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=82967222 --- .../google/cloud/dataflow/sdk/io/TextIOTest.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index 16748cf566d17..28270949c328a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -228,9 +228,9 @@ void runTestWrite(T[] elems, Coder coder) throws Exception { write = TextIO.Write.to(filename).withCoder(coder).withoutSharding(); } - PDone output = input.apply(write); + input.apply(write); - EvaluationResults results = p.run(); + p.run(); BufferedReader reader = new BufferedReader(new FileReader(tmpFile)); List actual = new ArrayList<>(); @@ -285,10 +285,9 @@ public void testWriteSharded() throws IOException { p.apply(Create.of(Arrays.asList(LINES_ARRAY))) .setCoder(StringUtf8Coder.of()); - PDone done = input.apply( - TextIO.Write.to(filename).withNumShards(2).withSuffix(".txt")); + input.apply(TextIO.Write.to(filename).withNumShards(2).withSuffix(".txt")); - EvaluationResults results = p.run(); + p.run(); String[] files = outFolder.list(); @@ -334,9 +333,9 @@ public void testUnsupportedFilePattern() throws IOException { p.apply(Create.of(Arrays.asList(LINES_ARRAY))) .setCoder(StringUtf8Coder.of()); - PDone done = input.apply(TextIO.Write.to(filename)); + input.apply(TextIO.Write.to(filename)); - EvaluationResults results = p.run(); + p.run(); Assert.fail("Expected failure due to unsupported output pattern"); } From 0cbe51924de7243de0418a20ce3ecd5e85da2369 Mon Sep 17 00:00:00 2001 From: robertwb Date: Mon, 5 Jan 2015 17:55:00 -0800 Subject: [PATCH 0059/1541] Take bucket into account when running the PGBK(-and-bucket) operation. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83305116 --- .../worker/MapTaskExecutorFactory.java | 59 +++++++++++----- .../PartialGroupByKeyOperationTest.java | 69 ++++++++++--------- 2 files changed, 77 insertions(+), 51 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java index dd0133e7b3763..b1c4f6a63a8d5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java @@ -44,6 +44,7 @@ import com.google.cloud.dataflow.sdk.util.common.worker.ParDoFn; import com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation; import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation; +import com.google.cloud.dataflow.sdk.util.common.worker.PartialGroupByKeyOperation.GroupingKeyCreator; import com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.ReceivingOperation; @@ -52,6 +53,8 @@ import com.google.cloud.dataflow.sdk.util.common.worker.WriteOperation; import com.google.cloud.dataflow.sdk.values.KV; +import org.joda.time.Instant; + import java.util.ArrayList; import java.util.List; @@ -168,26 +171,27 @@ static PartialGroupByKeyOperation createPartialGroupByKeyOperation(PipelineOptio CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { PartialGroupByKeyInstruction pgbk = instruction.getPartialGroupByKey(); - Coder coder = Serializer.deserialize(pgbk.getInputElementCodec(), Coder.class); - if (!(coder instanceof WindowedValueCoder)) { + Coder windowedCoder = Serializer.deserialize(pgbk.getInputElementCodec(), Coder.class); + if (!(windowedCoder instanceof WindowedValueCoder)) { throw new Exception( - "unexpected kind of input coder for PartialGroupByKeyOperation: " + coder); + "unexpected kind of input coder for PartialGroupByKeyOperation: " + windowedCoder); } - Coder elemCoder = ((WindowedValueCoder) coder).getValueCoder(); + Coder elemCoder = ((WindowedValueCoder) windowedCoder).getValueCoder(); if (!(elemCoder instanceof KvCoder)) { throw new Exception( "unexpected kind of input element coder for PartialGroupByKeyOperation: " + elemCoder); } - KvCoder kvCoder = (KvCoder) elemCoder; - Coder keyCoder = kvCoder.getKeyCoder(); - Coder valueCoder = kvCoder.getValueCoder(); + KvCoder kvCoder = (KvCoder) elemCoder; + Coder keyCoder = kvCoder.getKeyCoder(); + Coder valueCoder = kvCoder.getValueCoder(); OutputReceiver[] receivers = createOutputReceivers(instruction, counterPrefix, addCounterMutator, stateSampler, 1); PartialGroupByKeyOperation operation = new PartialGroupByKeyOperation(instruction.getSystemName(), - new CoderGroupingKeyCreator(keyCoder), new CoderSizeEstimator(keyCoder), + new WindowingCoderGroupingKeyCreator(keyCoder), + new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), new CoderSizeEstimator(valueCoder), 0.001/*sizeEstimatorSampleRate*/, PairInfo.create(), receivers, counterPrefix, addCounterMutator, stateSampler); @@ -207,35 +211,52 @@ public static PairInfo create() { private PairInfo() {} @Override public Object getKeyFromInputPair(Object pair) { - WindowedValue> windowedKv = (WindowedValue>) pair; - return windowedKv.getValue().getKey(); 
+ @SuppressWarnings("unchecked") + WindowedValue> windowedKv = (WindowedValue>) pair; + return WindowedValue.of( + windowedKv.getValue().getKey(), windowedKv.getTimestamp(), windowedKv.getWindows()); } @Override public Object getValueFromInputPair(Object pair) { - WindowedValue> windowedKv = (WindowedValue>) pair; + @SuppressWarnings("unchecked") + WindowedValue> windowedKv = (WindowedValue>) pair; return windowedKv.getValue().getValue(); } @Override public Object makeOutputPair(Object key, Object values) { - return WindowedValue.valueInEmptyWindows(KV.of(key, values)); + WindowedValue windowedKey = (WindowedValue) key; + return WindowedValue.of( + KV.of(windowedKey.getValue(), values), + windowedKey.getTimestamp(), + windowedKey.getWindows()); } } /** * Implements PGBKOp.GroupingKeyCreator via Coder. */ - public static class CoderGroupingKeyCreator - implements PartialGroupByKeyOperation.GroupingKeyCreator { - final Coder coder; + // TODO: Actually support window merging in the combiner table. + public static class WindowingCoderGroupingKeyCreator + implements GroupingKeyCreator { + + private static final Instant ignored = new Instant(0); + + private final Coder coder; - public CoderGroupingKeyCreator(Coder coder) { + public WindowingCoderGroupingKeyCreator(Coder coder) { this.coder = coder; } @Override - public Object createGroupingKey(Object value) throws Exception { - return new PartialGroupByKeyOperation.StructuralByteArray( - CoderUtils.encodeToByteArray(coder, value)); + public Object createGroupingKey(Object key) throws Exception { + WindowedValue windowedKey = (WindowedValue) key; + // Ignore timestamp for grouping purposes. + // The PGBK output will inherit the timestamp of one of its inputs. + return WindowedValue.of( + new PartialGroupByKeyOperation.StructuralByteArray( + CoderUtils.encodeToByteArray(coder, windowedKey.getValue())), + ignored, + windowedKey.getWindows()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java index 38aa5a35b3057..8a1b56f955681 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperationTest.java @@ -31,10 +31,10 @@ import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; -import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.CoderGroupingKeyCreator; import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.CoderSizeEstimator; import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.ElementByteSizeObservableCoder; import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.PairInfo; +import com.google.cloud.dataflow.sdk.runners.worker.MapTaskExecutorFactory.WindowingCoderGroupingKeyCreator; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; @@ -54,7 +54,6 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; @@ -82,14 +81,15 @@ public void testRunPartialGroupByKeyOperation() throws Exception { counterSet, 
counterPrefix); PartialGroupByKeyOperation pgbkOperation = - new PartialGroupByKeyOperation(new CoderGroupingKeyCreator(keyCoder), - new CoderSizeEstimator(keyCoder), - new CoderSizeEstimator(valueCoder), - PairInfo.create(), - receiver, - counterPrefix, - counterSet.getAddCounterMutator(), - stateSampler); + new PartialGroupByKeyOperation( + new WindowingCoderGroupingKeyCreator(keyCoder), + new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)), + new CoderSizeEstimator(valueCoder), + PairInfo.create(), + receiver, + counterPrefix, + counterSet.getAddCounterMutator(), + stateSampler); pgbkOperation.start(); @@ -165,32 +165,46 @@ public long estimateSize(String element) { } } + private static class KvPairInfo implements PartialGroupByKeyOperation.PairInfo { + @Override + public Object getKeyFromInputPair(Object pair) { + return ((KV) pair).getKey(); + } + @Override + public Object getValueFromInputPair(Object pair) { + return ((KV) pair).getValue(); + } + @Override + public Object makeOutputPair(Object key, Object value) { + return KV.of(key, value); + } + } + @Test public void testBufferingGroupingTable() throws Exception { BufferingGroupingTable table = new BufferingGroupingTable<>( - 1000, new IdentityGroupingKeyCreator(), PairInfo.create(), + 1000, new IdentityGroupingKeyCreator(), new KvPairInfo(), new StringPowerSizeEstimator(), new StringPowerSizeEstimator()); TestReceiver receiver = new TestReceiver( - WindowedValue.getValueOnlyCoder( - KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())))); + KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of()))); table.put("A", "a", receiver); table.put("B", "b1", receiver); table.put("B", "b2", receiver); table.put("C", "c", receiver); - assertThat(unwindowed(receiver.outputElems), empty()); + assertThat(receiver.outputElems, empty()); table.put("C", "cccc", receiver); - assertThat(unwindowed(receiver.outputElems), + assertThat(receiver.outputElems, hasItem((Object) KV.of("C", Arrays.asList("c", "cccc")))); table.put("DDDD", "d", receiver); - assertThat(unwindowed(receiver.outputElems), + assertThat(receiver.outputElems, hasItem((Object) KV.of("DDDD", Arrays.asList("d")))); table.flush(receiver); - assertThat(unwindowed(receiver.outputElems), + assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.containsInAnyOrder( KV.of("A", Arrays.asList("a")), KV.of("B", Arrays.asList("b1", "b2")), @@ -220,28 +234,27 @@ public Long extract(Object key, Long accumulator) { CombiningGroupingTable table = new CombiningGroupingTable( - 1000, new IdentityGroupingKeyCreator(), PairInfo.create(), + 1000, new IdentityGroupingKeyCreator(), new KvPairInfo(), summingCombineFn, new StringPowerSizeEstimator(), new IdentitySizeEstimator()); TestReceiver receiver = new TestReceiver( - WindowedValue.getValueOnlyCoder( - KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()))); + KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of())); table.put("A", 1, receiver); table.put("B", 2, receiver); table.put("B", 3, receiver); table.put("C", 4, receiver); - assertThat(unwindowed(receiver.outputElems), empty()); + assertThat(receiver.outputElems, empty()); table.put("C", 5000, receiver); - assertThat(unwindowed(receiver.outputElems), hasItem((Object) KV.of("C", 5004L))); + assertThat(receiver.outputElems, hasItem((Object) KV.of("C", 5004L))); table.put("DDDD", 6, receiver); - assertThat(unwindowed(receiver.outputElems), hasItem((Object) KV.of("DDDD", 6L))); + assertThat(receiver.outputElems, hasItem((Object) 
KV.of("DDDD", 6L))); table.flush(receiver); - assertThat(unwindowed(receiver.outputElems), + assertThat(receiver.outputElems, IsIterableContainingInAnyOrder.containsInAnyOrder( KV.of("A", 1L), KV.of("B", 2L + 3), @@ -249,14 +262,6 @@ public Long extract(Object key, Long accumulator) { KV.of("DDDD", 6L))); } - private List unwindowed(Iterable windowed) { - List unwindowed = new ArrayList<>(); - for (Object withWindow : windowed) { - unwindowed.add(((WindowedValue) withWindow).getValue()); - } - return unwindowed; - } - //////////////////////////////////////////////////////////////////////////// // Tests for the sampling size estimator. From bc77eeeec2f4dbd9b779ac080e0c2f5e70ebc557 Mon Sep 17 00:00:00 2001 From: liangzhang Date: Tue, 6 Jan 2015 17:39:02 -0800 Subject: [PATCH 0060/1541] Add control code for Cloud Debugger: * New option for Debugger version string * Modify worker pool config for Debugger in DataflowPipelineTranslator [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83391175 --- .../sdk/options/CloudDebuggerOptions.java | 55 +++++++++++++++++++ .../sdk/options/DataflowPipelineOptions.java | 2 +- .../runners/DataflowPipelineTranslator.java | 20 +++++++ .../DataflowPipelineTranslatorTest.java | 19 +++++++ 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java new file mode 100644 index 0000000000000..c3632ed1b4bfe --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Options for controlling Cloud Debugger. These options are experimental and subject to change. + */ +public interface CloudDebuggerOptions { + + /** + * User defined application version. Cloud Debugger uses it to group all + * running debuggee processes. Version should be different if users have + * multiple parallel runs of the same application with different inputs. + */ + String getCdbgVersion(); + void setCdbgVersion(String value); + + /** + * Return a JSON string for the Debugger metadata item. + */ + public static class DebuggerConfig { + private String version; + public String getVersion() { return version; } + public void setVersion(String version) { this.version = version; } + + /** + * Compute the string of Debugger config. + * @return JSON string of Debugger config metadata. + * @throws JsonProcessingException when converting to Json fails. 
+ */ + public String computeMetadataString() throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + String debuggerConfigString = mapper.writeValueAsString(this); + return debuggerConfigString; + } + } +} + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index d30f7dc4d96b3..907d020cad23d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -34,7 +34,7 @@ public interface DataflowPipelineOptions extends PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions, DataflowPipelineWorkerPoolOptions, BigQueryOptions, - GcsOptions, StreamingOptions { + GcsOptions, StreamingOptions, CloudDebuggerOptions { /** * GCS path for temporary files. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index aff881ddfe999..f3ddbd1cb444a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -48,6 +48,7 @@ import com.google.cloud.dataflow.sdk.io.DatastoreIO; import com.google.cloud.dataflow.sdk.io.PubsubIO; import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.CloudDebuggerOptions.DebuggerConfig; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType; import com.google.cloud.dataflow.sdk.runners.dataflow.AvroIOTranslator; @@ -363,6 +364,25 @@ public Job translate(List packages) { workerPool.setTaskrunnerSettings(taskRunnerSettings); + // Config Cloud Debugger + if (!Strings.isNullOrEmpty(options.getCdbgVersion())) { + String cdbgVersion = options.getCdbgVersion(); + DebuggerConfig debuggerConfig = new DebuggerConfig(); + debuggerConfig.setVersion(cdbgVersion); + + Map metadata = workerPool.getMetadata(); + if (metadata == null) { + metadata = new HashMap(); + } + + try { + metadata.put("debugger", debuggerConfig.computeMetadataString()); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException("Cannot format Debugger version.", e); + } + workerPool.setMetadata(metadata); + } + if (options.isStreaming()) { job.setType("JOB_TYPE_STREAMING"); } else { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index 3eb90edbf78e6..e877ea33d3eac 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -175,6 +175,25 @@ public void testWorkerMachineTypeConfig() throws IOException { assertEquals(testMachineType, workerPool.getMachineType()); } + @Test + public void testDebuggerConfig() throws IOException { + final String cdbgVersion = "test-v1"; + DataflowPipelineOptions options = buildPipelineOptions(); + options.setCdbgVersion(cdbgVersion); + String expectedConfig = "{\"version\":\"test-v1\"}"; + + Pipeline p = buildPipeline(options); + 
p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + for (WorkerPool pool : job.getEnvironment().getWorkerPools()) { + if (pool.getKind() == DataflowPipelineTranslator.HARNESS_WORKER_POOL) { + assertEquals(pool.getMetadata().get("debugger"), expectedConfig); + } + } + } + @Test public void testDiskSizeGbConfig() throws IOException { final Integer diskSizeGb = 1234; From d54dcd1cce96efc90c51191300f9903e2fa5808a Mon Sep 17 00:00:00 2001 From: chamikara Date: Wed, 7 Jan 2015 13:58:11 -0800 Subject: [PATCH 0061/1541] We now mostly support wildcard matching for GCS paths so removing the checks. We still do not support recursive wildcards so adding a check for that. Additionally this modifies the wildcard expansion logic by removing the delimiter when listing objects. This results in all objects after the first wildcard being listed out of which we select objects that match regex derived by the user specified glob pattern. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83460566 --- .../runners/dataflow/TextIOTranslator.java | 13 +---- .../cloud/dataflow/sdk/util/GcsUtil.java | 58 +++++++++---------- .../cloud/dataflow/sdk/io/TextIOTest.java | 45 +++----------- .../runners/DataflowPipelineRunnerTest.java | 1 + .../DataflowPipelineTranslatorTest.java | 47 +++------------ 5 files changed, 48 insertions(+), 116 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index 92aaf210e4ec8..302cea67a55c5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -21,7 +21,6 @@ import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; -import com.google.cloud.dataflow.sdk.util.GcsUtil; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; @@ -50,16 +49,10 @@ private void translateReadHelper( throw new IllegalArgumentException("TextIO not supported in streaming mode."); } - // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. + // Validate the provided GCS path. 
GcsPath gcsPath = GcsPath.fromUri(transform.getFilepattern()); - // Furthermore, on the service there is currently a limitation - // that the first wildcard character must occur after the last - // delimiter, and that the delimiter is fixed to '/' - if (!GcsUtil.GCS_READ_PATTERN.matcher(gcsPath.getObject()).matches()) { - throw new IllegalArgumentException( - "Unsupported wildcard usage in \"" + gcsPath + "\": " - + " all wildcards must occur after the final '/' delimiter."); - } + Preconditions.checkArgument( + context.getPipelineOptions().getGcsUtil().isGcsPatternSupported(gcsPath.getObject())); context.addStep(transform, "ParallelRead"); // TODO: How do we want to specify format and diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java index bcb387f29a6ed..dd51b669e2820 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java @@ -73,20 +73,13 @@ public GcsUtil create(PipelineOptions options) { /** Matches a glob containing a wildcard, capturing the portion before the first wildcard. */ private static final Pattern GLOB_PREFIX = Pattern.compile("(?[^*?]*)[*?].*"); - private static final String WILDCARD = "[\\[\\]*?]"; - private static final String NON_WILDCARD = "[^\\[\\]*?]"; - private static final String NON_DELIMITER = "[^/]"; - private static final String OPTIONAL_WILDCARD_AND_SUFFIX = "(" + WILDCARD + NON_DELIMITER + "*)?"; + private static final String RECURSIVE_WILDCARD = "[*]{2}"; /** - * A {@link Pattern} that matches globs in which every wildcard is interpreted as such, - * assuming a delimiter of {@code '/'}. - * - *
<p>
Most importantly, if a {@code '*'} or {@code '?'} occurs before the - * final delimiter it will not be interpreted as a wildcard. + * A {@link Pattern} for globs with a recursive wildcard. */ - public static final Pattern GCS_READ_PATTERN = Pattern.compile( - NON_WILDCARD + "*" + OPTIONAL_WILDCARD_AND_SUFFIX); + private static final Pattern RECURSIVE_GCS_PATTERN = + Pattern.compile(".*" + RECURSIVE_WILDCARD + ".*"); ///////////////////////////////////////////////////////////////////////////// @@ -99,37 +92,44 @@ public GcsUtil create(PipelineOptions options) { // Exposed for testing. final ExecutorService executorService; + /** + * Returns true if the given GCS pattern is supported otherwise fails with an + * exception. + */ + public boolean isGcsPatternSupported(String gcsPattern) { + if (RECURSIVE_GCS_PATTERN.matcher(gcsPattern).matches()) { + throw new IllegalArgumentException("Unsupported wildcard usage in \"" + gcsPattern + "\": " + + " recursive wildcards are not supported."); + } + + return true; + } + private GcsUtil(Storage storageClient, ExecutorService executorService) { storage = storageClient; this.executorService = executorService; } /** - * Expands a pattern into matched paths. The input path may contain - * globs (in the last component only!), which are expanded in the result. - * - *
<p>
TODO: add support for full path matching. + * Expands a pattern into matched paths. The pattern path may contain + * globs, which are expanded in the result. */ - public List expand(GcsPath path) throws IOException { - if (!GCS_READ_PATTERN.matcher(path.getObject()).matches()) { - throw new IllegalArgumentException( - "Unsupported wildcard usage in \"" + path + "\": " - + " all wildcards must occur after the final '/' delimiter."); - } - - Matcher m = GLOB_PREFIX.matcher(path.getObject()); + public List expand(GcsPath gcsPattern) throws IOException { + Preconditions.checkArgument(isGcsPatternSupported(gcsPattern.getObject())); + Matcher m = GLOB_PREFIX.matcher(gcsPattern.getObject()); if (!m.matches()) { - return Arrays.asList(path); + return Arrays.asList(gcsPattern); } + // Part before the first wildcard character. String prefix = m.group("PREFIX"); - Pattern p = Pattern.compile(globToRegexp(path.getObject())); + Pattern p = Pattern.compile(globToRegexp(gcsPattern.getObject())); LOG.info("matching files in bucket {}, prefix {} against pattern {}", - path.getBucket(), prefix, p.toString()); + gcsPattern.getBucket(), prefix, p.toString()); - Storage.Objects.List listObject = storage.objects().list(path.getBucket()); + // List all objects that start with the prefix (including objects in sub-directories). + Storage.Objects.List listObject = storage.objects().list(gcsPattern.getBucket()); listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL); - listObject.setDelimiter("/"); listObject.setPrefix(prefix); String pageToken = null; @@ -146,7 +146,7 @@ public List expand(GcsPath path) throws IOException { break; } - // Filter + // Filter objects based on the regex. for (StorageObject o : objects.getItems()) { String name = o.getName(); // Skip directories, which end with a slash. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index 28270949c328a..1cc3bf64030dd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -340,7 +340,6 @@ public void testUnsupportedFilePattern() throws IOException { } /** - * The first wildcard must occur after the last directory delimiter. * This tests a few corner cases that should not crash. */ @Test @@ -360,54 +359,26 @@ public void testGoodWildcards() throws Exception { pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]baz?")); pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*")); pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*wonka*")); - - // Check that running doesn't fail. - pipeline.run(); - } - - /** - * The first wildcard must occur after the last directory delimiter. - * This tests "*". - */ - @Test - public void testBadWildcardStar() throws Exception { - Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); - + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*baz/wonka*")); pipeline.apply(TextIO.Read.from("gs://bucket/foo*/baz")); - - // Check that running does fail. - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("wildcard"); - pipeline.run(); - } - - /** - * The first wildcard must occur after the last directory delimiter. - * This tests "?". 
- */ - @Test - public void testBadWildcardOptional() throws Exception { - Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); - pipeline.apply(TextIO.Read.from("gs://bucket/foo?/baz")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); - // Check that running does fail. - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("wildcard"); + // Check that running doesn't fail. pipeline.run(); } /** - * The first wildcard must occur after the last directory delimiter. - * This tests "[]" based character classes. + * Recursive wildcards are not supported. + * This tests "**". */ @Test - public void testBadWildcardBrackets() throws Exception { + public void testBadWildcardRecursive() throws Exception { Pipeline pipeline = Pipeline.create(buildTestPipelineOptions()); - pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo**/baz")); - // Check that translation does fail. + // Check that running does fail. thrown.expect(IllegalArgumentException.class); thrown.expectMessage("wildcard"); pipeline.run(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index 3d6f804ed3a21..38f2fb407c367 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -122,6 +122,7 @@ private GcsUtil buildMockGcsUtil() throws IOException { .thenReturn(FileChannel.open( Files.createTempFile("channel-", ".tmp"), StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE)); + when(mockGcsUtil.isGcsPatternSupported(anyString())).thenReturn(true); return mockGcsUtil; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index e877ea33d3eac..bfa3ed7a21eb8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -420,7 +420,6 @@ public void testPartiallyBoundFailure() throws IOException { } /** - * The first wildcard must occur after the last directory delimiter. * This tests a few corner cases that should not crash. */ @Test @@ -439,58 +438,26 @@ public void testGoodWildcards() throws Exception { pipeline.apply(TextIO.Read.from("gs://bucket/foo/[0-9]baz?")); pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*")); pipeline.apply(TextIO.Read.from("gs://bucket/foo/baz/*wonka*")); - - // Check that translation doesn't fail. - t.translate(pipeline, Collections.emptyList()); - } - - /** - * The first wildcard must occur after the last directory delimiter. - * This tests "*". - */ - @Test - public void testBadWildcardStar() throws Exception { - DataflowPipelineOptions options = buildPipelineOptions(); - Pipeline pipeline = DataflowPipeline.create(options); - DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); - + pipeline.apply(TextIO.Read.from("gs://bucket/foo/*/baz*")); pipeline.apply(TextIO.Read.from("gs://bucket/foo*/baz")); - - // Check that translation does fail. 
- thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Unsupported wildcard usage"); - t.translate(pipeline, Collections.emptyList()); - } - - /** - * The first wildcard must occur after the last directory delimiter. - * This tests "?". - */ - @Test - public void testBadWildcardOptional() throws Exception { - DataflowPipelineOptions options = buildPipelineOptions(); - Pipeline pipeline = DataflowPipeline.create(options); - DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); - pipeline.apply(TextIO.Read.from("gs://bucket/foo?/baz")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); - // Check that translation does fail. - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Unsupported wildcard usage"); + // Check that translation doesn't fail. t.translate(pipeline, Collections.emptyList()); } /** - * The first wildcard must occur after the last directory delimiter. - * This tests "[]" based character classes. + * Recursive wildcards are not supported. + * This tests "**". */ @Test - public void testBadWildcardBrackets() throws Exception { + public void testBadWildcardRecursive() throws Exception { DataflowPipelineOptions options = buildPipelineOptions(); Pipeline pipeline = DataflowPipeline.create(options); DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options); - pipeline.apply(TextIO.Read.from("gs://bucket/foo[0-9]/baz")); + pipeline.apply(TextIO.Read.from("gs://bucket/foo**/baz")); // Check that translation does fail. thrown.expect(IllegalArgumentException.class); From 224445d3d3d64c112efed072785bddfbf17bb0e9 Mon Sep 17 00:00:00 2001 From: amyu Date: Wed, 7 Jan 2015 14:13:39 -0800 Subject: [PATCH 0062/1541] Addition of MaxPerKeyExamples, an example that reads public samples of weather data from BigQuery, and finds the maximum temperature for each month. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83461984 --- .../dataflow/examples/MaxPerKeyExamples.java | 163 ++++++++++++++++++ .../examples/MaxPerKeyExamplesTest.java | 88 ++++++++++ 2 files changed, 251 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/MaxPerKeyExamples.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/MaxPerKeyExamplesTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/MaxPerKeyExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/MaxPerKeyExamples.java new file mode 100644 index 0000000000000..a4c0a106da0a0 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/MaxPerKeyExamples.java @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Max; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import java.util.ArrayList; +import java.util.List; + +/** + * An example that reads the public samples of weather data from BigQuery, and finds + * the maximum temperature ('mean_temp') for each month. + * + * Concepts: The 'Max' statistical combination function, and how to find the max per + * key group. + * + *
<p>
Note: Before running this example, you must create a BigQuery dataset to contain your output + * table. + * + *
<p>
To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and the BigQuery table for the output: + * --output=:. + * + *
<p>
To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and the BigQuery table for the output: + * --output=:. + * + *
<p>
The BigQuery input table defaults to clouddataflow-readonly:samples.weather_stations and can + * be overridden with --input. + */ +public class MaxPerKeyExamples { + // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod. + private static final String WEATHER_SAMPLES_TABLE = + "clouddataflow-readonly:samples.weather_stations"; + + /** + * Examines each row (weather reading) in the input table. Output the month of the reading, + * and the mean_temp. + */ + static class ExtractTempFn extends DoFn> { + @Override + public void processElement(ProcessContext c) { + TableRow row = c.element(); + Integer month = Integer.parseInt((String) row.get("month")); + Double meanTemp = Double.parseDouble(row.get("mean_temp").toString()); + c.output(KV.of(month, meanTemp)); + } + } + + /** + * Format the results to a TableRow, to save to BigQuery. + * + */ + static class FormatMaxesFn extends DoFn, TableRow> { + @Override + public void processElement(ProcessContext c) { + TableRow row = new TableRow() + .set("month", c.element().getKey()) + .set("max_mean_temp", c.element().getValue()); + c.output(row); + } + } + + /** + * Reads rows from a weather data table, and finds the max mean_temp for each + * month via the 'Max' statistical combination function. + */ + static class MaxMeanTemp + extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection rows) { + + // row... => ... + PCollection> temps = rows.apply( + ParDo.of(new ExtractTempFn())); + + // month, mean_temp... => ... + PCollection> tempMaxes = + temps.apply(Max.doublesPerKey()); + + // ... => row... + PCollection results = tempMaxes.apply( + ParDo.of(new FormatMaxesFn())); + + return results; + } + } + + /** + * Options supported by {@link MaxPerKeyExamples}. + *
<p>
+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Table to read from, specified as " + + ":.") + @Default.String(WEATHER_SAMPLES_TABLE) + String getInput(); + void setInput(String value); + + @Description("Table to write to, specified as " + + ":.") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + public static void main(String[] args) + throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + // Build the table schema for the output table. + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT")); + TableSchema schema = new TableSchema().setFields(fields); + + p.apply(BigQueryIO.Read.from(options.getInput())) + .apply(new MaxMeanTemp()) + .apply(BigQueryIO.Write + .to(options.getOutput()) + .withSchema(schema) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); + + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/MaxPerKeyExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/MaxPerKeyExamplesTest.java new file mode 100644 index 0000000000000..abb5710dbe480 --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/MaxPerKeyExamplesTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.MaxPerKeyExamples.ExtractTempFn; +import com.google.cloud.dataflow.examples.MaxPerKeyExamples.FormatMaxesFn; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** Unit tests for {@link MaxPerKeyExamples}. 
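The DoFn tests below cover ExtractTempFn and FormatMaxesFn; as an aside, the Max.doublesPerKey() step itself can be exercised with a tiny pipeline over the same month/temperature pairs. A hypothetical companion test, not part of this patch (the coders chosen here are assumptions):

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
import com.google.cloud.dataflow.sdk.coders.DoubleCoder;
import com.google.cloud.dataflow.sdk.coders.KvCoder;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.testing.TestPipeline;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.Max;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;

import org.junit.Test;

public class MaxPerKeyStepTest {
  @Test
  public void testMaxDoublesPerKey() {
    Pipeline p = TestPipeline.create();

    PCollection<KV<Integer, Double>> temps = p
        .apply(Create.of(KV.of(6, 85.3), KV.of(6, 45.3), KV.of(7, 75.4)))
        .setCoder(KvCoder.of(BigEndianIntegerCoder.of(), DoubleCoder.of()));

    PCollection<KV<Integer, Double>> maxes = temps.apply(Max.<Integer>doublesPerKey());

    // Each month keeps only its largest reading.
    DataflowAssert.that(maxes).containsInAnyOrder(KV.of(6, 85.3), KV.of(7, 75.4));
    p.run();
  }
}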
*/ +@RunWith(JUnit4.class) +public class MaxPerKeyExamplesTest { + + private static final TableRow row1 = new TableRow() + .set("month", "6").set("day", "21") + .set("year", "2014").set("mean_temp", "85.3") + .set("tornado", true); + private static final TableRow row2 = new TableRow() + .set("month", "7").set("day", "20") + .set("year", "2014").set("mean_temp", "75.4") + .set("tornado", false); + private static final TableRow row3 = new TableRow() + .set("month", "6").set("day", "18") + .set("year", "2014").set("mean_temp", "45.3") + .set("tornado", true); + private static final TableRow[] ROWS_ARRAY = new TableRow[] { + row1, row2, row3 + }; + + private static final KV kv1 = KV.of(6, 85.3); + private static final KV kv2 = KV.of(6, 45.3); + private static final KV kv3 = KV.of(7, 75.4); + + static final KV[] TUPLES_ARRAY = new KV[] { + kv1, kv2, kv3 + }; + + private static final TableRow resultRow1 = new TableRow() + .set("month", 6) + .set("max_mean_temp", 85.3); + private static final TableRow resultRow2 = new TableRow() + .set("month", 7) + .set("max_mean_temp", 75.4); + + + @Test + public void testExtractTempFn() { + DoFnTester> extractTempFn = + DoFnTester.of(new ExtractTempFn()); + List> results = extractTempFn.processBatch(ROWS_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(kv1)); + Assert.assertThat(results, CoreMatchers.hasItem(kv2)); + Assert.assertThat(results, CoreMatchers.hasItem(kv3)); + } + + @Test + public void testFormatMaxesFn() { + DoFnTester, TableRow> formatMaxesFnFn = + DoFnTester.of(new FormatMaxesFn()); + List results = formatMaxesFnFn.processBatch(TUPLES_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(resultRow1)); + Assert.assertThat(results, CoreMatchers.hasItem(resultRow2)); + } + +} From cbd598c5f245eac9fae8abcd69a4ca481da9016a Mon Sep 17 00:00:00 2001 From: amyu Date: Wed, 7 Jan 2015 15:22:26 -0800 Subject: [PATCH 0063/1541] Addition of filtering and deduplication examples. FilterExamples demonstrates several approaches to filtering, and use of the Mean transform. It shows how to dynamically set parameters by defining and using new pipeline options, and how to use a value derived by the pipeline. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83468417 --- .../cloud/dataflow/examples/DeDupExample.java | 99 +++++++ .../dataflow/examples/FilterExamples.java | 255 ++++++++++++++++++ .../dataflow/examples/DeDupExampleTest.java | 83 ++++++ .../dataflow/examples/FilterExamplesTest.java | 85 ++++++ 4 files changed, 522 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/DeDupExample.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/FilterExamples.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/DeDupExampleTest.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/FilterExamplesTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/DeDupExample.java b/examples/src/main/java/com/google/cloud/dataflow/examples/DeDupExample.java new file mode 100644 index 0000000000000..2407c7ad25640 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/DeDupExample.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.DefaultValueFactory; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; + +/** + * This example uses as input Shakespeare's plays as plaintext files, and will remove any + * duplicate lines across all the files. (The output does not preserve any input order). + * + * Concepts: the RemoveDuplicates transform, and how to wire transforms together. + * Demonstrates TextIO.Read/RemoveDuplicates/TextIO.Write. + * + *
<p>
To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and a local output file or output prefix on GCS: + * --output=[ | gs://] + * + *
<p>
To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and an output prefix on GCS: + * --output=gs:// + * + *
<p>
The input defaults to gs://dataflow-samples/shakespeare/* and can be + * overridden with --input. + */ +public class DeDupExample { + + /** + * Options supported by {@link DeDupExample}. + *
<p>
+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Path to the directory or GCS prefix containing files to read from") + @Default.String("gs://dataflow-samples/shakespeare/*") + String getInput(); + void setInput(String value); + + @Description("Path of the file to write to") + @Default.InstanceFactory(OutputFactory.class) + String getOutput(); + void setOutput(String value); + + /** Returns gs://${STAGING_LOCATION}/"deduped.txt". */ + public static class OutputFactory implements DefaultValueFactory { + @Override + public String create(PipelineOptions options) { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + if (dataflowOptions.getStagingLocation() != null) { + return GcsPath.fromUri(dataflowOptions.getStagingLocation()) + .resolve("deduped.txt").toString(); + } else { + throw new IllegalArgumentException("Must specify --output or --stagingLocation"); + } + } + } + } + + + public static void main(String[] args) + throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) + .apply(RemoveDuplicates.create()) + .apply(TextIO.Write.named("DedupedShakespeare") + .to(options.getOutput())); + + p.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/FilterExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/FilterExamples.java new file mode 100644 index 0000000000000..45646911a0a36 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/FilterExamples.java @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Mean; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; + +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +/** + * This is an example that demonstrates several approaches to filtering, and use of the Mean + * transform. It shows how to dynamically set parameters by defining and using new pipeline options, + * and how to use a value derived by the pipeline. + * + *
<p>
Concepts: The Mean transform; Options configuration; using pipeline-derived data as a side + * input; approaches to filtering, selection, and projection. + * + *
<p>
The example reads public samples of weather data from BigQuery. It performs a + * projection on the data, finds the global mean of the temperature readings, filters on readings + * for a single given month, and then outputs only data (for that month) that has a mean temp + * smaller than the derived global mean. +* + *
<p>
Note: Before running this example, you must create a BigQuery dataset to contain your output + * table. + * + *
<p>
To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and the BigQuery table for the output: + * --output=:. + * [--monthFilter=] + * where optional parameter --monthFilter is set to a number 1-12. + * + *
<p>
To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and the BigQuery table for the output: + * --output=:. + * [--monthFilter=] + * where optional parameter --monthFilter is set to a number 1-12. + * + *
<p>
The BigQuery input table defaults to clouddataflow-readonly:samples.weather_stations and can + * be overridden with --input. + */ +public class FilterExamples { + // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod. + private static final String WEATHER_SAMPLES_TABLE = + "clouddataflow-readonly:samples.weather_stations"; + static final Logger LOG = Logger.getLogger(FilterExamples.class.getName()); + static final int MONTH_TO_FILTER = 7; + + /** + * Examines each row in the input table. Outputs only the subset of the cells this example + * is interested in-- the mean_temp and year, month, and day-- as a bigquery table row. + */ + static class ProjectionFn extends DoFn { + @Override + public void processElement(ProcessContext c){ + TableRow row = c.element(); + // Grab year, month, day, mean_temp from the row + Integer year = Integer.parseInt((String) row.get("year")); + Integer month = Integer.parseInt((String) row.get("month")); + Integer day = Integer.parseInt((String) row.get("day")); + Double meanTemp = Double.parseDouble(row.get("mean_temp").toString()); + // Prepares the data for writing to BigQuery by building a TableRow object + TableRow outRow = new TableRow() + .set("year", year).set("month", month) + .set("day", day).set("mean_temp", meanTemp); + c.output(outRow); + } + } + + /** + * Implements 'filter' functionality. + * + *
<p>
Examines each row in the input table. Outputs only rows from the month + * monthFilter, which is passed in as a parameter during construction of this DoFn. + */ + static class FilterSingleMonthDataFn extends DoFn { + + Integer monthFilter; + + public FilterSingleMonthDataFn(Integer monthFilter) { + this.monthFilter = monthFilter; + } + + @Override + public void processElement(ProcessContext c){ + TableRow row = c.element(); + Integer month; + month = (Integer) row.get("month"); + if (month.equals(this.monthFilter)) { + c.output(row); + } + } + } + + /** + * Examines each row (weather reading) in the input table. Output the temperature + * reading for that row ('mean_temp'). + */ + static class ExtractTempFn extends DoFn { + @Override + public void processElement(ProcessContext c){ + TableRow row = c.element(); + Double meanTemp = Double.parseDouble(row.get("mean_temp").toString()); + c.output(meanTemp); + } + } + + + + /* + * Finds the global mean of the mean_temp for each day/record, and outputs + * only data that has a mean temp larger than this global mean. + **/ + static class BelowGlobalMean + extends PTransform, PCollection> { + + Integer monthFilter; + + public BelowGlobalMean(Integer monthFilter) { + this.monthFilter = monthFilter; + } + + + @Override + public PCollection apply(PCollection rows) { + + // Extract the mean_temp from each row. + PCollection meanTemps = rows.apply( + ParDo.of(new ExtractTempFn())); + + // Find the global mean, of all the mean_temp readings in the weather data, + // and prepare this singleton PCollectionView for use as a side input. + final PCollectionView globalMeanTemp = + meanTemps.apply(Mean.globally()) + .apply(View.asSingleton()); + + // Rows filtered to remove all but a single month + PCollection monthFilteredRows = rows + .apply(ParDo.of(new FilterSingleMonthDataFn(monthFilter))); + + // Then, use the global mean as a side input, to further filter the weather data. + // By using a side input to pass in the filtering criteria, we can use a value + // that is computed earlier in pipeline execution. + // We'll only output readings with temperatures below this mean. + PCollection filteredRows = monthFilteredRows + .apply(ParDo + .withSideInputs(globalMeanTemp) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + Double meanTemp = Double.parseDouble(c.element().get("mean_temp").toString()); + Double gTemp = c.sideInput(globalMeanTemp); + if (meanTemp < gTemp) { + c.output(c.element()); + } + } + })); + + return filteredRows; + } + } + + + /** + * Options supported by {@link FilterExamples}. + *
<p>
+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Table to read from, specified as " + + ":.") + @Default.String(WEATHER_SAMPLES_TABLE) + String getInput(); + void setInput(String value); + + @Description("Table to write to, specified as " + + ":.. " + + "The dataset_id must already exist") + @Validation.Required + String getOutput(); + void setOutput(String value); + + @Description("Numeric value of month to filter on") + @Default.Integer(MONTH_TO_FILTER) + Integer getMonthFilter(); + void setMonthFilter(Integer value); + } + + /** + * Helper method to build the table schema for the output table. + */ + private static TableSchema buildWeatherSchemaProjection() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("year").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("month").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("day").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT")); + TableSchema schema = new TableSchema().setFields(fields); + return schema; + } + + public static void main(String[] args) + throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + TableSchema schema = buildWeatherSchemaProjection(); + + p.apply(BigQueryIO.Read.from(options.getInput())) + .apply(ParDo.of(new ProjectionFn())) + .apply(new BelowGlobalMean(options.getMonthFilter())) + .apply(BigQueryIO.Write + .to(options.getOutput()) + .withSchema(schema) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); + + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/DeDupExampleTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/DeDupExampleTest.java new file mode 100644 index 0000000000000..c52f675c01bce --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/DeDupExampleTest.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link DeDupExample}. 
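One piece the tests below do not exercise is DeDupExample's OutputFactory default. A hypothetical check of what it derives from a staging location (the bucket name is made up):

import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;

public class OutputFactorySketch {
  public static void main(String[] args) {
    // Mirrors OutputFactory: resolve "deduped.txt" against the staging location.
    String stagingLocation = "gs://my-bucket/staging";  // hypothetical value
    String output = GcsPath.fromUri(stagingLocation).resolve("deduped.txt").toString();
    System.out.println(output);  // expected: gs://my-bucket/staging/deduped.txt
  }
}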
*/ +@RunWith(JUnit4.class) +public class DeDupExampleTest { + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testRemoveDuplicates() { + List strings = Arrays.asList( + "k1", + "k5", + "k5", + "k2", + "k1", + "k2", + "k3"); + + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(strings)) + .setCoder(StringUtf8Coder.of()); + + PCollection output = + input.apply(RemoveDuplicates.create()); + + DataflowAssert.that(output) + .containsInAnyOrder("k1", "k5", "k2", "k3"); + p.run(); + } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testRemoveDuplicatesEmpty() { + List strings = Arrays.asList(); + + Pipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(strings)) + .setCoder(StringUtf8Coder.of()); + + PCollection output = + input.apply(RemoveDuplicates.create()); + + DataflowAssert.that(output) + .containsInAnyOrder(); + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/FilterExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/FilterExamplesTest.java new file mode 100644 index 0000000000000..5845eb4b53611 --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/FilterExamplesTest.java @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.FilterExamples.FilterSingleMonthDataFn; +import com.google.cloud.dataflow.examples.FilterExamples.ProjectionFn; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link FilterExamples}. 
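The tests below cover the projection and the single-month filter; the side-input stage is easiest to see with a quick calculation over the same three mean_temp readings. A hypothetical walk-through, assuming monthFilter is set to 6:

import java.util.Arrays;
import java.util.List;

public class BelowGlobalMeanSketch {
  public static void main(String[] args) {
    // The three mean_temp readings used by the tests below.
    List<Double> readings = Arrays.asList(85.3, 75.4, 45.3);

    double sum = 0;
    for (double r : readings) {
      sum += r;
    }
    double globalMean = sum / readings.size();
    System.out.println(globalMean);  // ~68.67, what Mean.globally() would supply as the side input

    // Stage 1 (FilterSingleMonthDataFn with monthFilter = 6) keeps the readings 85.3 and 45.3.
    // Stage 2 (the side-input filter) keeps only readings below the global mean.
    for (double r : Arrays.asList(85.3, 45.3)) {
      if (r < globalMean) {
        System.out.println("kept: " + r);  // prints "kept: 45.3"
      }
    }
  }
}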
*/ +@RunWith(JUnit4.class) +public class FilterExamplesTest { + + private static final TableRow row1 = new TableRow() + .set("month", "6").set("day", "21") + .set("year", "2014").set("mean_temp", "85.3") + .set("tornado", true); + private static final TableRow row2 = new TableRow() + .set("month", "7").set("day", "20") + .set("year", "2014").set("mean_temp", "75.4") + .set("tornado", false); + private static final TableRow row3 = new TableRow() + .set("month", "6").set("day", "18") + .set("year", "2014").set("mean_temp", "45.3") + .set("tornado", true); + static final TableRow[] ROWS_ARRAY = new TableRow[] { + row1, row2, row3 + }; + static final List ROWS = Arrays.asList(ROWS_ARRAY); + + private static final TableRow outRow1 = new TableRow() + .set("year", 2014).set("month", 6) + .set("day", 21).set("mean_temp", 85.3); + private static final TableRow outRow2 = new TableRow() + .set("year", 2014).set("month", 7) + .set("day", 20).set("mean_temp", 75.4); + private static final TableRow outRow3 = new TableRow() + .set("year", 2014).set("month", 6) + .set("day", 18).set("mean_temp", 45.3); + private static final TableRow[] PROJROWS_ARRAY = new TableRow[] { + outRow1, outRow2, outRow3 + }; + + + @Test + public void testProjectionFn() { + DoFnTester projectionFn = + DoFnTester.of(new ProjectionFn()); + List results = projectionFn.processBatch(ROWS_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(outRow1)); + Assert.assertThat(results, CoreMatchers.hasItem(outRow2)); + Assert.assertThat(results, CoreMatchers.hasItem(outRow3)); + } + + @Test + public void testFilterSingleMonthDataFn() { + DoFnTester filterSingleMonthDataFn = + DoFnTester.of(new FilterSingleMonthDataFn(7)); + List results = filterSingleMonthDataFn.processBatch(PROJROWS_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(outRow2)); + } +} From 8bdee5c6d981ba31f362b2cda53549afbee8d1f5 Mon Sep 17 00:00:00 2001 From: amyu Date: Wed, 7 Jan 2015 16:02:18 -0800 Subject: [PATCH 0064/1541] Addition of CombinePerKeyExamples. This example reads public 'Shakespeare' data, and for each word in the dataset that is over a given length, generates a string containing the list of play names in which that word appears. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83471835 --- .../examples/CombinePerKeyExamples.java | 218 ++++++++++++++++++ .../examples/CombinePerKeyExamplesTest.java | 90 ++++++++ 2 files changed, 308 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/CombinePerKeyExamples.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/CombinePerKeyExamplesTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/CombinePerKeyExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/CombinePerKeyExamples.java new file mode 100644 index 0000000000000..14df95e27408b --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/CombinePerKeyExamples.java @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import java.util.ArrayList; +import java.util.List; + +/** + * An example that reads the public 'Shakespeare' data, and for each word in + * the dataset that is over a given length, generates a string containing the + * list of play names in which that word appears, and saves this information + * to a bigquery table. + * + *
<p>
Concepts: the Combine.perKey transform, which lets you combine the values in a + * key-grouped Collection, and how to use an Aggregator to track information in the + * Monitoring UI. + * + *
<p>
Note: Before running this example, you must create a BigQuery dataset to contain your output + * table. + * + *
<p>
To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and the BigQuery table for the output: + * --output=:. + * + *
<p>
To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and the BigQuery table for the output: + * --output=:. + * + *
<p>
The BigQuery input table defaults to publicdata:samples.shakespeare and can + * be overridden with --input. + */ +public class CombinePerKeyExamples { + // Use the shakespeare public BigQuery sample + private static final String SHAKESPEARE_TABLE = + "publicdata:samples.shakespeare"; + // We'll track words >= this word length across all plays in the table. + private static final int MIN_WORD_LENGTH = 9; + + /** + * Examines each row in the input table. If the word is >= MIN_WORD_LENGTH, + * outputs word, play_name. + */ + static class ExtractLargeWordsFn extends DoFn> { + + private Aggregator smallerWords; + + @Override + public void startBundle(Context c) { + smallerWords = c.createAggregator("smallerWords", new Sum.SumLongFn()); + } + + @Override + public void processElement(ProcessContext c){ + TableRow row = c.element(); + String playName = (String) row.get("corpus"); + String word = (String) row.get("word"); + if (word.length() >= MIN_WORD_LENGTH) { + c.output(KV.of(word, playName)); + } else { + // Track how many smaller words we're not including. This information will be + // visible in the Monitoring UI. + smallerWords.addValue(1L); + } + } + } + + + /** + * Prepares the data for writing to BigQuery by building a TableRow object + * containing a word with a string listing the plays in which it appeared. + */ + static class FormatShakespeareOutputFn extends DoFn, TableRow> { + @Override + public void processElement(ProcessContext c) { + TableRow row = new TableRow() + .set("word", c.element().getKey()) + .set("all_plays", c.element().getValue()); + c.output(row); + } + } + + /** + * Reads the public 'Shakespeare' data, and for each word in the dataset + * over a given length, generates a string containing the list of play names + * in which that word appears. It does this via the Combine.perKey + * transform, with the ConcatWords combine function. + * + *
<p>
Combine.perKey is similar to a GroupByKey followed by a ParDo, but + * has more restricted semantics that allow it to be executed more + * efficiently. These records are then formatted as BQ table rows. + */ + static class PlaysForWord + extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection rows) { + + // row... => ... + PCollection> words = rows.apply( + ParDo.of(new ExtractLargeWordsFn())); + + // word, play_name => word, all_plays ... + PCollection> wordAllPlays = + words.apply(Combine.perKey( + new ConcatWords())); + + // ... => row... + PCollection results = wordAllPlays.apply( + ParDo.of(new FormatShakespeareOutputFn())); + + return results; + } + } + + /** + * A 'combine function' used with the Combine.perKey transform. Builds a + * comma-separated string of all input items. So, it will build a string + * containing all the different Shakespeare plays in which the given input + * word has appeared. + */ + public static class ConcatWords implements SerializableFunction, String> { + @Override + public String apply(Iterable input) { + StringBuilder all = new StringBuilder(); + for (String item : input) { + if (!item.isEmpty()) { + if (all.length() == 0) { + all.append(item); + } else { + all.append(","); + all.append(item); + } + } + } + return all.toString(); + } + } + + /** + * Options supported by {@link CombinePerKeyExamples}. + *
<p>
+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Table to read from, specified as " + + ":.") + @Default.String(SHAKESPEARE_TABLE) + String getInput(); + void setInput(String value); + + @Description("Table to write to, specified as " + + ":.. " + + "The dataset_id must already exist") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + public static void main(String[] args) + throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + // Build the table schema for the output table. + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("word").setType("STRING")); + fields.add(new TableFieldSchema().setName("all_plays").setType("STRING")); + TableSchema schema = new TableSchema().setFields(fields); + + p.apply(BigQueryIO.Read.from(options.getInput())) + .apply(new PlaysForWord()) + .apply(BigQueryIO.Write + .to(options.getOutput()) + .withSchema(schema) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); + + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/CombinePerKeyExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/CombinePerKeyExamplesTest.java new file mode 100644 index 0000000000000..d30432afaac8f --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/CombinePerKeyExamplesTest.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.CombinePerKeyExamples.ExtractLargeWordsFn; +import com.google.cloud.dataflow.examples.CombinePerKeyExamples.FormatShakespeareOutputFn; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** Unit tests for {@link CombinePerKeyExamples}. 
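The tests below exercise the two DoFns; the ConcatWords combine function is an ordinary SerializableFunction, so it can also be checked directly, outside any pipeline. A small hypothetical snippet:

import com.google.cloud.dataflow.examples.CombinePerKeyExamples.ConcatWords;

import java.util.Arrays;

public class ConcatWordsSketch {
  public static void main(String[] args) {
    ConcatWords fn = new ConcatWords();
    // Values for one key are joined with commas...
    System.out.println(fn.apply(Arrays.asList("king_lear", "macbeth")));  // king_lear,macbeth
    // ...and empty items are skipped entirely.
    System.out.println(fn.apply(Arrays.asList("", "hamlet")));            // hamlet
  }
}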
*/ +@RunWith(JUnit4.class) +public class CombinePerKeyExamplesTest { + + private static final TableRow row1 = new TableRow() + .set("corpus", "king_lear").set("word", "snuffleupaguses"); + private static final TableRow row2 = new TableRow() + .set("corpus", "macbeth").set("word", "antidisestablishmentarianism"); + private static final TableRow row3 = new TableRow() + .set("corpus", "king_lear").set("word", "antidisestablishmentarianism"); + private static final TableRow row4 = new TableRow() + .set("corpus", "macbeth").set("word", "bob"); + private static final TableRow row5 = new TableRow() + .set("corpus", "king_lear").set("word", "hi"); + + static final TableRow[] ROWS_ARRAY = new TableRow[] { + row1, row2, row3, row4, row5 + }; + + private static final KV tuple1 = KV.of("snuffleupaguses", "king_lear"); + private static final KV tuple2 = KV.of("antidisestablishmentarianism", "macbeth"); + private static final KV tuple3 = KV.of("antidisestablishmentarianism", + "king_lear"); + + private static final KV combinedTuple1 = KV.of("antidisestablishmentarianism", + "king_lear,macbeth"); + private static final KV combinedTuple2 = KV.of("snuffleupaguses", "king_lear"); + + static final KV[] COMBINED_TUPLES_ARRAY = new KV[] { + combinedTuple1, combinedTuple2 + }; + + private static final TableRow resultRow1 = new TableRow() + .set("word", "snuffleupaguses").set("all_plays", "king_lear"); + private static final TableRow resultRow2 = new TableRow() + .set("word", "antidisestablishmentarianism") + .set("all_plays", "king_lear,macbeth"); + + @Test + public void testExtractLargeWordsFn() { + DoFnTester> extractLargeWordsFn = + DoFnTester.of(new ExtractLargeWordsFn()); + List> results = extractLargeWordsFn.processBatch(ROWS_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(tuple1)); + Assert.assertThat(results, CoreMatchers.hasItem(tuple2)); + Assert.assertThat(results, CoreMatchers.hasItem(tuple2)); + } + + @Test + public void testFormatShakespeareOutputFn() { + DoFnTester, TableRow> formatShakespeareOutputFn = + DoFnTester.of(new FormatShakespeareOutputFn()); + List results = formatShakespeareOutputFn.processBatch(COMBINED_TUPLES_ARRAY); + Assert.assertThat(results, CoreMatchers.hasItem(resultRow1)); + Assert.assertThat(results, CoreMatchers.hasItem(resultRow2)); + } + +} From 8955e2576fd61827aa5ad71f19dc9f45ddb6e160 Mon Sep 17 00:00:00 2001 From: amyu Date: Wed, 7 Jan 2015 17:12:46 -0800 Subject: [PATCH 0065/1541] Addition of an example that shows how to do a join on two collections. It uses a sample of the GDELT 'world event' data, joining the event 'action' country code against a table that maps country codes to country names. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83477930 --- .../cloud/dataflow/examples/JoinExamples.java | 178 ++++++++++++++++++ .../dataflow/examples/JoinExamplesTest.java | 113 +++++++++++ 2 files changed, 291 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/JoinExamplesTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java new file mode 100644 index 0000000000000..5f910d6dfbcf4 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2014 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; +import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey; +import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TupleTag; + +/** + * This example shows how to do a join on two collections. + * It uses a sample of the GDELT 'world event' data (http://goo.gl/OB6oin), joining the event + * 'action' country code against a table that maps country codes to country names. + * + *
<p>
Concepts: Join operation; multiple input sources. + * + *
<p>
To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and a local output file or output prefix on GCS: + * --output=[ | gs://] + * + *

<p> To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ *   --project=
+ *   --stagingLocation=gs://
+ *   --runner=BlockingDataflowPipelineRunner
+ * and an output prefix on GCS:
+ *   --output=gs://
+ */
+public class JoinExamples {
+
+  // A 1000-row sample of the GDELT data here: gdelt-bq:full.events.
+  private static final String GDELT_EVENTS_TABLE =
+      "clouddataflow-readonly:samples.gdelt_sample";
+  // A table that maps country codes to country names.
+  private static final String COUNTRY_CODES =
+      "gdelt-bq:full.crosswalk_geocountrycodetohuman";
+  private static final TupleTag<String> eventInfoTag = new TupleTag<String>();
+  private static final TupleTag<String> countryInfoTag = new TupleTag<String>();
+
+  /**
+   * Process the CoGbkResult elements generated by the CoGroupByKey transform.
+   */
+  static class ProcessJoinFn extends DoFn<KV<String, CoGbkResult>, KV<String, String>> {
+
+    @Override
+    public void processElement(ProcessContext c) {
+      KV<String, CoGbkResult> e = c.element();
+      CoGbkResult val = e.getValue();
+      String countryCode = e.getKey();
+      String countryName = "none";
+      countryName = e.getValue().getOnly(countryInfoTag);
+      for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) {
+        // Generate a string that combines information from both collection values
+        c.output(KV.of(countryCode, "Country name: " + countryName + ", Event info: " + eventInfo));
+      }
+    }
+  }
+
+  /**
+   * Join two collections, using country code as the key.
+   */
+  static PCollection<String> joinEvents(PCollection<TableRow> eventsTable,
+      PCollection<TableRow> countryCodes) throws Exception {
+
+    // transform both input collections to tuple collections, where the keys are country
+    // codes in both cases.
+    PCollection<KV<String, String>> eventInfo = eventsTable.apply(
+        ParDo.of(new ExtractEventDataFn()));
+    PCollection<KV<String, String>> countryInfo = countryCodes.apply(
+        ParDo.of(new ExtractCountryInfoFn()));
+
+    // country code 'key' -> CGBKR (<event info>, <country name>)
+    PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
+        .of(eventInfoTag, eventInfo)
+        .and(countryInfoTag, countryInfo)
+        .apply(CoGroupByKey.<String>create());
+
+    // country code 'key' -> string of <event info>, <country name>
+    PCollection<KV<String, String>> finalResultCollection =
+        kvpCollection.apply(ParDo.of(new ProcessJoinFn()));
+
+    // write to GCS
+    PCollection<String> formattedResults = finalResultCollection
+        .apply(ParDo.of(new DoFn<KV<String, String>, String>() {
+          @Override
+          public void processElement(ProcessContext c) {
+            String outputstring = "Country code: " + c.element().getKey()
+                + ", " + c.element().getValue();
+            c.output(outputstring);
+          }
+        }));
+    return formattedResults;
+  }
+
+  /**
+   * Examines each row (event) in the input table. Output a KV with the key the country
+   * code of the event, and the value a string encoding event information.
+   */
+  static class ExtractEventDataFn extends DoFn<TableRow, KV<String, String>> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = c.element();
+      String countryCode = (String) row.get("ActionGeo_CountryCode");
+      String sqlDate = (String) row.get("SQLDATE");
+      String actor1Name = (String) row.get("Actor1Name");
+      String sourceUrl = (String) row.get("SOURCEURL");
+      String eventInfo = "Date: " + sqlDate + ", Actor1: " + actor1Name + ", url: " + sourceUrl;
+      c.output(KV.of(countryCode, eventInfo));
+    }
+  }
+
+
+  /**
+   * Examines each row (country info) in the input table. Output a KV with the key the country
+   * code, and the value the country name.
+ */ + static class ExtractCountryInfoFn extends DoFn> { + @Override + public void processElement(ProcessContext c) { + TableRow row = c.element(); + String countryCode = (String) row.get("FIPSCC"); + String countryName = (String) row.get("HumanName"); + c.output(KV.of(countryCode, countryName)); + } + } + + + /** + * Options supported by {@link JoinExamples}. + *
<p>
+ * Inherits standard configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Path of the file to write to") + @Validation.Required + String getOutput(); + void setOutput(String value); + } + + public static void main(String[] args) throws Exception { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + // the following two 'applys' create multiple inputs to our pipeline, one for each + // of our two input sources. + PCollection eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE)); + PCollection countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES)); + PCollection formattedResults = joinEvents(eventsTable, countryCodes); + formattedResults.apply(TextIO.Write.to(options.getOutput())); + p.run(); + } + +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/JoinExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/JoinExamplesTest.java new file mode 100644 index 0000000000000..0d51495d57496 --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/JoinExamplesTest.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.JoinExamples.ExtractCountryInfoFn; +import com.google.cloud.dataflow.examples.JoinExamples.ExtractEventDataFn; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** Unit tests for {@link JoinExamples}. 
*/ +@RunWith(JUnit4.class) +public class JoinExamplesTest { + + private static final TableRow row1 = new TableRow() + .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212") + .set("Actor1Name", "BANGKOK").set("SOURCEURL", "http://cnn.com"); + private static final TableRow row2 = new TableRow() + .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212") + .set("Actor1Name", "LAOS").set("SOURCEURL", "http://www.chicagotribune.com"); + private static final TableRow row3 = new TableRow() + .set("ActionGeo_CountryCode", "BE").set("SQLDATE", "20141213") + .set("Actor1Name", "AFGHANISTAN").set("SOURCEURL", "http://cnn.com"); + static final TableRow[] EVENTS = new TableRow[] { + row1, row2, row3 + }; + static final List EVENT_ARRAY = Arrays.asList(EVENTS); + + private static final KV kv1 = KV.of("VM", + "Date: 20141212, Actor1: LAOS, url: http://www.chicagotribune.com"); + private static final KV kv2 = KV.of("BE", + "Date: 20141213, Actor1: AFGHANISTAN, url: http://cnn.com"); + private static final KV kv3 = KV.of("BE", "Belgium"); + private static final KV kv4 = KV.of("VM", "Vietnam"); + + private static final TableRow cc1 = new TableRow() + .set("FIPSCC", "VM").set("HumanName", "Vietnam"); + private static final TableRow cc2 = new TableRow() + .set("FIPSCC", "BE").set("HumanName", "Belgium"); + static final TableRow[] CCS = new TableRow[] { + cc1, cc2 + }; + static final List CC_ARRAY = Arrays.asList(CCS); + + static final String[] JOINED_EVENTS = new String[] { + "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: LAOS, " + + "url: http://www.chicagotribune.com", + "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: BANGKOK, " + + "url: http://cnn.com", + "Country code: BE, Country name: Belgium, Event info: Date: 20141213, Actor1: AFGHANISTAN, " + + "url: http://cnn.com" + }; + + @Test + public void testExtractEventDataFn() { + DoFnTester> extractEventDataFn = + DoFnTester.of(new ExtractEventDataFn()); + List> results = extractEventDataFn.processBatch(EVENTS); + Assert.assertThat(results, CoreMatchers.hasItem(kv1)); + Assert.assertThat(results, CoreMatchers.hasItem(kv2)); + } + + @Test + public void testExtractCountryInfoFn() { + DoFnTester> extractCountryInfoFn = + DoFnTester.of(new ExtractCountryInfoFn()); + List> results = extractCountryInfoFn.processBatch(CCS); + Assert.assertThat(results, CoreMatchers.hasItem(kv3)); + Assert.assertThat(results, CoreMatchers.hasItem(kv4)); + } + + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testJoin() throws java.lang.Exception { + Pipeline p = TestPipeline.create(); + PCollection input1 = p.apply(Create.of(EVENT_ARRAY)); + PCollection input2 = p.apply(Create.of(CC_ARRAY)); + + PCollection output = JoinExamples.joinEvents(input1, input2); + DataflowAssert.that(output).containsInAnyOrder(JOINED_EVENTS); + p.run(); + } +} From 65a6a10e75a2c262555c02517bbacf7fe92b2f3e Mon Sep 17 00:00:00 2001 From: davor Date: Wed, 7 Jan 2015 23:36:11 -0800 Subject: [PATCH 0066/1541] Dataflow on Windows: fix local file matching by replacing backslash with a double-backslash. This fixes #2 on GitHub. 
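
The escaping trick this patch relies on can be sketched in isolation. The snippet
below is illustrative only (the class name and paths are hypothetical, not part of
the patch): FileSystems.getDefault().getPathMatcher() treats a single backslash in
a glob as an escape character, so a Windows-style absolute path has to have each
backslash doubled before it is used as a pattern.

    import java.nio.file.FileSystems;
    import java.nio.file.PathMatcher;
    import java.nio.file.Paths;

    public class GlobBackslashDemo {
      public static void main(String[] args) {
        // Hypothetical Windows-style absolute path, as File.getAbsolutePath() might return it.
        String absolutePath = "C:\\dataflow\\staging\\file-*.txt";

        // In glob syntax a single '\' escapes the following character, so every
        // backslash is doubled before the pattern is handed to getPathMatcher().
        String escaped = absolutePath.replace("\\", "\\\\");

        PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + escaped);

        // With the escaping in place the backslashes are matched as literal characters.
        System.out.println(matcher.matches(Paths.get("C:\\dataflow\\staging\\file-1.txt")));
      }
    }
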
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83495198 --- .../dataflow/sdk/util/FileIOChannelFactory.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java index 1bb7566a526b5..5b27e277f338c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java @@ -34,6 +34,7 @@ import java.util.Collection; import java.util.LinkedList; import java.util.List; +import java.util.regex.Matcher; /** * Implements IOChannelFactory for local files. @@ -52,8 +53,18 @@ public Collection match(String spec) throws IOException { throw new IOException("Unable to find parent directory of " + spec); } + // Method getAbsolutePath() on Windows platform may return something like + // "c:\temp\file.txt". FileSystem.getPathMatcher() call below will treat + // '\' (backslash) as an escape character, instead of a directory + // separator. Replacing backslash with double-backslash solves the problem. + // We perform the replacement on all platforms, even those that allow + // backslash as a part of the filename, because Globs.toRegexPattern will + // eat one backslash. + String pathToMatch = file.getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"), + Matcher.quoteReplacement("\\\\")); + final PathMatcher matcher = - FileSystems.getDefault().getPathMatcher("glob:" + file.getAbsolutePath()); + FileSystems.getDefault().getPathMatcher("glob:" + pathToMatch); File[] files = parent.listFiles(new FileFilter() { @Override public boolean accept(File pathname) { From ec1d2d8a52d53885fd1b92ceeb55f51f0a0c347c Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 8 Jan 2015 17:04:55 -0800 Subject: [PATCH 0067/1541] Dataflow on Windows: remove usage of "/tmp/", since that is a platform-specific directory. * In DataflowWorkerLoggingInitializer, we replace it with System.getProperty("java.io.tmpdir") making the environmentally-specified temporary directory the default logging location. This part may not be completely safe, in the unlikely case that there is some code depends on this being /tmp/, environmentally-specified temporary directory is something else, and the property isn't user-specified. * In DataflowPipelineRunnerTest, we use actual files created by TemporaryFolder, instead of mock file names. The actual files aren't needed, but that allows us to easily construct valid file names. 
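
For context, a minimal stand-alone sketch of the substitution described above
(hypothetical class name; not code from the patch): building the default log path
through the java.io.tmpdir system property keeps it valid on every platform.

    import java.io.File;

    public class TempDirLoggingDefaultDemo {
      public static void main(String[] args) {
        // java.io.tmpdir points at the platform's temporary directory (e.g. /tmp on
        // Linux, the user's temp folder on Windows), so the default path is valid on both.
        String defaultLoggingLocation =
            new File(System.getProperty("java.io.tmpdir"), "dataflow-worker.log").getPath();
        System.out.println(defaultLoggingLocation);
      }
    }
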
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83565800 --- .../DataflowWorkerLoggingInitializer.java | 4 +++- .../sdk/runners/DataflowPipelineRunnerTest.java | 17 ++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java index 6be47019e3c86..e89d6d73b94c2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -18,6 +18,7 @@ import com.google.common.collect.ImmutableBiMap; +import java.io.File; import java.io.IOException; import java.util.logging.ConsoleHandler; import java.util.logging.FileHandler; @@ -37,7 +38,8 @@ * within the systems temporary directory. */ public class DataflowWorkerLoggingInitializer { - private static final String DEFAULT_LOGGING_LOCATION = "/tmp/dataflow-worker.log"; + private static final String DEFAULT_LOGGING_LOCATION = + new File(System.getProperty("java.io.tmpdir"), "dataflow-worker.log").getPath(); private static final String ROOT_LOGGER_NAME = ""; public static final String DATAFLOW_WORKER_LOGGING_LEVEL = "dataflow.worker.logging.level"; public static final String DATAFLOW_WORKER_LOGGING_LOCATION = "dataflow.worker.logging.location"; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index 38f2fb407c367..86a307e1594c5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -53,6 +53,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import org.mockito.ArgumentCaptor; @@ -76,8 +77,10 @@ @RunWith(JUnit4.class) @SuppressWarnings("serial") public class DataflowPipelineRunnerTest { - - @Rule public ExpectedException thrown = ExpectedException.none(); + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + @Rule + public ExpectedException thrown = ExpectedException.none(); // Asserts that the given Job has all expected fields set. private static void assertValidJob(Job job) { @@ -235,14 +238,14 @@ public void runWithDefaultFilesToStage() throws Exception { @Test public void detectClassPathResourceWithFileResources() throws Exception { - String path = "/tmp/file"; - String path2 = "/tmp/file2"; + File file = tmpFolder.newFile("file"); + File file2 = tmpFolder.newFile("file2"); URLClassLoader classLoader = new URLClassLoader(new URL[]{ - new URL("file://" + path), - new URL("file://" + path2) + file.toURI().toURL(), + file2.toURI().toURL() }); - assertEquals(ImmutableList.of(path, path2), + assertEquals(ImmutableList.of(file.getAbsolutePath(), file2.getAbsolutePath()), DataflowPipelineRunner.detectClassPathResourcesToStage(classLoader)); } From a5a2d9e07003f7e5859cb6dec4cc500f2381f2c8 Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 8 Jan 2015 17:06:50 -0800 Subject: [PATCH 0068/1541] Dataflow on Windows: fix testReadEmptyFile in TextReaderTest that depends on "/dev/null". 
Replacing "/dev/null" with an empty temporary file. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83565917 --- .../cloud/dataflow/sdk/runners/worker/TextReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index 4ebd671361eda..5d2934c4535fc 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -78,7 +78,7 @@ private File initTestFile() throws IOException { @Test public void testReadEmptyFile() throws Exception { TextReader textReader = - new TextReader<>("/dev/null", true, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFolder.newFile().getPath(), true, null, null, StringUtf8Coder.of()); try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertFalse(iterator.hasNext()); } From 19c14c6522d1d48fbe8f4d28f2c054a4b0fb3139 Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 8 Jan 2015 17:22:50 -0800 Subject: [PATCH 0069/1541] Dataflow on Windows: fix issues with hardcoded newline characters. We cannot manually construct strings using "\n" and expect equality, since various APIs will return "\r\n" on various platforms instead. The tests, however, need to expect a combination of '\n' and platform-specific newlines because the entire codebase is not platform-agnostic. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83566994 --- .../DataflowWorkerLoggingFormatter.java | 2 +- .../DataflowWorkerLoggingFormatterTest.java | 28 ++++++++++--------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java index a8f54b48668fd..911b3fe2e5e68 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -54,7 +54,7 @@ public String format(LogRecord record) { + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORK_ID), "unknown") + " " + record.getThreadID() + " " + record.getLoggerName() - + " " + record.getMessage() + "\n" + + " " + record.getMessage() + System.lineSeparator() + (exception != null ? 
exception : ""); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java index 065092aeaab14..4fd2b5f4688d4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java @@ -40,7 +40,7 @@ public class DataflowWorkerLoggingFormatterTest { public void testWithUnsetValuesInMDC() { assertEquals( "1970-01-01T00:00:00.001Z INFO unknown unknown unknown 2 LoggerName " - + "test.message\n", + + "test.message" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord("test.message", null))); } @@ -53,7 +53,7 @@ public void testWithMessage() { "dataflow.workId", "testWorkId")); assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " - + "test.message\n", + + "test.message" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord("test.message", null))); } @@ -66,11 +66,11 @@ public void testWithMessageAndException() { "dataflow.workId", "testWorkId")); assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " - + "test.message\n" - + "java.lang.Throwable: exception.test.message\n" - + "\tat declaringClass1.method1(file1.java:1)\n" - + "\tat declaringClass2.method2(file2.java:1)\n" - + "\tat declaringClass3.method3(file3.java:1)\n", + + "test.message" + System.lineSeparator() + + "java.lang.Throwable: exception.test.message" + System.lineSeparator() + + "\tat declaringClass1.method1(file1.java:1)" + System.lineSeparator() + + "\tat declaringClass2.method2(file2.java:1)" + System.lineSeparator() + + "\tat declaringClass3.method3(file3.java:1)" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord("test.message", createThrowable()))); } @@ -82,11 +82,12 @@ public void testWithException() { "dataflow.workerId", "testWorkerId", "dataflow.workId", "testWorkId")); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null\n" - + "java.lang.Throwable: exception.test.message\n" - + "\tat declaringClass1.method1(file1.java:1)\n" - + "\tat declaringClass2.method2(file2.java:1)\n" - + "\tat declaringClass3.method3(file3.java:1)\n", + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + + System.lineSeparator() + + "java.lang.Throwable: exception.test.message" + System.lineSeparator() + + "\tat declaringClass1.method1(file1.java:1)" + System.lineSeparator() + + "\tat declaringClass2.method2(file2.java:1)" + System.lineSeparator() + + "\tat declaringClass3.method3(file3.java:1)" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord(null, createThrowable()))); } @@ -98,7 +99,8 @@ public void testWithoutExceptionOrMessage() { "dataflow.workerId", "testWorkerId", "dataflow.workId", "testWorkId")); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null\n", + "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord(null, null))); } From 7598e29c5055d97b3798d17414fc2673f1592843 Mon Sep 17 00:00:00 2001 From: amancuso Date: Thu, 8 Jan 2015 
22:22:40 -0800 Subject: [PATCH 0070/1541] Update Dataflow GitHub examples README.MD file to clarify local and cloud mvn options. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83580687 --- README.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 18f78fc9f99d6..eea690f0a3f44 100644 --- a/README.md +++ b/README.md @@ -81,25 +81,30 @@ You can speed up the build and install process by using the following options: mvn -T 4 install -After building and installing, the following command will execute the WordCount -example using the DirectPipelineRunner on your local machine: +After building and installing, you can execute the Wordcount and other example +pipelines locally or in the cloud using `mvn` with command-line options. + +To execute the Wordcount pipeline locally (using the default +`DirectPipelineRunner`) and write output to a local or +Google Cloud Storage (GCS) location, use the following command-line syntax: mvn exec:java -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ - -Dexec.args="--input= --output=" + -Dexec.args=" ---output=[ | gs://] If you have been whitelisted for Alpha access to the Dataflow Service and followed the [developer setup](https://cloud.google.com/dataflow/java-sdk/getting-started#DeveloperSetup) -steps, you can use the BlockingDataflowPipelineRunner to run the same program in -the Google Cloud Platform (GCP): +steps, you can use the BlockingDataflowPipelineRunner to execute the Wordcount +program in the Google Cloud Platform (GCP). In this case, you specify +your project name, pipeline runner, the GCS staging location (staging +location should be entered in the form of `gs://bucket/staging-directory`), +and the GCS output (in the form of `gs://bucket/filename_prefix`). mvn exec:java -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ - -Dexec.args="--project= --stagingLocation= --runner=BlockingDataflowPipelineRunner" + -Dexec.args="--project= --runner=BlockingDataflowPipelineRunner \ + --stagingLocation= --output=" -Google Cloud Storage (GCS) location should be entered in the form of -gs://bucket/path/to/staging/directory. Google Cloud Platform (GCP) project -refers to its name (not number), which has been whitelisted for Cloud Dataflow. Refer [here](https://cloud.google.com/) for instructions to get started with Google Cloud Platform. From adadb61037cf5a0fa1d0cc630334a417c25573b2 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 9 Jan 2015 11:50:40 -0800 Subject: [PATCH 0071/1541] Dataflow on Windows: fix an issue where the ZIP archive would contain file names with a '\' (backslash) on Windows. Using '/' (slash) is compatible with most platforms. 
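
A small self-contained sketch of the convention this patch adopts (illustrative
names only, not code from the patch): ZIP entry names are joined with an explicit
'/' rather than File.separator, so an archive produced on Windows lists the same
entry paths as one produced on Linux.

    import java.io.FileOutputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipOutputStream;

    public class PortableZipEntryDemo {
      public static void main(String[] args) throws Exception {
        // "demo.zip" is just an illustrative output file name.
        try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream("demo.zip"))) {
          // Entry names are joined with an explicit '/', never File.separator, so the
          // archive lists "directory/file.txt" no matter which platform produced it.
          zos.putNextEntry(new ZipEntry("directory" + "/" + "file.txt"));
          zos.write("hello".getBytes(StandardCharsets.UTF_8));
          zos.closeEntry();
        }
      }
    }
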
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83623330 --- .../cloud/dataflow/sdk/util/PackageUtil.java | 84 ++++++++++++++----- .../dataflow/sdk/util/PackageUtilTest.java | 9 +- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java index 46f610836f57d..5afa04a8af1b3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java @@ -16,6 +16,9 @@ package com.google.cloud.dataflow.sdk.util; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkNotNull; + import com.google.api.client.util.BackOff; import com.google.api.client.util.BackOffUtils; import com.google.api.client.util.Sleeper; @@ -36,6 +39,7 @@ import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; import java.nio.charset.StandardCharsets; @@ -253,46 +257,84 @@ private static String computeContentHash(File classpathElement) throws IOExcepti private static void copyContent(String classpathElement, WritableByteChannel outputChannel) throws IOException { final File classpathElementFile = new File(classpathElement); - if (!classpathElementFile.isDirectory()) { + if (classpathElementFile.isDirectory()) { + zipDirectory(classpathElementFile, Channels.newOutputStream(outputChannel)); + } else { Files.asByteSource(classpathElementFile).copyTo(Channels.newOutputStream(outputChannel)); - return; } + } - ZipOutputStream zos = new ZipOutputStream(Channels.newOutputStream(outputChannel)); - zipDirectoryRecursive(classpathElementFile, classpathElementFile, zos); + /** + * Zips an entire directory specified by the path. + * + * @param sourceDirectory the directory to read from. This directory and all + * subdirectories will be added to the zip-file. The path within the zip + * file is relative to the directory given as parameter, not absolute. + * @param outputStream the stream to write the zip-file to. This method does not close + * outputStream. + * @throws IOException the zipping failed, e.g. because the input was not + * readable. + */ + private static void zipDirectory( + File sourceDirectory, + OutputStream outputStream) throws IOException { + checkNotNull(sourceDirectory); + checkNotNull(outputStream); + checkArgument( + sourceDirectory.isDirectory(), + "%s is not a valid directory", + sourceDirectory.getAbsolutePath()); + ZipOutputStream zos = new ZipOutputStream(outputStream); + for (File file : sourceDirectory.listFiles()) { + zipDirectoryInternal(file, "", zos); + } zos.finish(); } /** - * Private helper function for zipping files. This one goes recursively through the input - * directory and all of its subdirectories and adds the single zip entries. + * Private helper function for zipping files. This one goes recursively + * through the input directory and all of its subdirectories and adds the + * single zip entries. * - * @param file the file or directory to be added to the zip file. - * @param root each file uses the root directory to generate its relative path within the zip. - * @param zos the zipstream to write to. - * @throws IOException the zipping failed, e.g. because the output was not writable. 
+ * @param inputFile the file or directory to be added to the zip file + * @param directoryName the string-representation of the parent directory + * name. Might be an empty name, or a name containing multiple directory + * names separated by "/". The directory name must be a valid name + * according to the file system limitations. + * @param zos the zipstream to write to + * @throws IOException the zipping failed, e.g. because the output was not + * writeable. */ - private static void zipDirectoryRecursive(File file, File root, ZipOutputStream zos) - throws IOException { - final String entryName = relativize(file, root); - if (file.isDirectory()) { - // We are hitting a directory. Start the recursion. - // Add the empty entry if it is a subdirectory and the subdirectory has no children. - // Don't add it otherwise, as this is incompatible with certain implementations of unzip. - if (file.list().length == 0 && !file.equals(root)) { + private static void zipDirectoryInternal( + File inputFile, + String directoryName, + ZipOutputStream zos) throws IOException { + final String entryName; + if ("".equals(directoryName)) { + // no parent directories yet. + entryName = inputFile.getName(); + } else { + entryName = directoryName + "/" + inputFile.getName(); + } + if (inputFile.isDirectory()) { + // We are hitting a sub-directory. Start the recursion. + // Add the empty entry for a subdirectory if we have no children files. + // Don't add it if we have them, as this is incompatible with certain + // implementations of unzip. + if (inputFile.list().length == 0) { ZipEntry entry = new ZipEntry(entryName + "/"); zos.putNextEntry(entry); } else { // loop through the directory content, and zip the files - for (File currentFile : file.listFiles()) { - zipDirectoryRecursive(currentFile, root, zos); + for (File file : inputFile.listFiles()) { + zipDirectoryInternal(file, entryName, zos); } } } else { // Put the next zip-entry into the zipoutputstream. 
ZipEntry entry = new ZipEntry(entryName); zos.putNextEntry(entry); - Files.asByteSource(file).copyTo(zos); + Files.asByteSource(inputFile).copyTo(zos); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java index e49782f6b221b..751824b24282d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/PackageUtilTest.java @@ -16,10 +16,11 @@ package com.google.cloud.dataflow.sdk.util; +import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertThat; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.times; @@ -35,7 +36,6 @@ import com.google.common.io.Files; import com.google.common.io.LineReader; -import org.hamcrest.CoreMatchers; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -210,8 +210,9 @@ public void testPackageUploadWithDirectorySucceeds() throws Exception { entry = inputStream.getNextEntry()) { zipEntryNames.add(entry.getName()); } - assertTrue(CoreMatchers.hasItems("directory/file.txt", "empty_directory/", "file.txt").matches( - zipEntryNames)); + + assertThat(zipEntryNames, + containsInAnyOrder("directory/file.txt", "empty_directory/", "file.txt")); } @Test From 801a95b9e6fff9b167c2a16b73e30543daae0cac Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Fri, 9 Jan 2015 15:57:56 -0800 Subject: [PATCH 0072/1541] Update README.md A few improvements, mostly formatting. --- README.md | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index eea690f0a3f44..93fbfede73871 100644 --- a/README.md +++ b/README.md @@ -20,29 +20,29 @@ working with you to improve Cloud Dataflow. The key concepts in this programming model are: -* [PCollection](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java): +* [`PCollection`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java): represents a collection of data, which could be bounded or unbounded in size. -* [PTransform](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java): +* [`PTransform`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java): represents a computation that transforms input PCollections into output PCollections. -* [Pipeline](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java): +* [`Pipeline`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java): manages a directed acyclic graph of PTransforms and PCollections, which is ready for execution. 
-* [PipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): +* [`PipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): specifies where and how the pipeline should execute. -Currently there are three PipelineRunners: +Currently there are three `PipelineRunners`: - 1. The [DirectPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java) + 1. The [`DirectPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java) runs the pipeline on your local machine. 2. The -[DataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) +[`DataflowPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) submits the pipeline to the Dataflow Service, where it runs using managed resources in the [Google Cloud Platform](http://cloud.google.com). 3. The -[BlockingDataflowPipelineRunner](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java) -submits the pipeline to the Dataflow Service via the DataflowPipelineRunner and -then prints messages about the job status until execution is complete. +[`BlockingDataflowPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java) +submits the pipeline to the Dataflow Service via the `DataflowPipelineRunner` +and then prints messages about the job status until execution is complete. _The Dataflow Service is currently in the Alpha phase of development and access is limited to whitelisted users._ @@ -51,9 +51,9 @@ access is limited to whitelisted users._ This repository consists of two modules: -* [Java SDK](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk) +* [`SDK`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk) module provides a set of basic Java APIs to program against. -* [Examples](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples) +* [`Examples`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples) module provides a few samples to get started. We recommend starting with the WordCount example. @@ -69,7 +69,7 @@ You can speed up the build and install process by using the following options: mvn install -DskipTests 2. While iterating on a specific module, use the following command to compile - and reinstall it. For example, to reinstall the 'examples' module, run: + and reinstall it. For example, to reinstall the `examples` module, run: mvn install -pl examples @@ -81,22 +81,22 @@ You can speed up the build and install process by using the following options: mvn -T 4 install -After building and installing, you can execute the Wordcount and other example -pipelines locally or in the cloud using `mvn` with command-line options. 
+After building and installing, you can execute the `WordCount` and other example +pipelines locally or in the cloud using Maven with command-line options. -To execute the Wordcount pipeline locally (using the default +To execute the WordCount pipeline locally (using the default `DirectPipelineRunner`) and write output to a local or Google Cloud Storage (GCS) location, use the following command-line syntax: mvn exec:java -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ - -Dexec.args=" ---output=[ | gs://] + -Dexec.args="--output=[ | gs://] If you have been whitelisted for Alpha access to the Dataflow Service and followed the [developer setup](https://cloud.google.com/dataflow/java-sdk/getting-started#DeveloperSetup) -steps, you can use the BlockingDataflowPipelineRunner to execute the Wordcount -program in the Google Cloud Platform (GCP). In this case, you specify -your project name, pipeline runner, the GCS staging location (staging +steps, you can use the `BlockingDataflowPipelineRunner` to execute the +`WordCount` example in the Google Cloud Platform (GCP). In this case, you +specify your project name, pipeline runner, the GCS staging location (staging location should be entered in the form of `gs://bucket/staging-directory`), and the GCS output (in the form of `gs://bucket/filename_prefix`). @@ -108,9 +108,9 @@ and the GCS output (in the form of `gs://bucket/filename_prefix`). Refer [here](https://cloud.google.com/) for instructions to get started with Google Cloud Platform. -Other examples can be run similarly by replacing the WordCount class name with -BigQueryTornadoes, DatastoreWordCount, TfIdf, TopWikipediaSessions, etc. and -adjusting runtime options under the Dexec.args parameter, as specified in the +Other examples can be run similarly by replacing the `WordCount` class name with +`BigQueryTornadoes`, `DatastoreWordCount`, `TfIdf`, `TopWikipediaSessions`, etc. and +adjusting runtime options under the `Dexec.args` parameter, as specified in the example itself. ## More Information From 8e4c7fedf82dc5c40c9464d480ae949bdf041f16 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Mon, 12 Jan 2015 15:33:07 -0800 Subject: [PATCH 0073/1541] Revert "Generalize type signature of KV.of" --- .../main/java/com/google/cloud/dataflow/sdk/values/KV.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java index 81b6806a1125f..febb1302bd8c1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -35,8 +35,8 @@ public class KV implements Serializable { private static final long serialVersionUID = 0; /** Returns a KV with the given key and value. */ - public static KV of(SK key, SV value) { - return new KV(key, value); + public static KV of(K key, V value) { + return new KV<>(key, value); } /** Returns the key of this KV. */ From 0cc72d62f6a9b4cf9c9f4ad7eb15c7365790fbc5 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Wed, 21 Jan 2015 16:07:11 -0800 Subject: [PATCH 0074/1541] Enable Travis continuous integration coverage. 
--- .travis.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000000..9dc6a4497a094 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: java + +jdk: + - oraclejdk8 + - oraclejdk7 + - openjdk7 + - openjdk6 + +install: + - mvn install clean -U -DskipTests=true + +script: + - mvn -U verify + From 6842469f9f6b80d06a59b81fa8a0870845b1c05b Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Wed, 21 Jan 2015 16:24:18 -0800 Subject: [PATCH 0075/1541] Disable continuous integration coverage on openjdk6, as we don't support it. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9dc6a4497a094..4922b290ff69e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ jdk: - oraclejdk8 - oraclejdk7 - openjdk7 - - openjdk6 install: - mvn install clean -U -DskipTests=true From f255ed5fe3b07f4a41847324e7b3eb1ca841dbd1 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Thu, 22 Jan 2015 13:31:53 -0800 Subject: [PATCH 0076/1541] Update README.md Display the build status as reported by Travis continuous integration. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 55629a751ca9f..3e6bb3136a548 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ provides a simple, powerful programming model for building both batch and streaming parallel data processing pipelines. -## Status +## Status [![Build Status](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK.svg?branch=master)](https://travis-ci.org/GoogleCloudPlatform/DataflowJavaSDK) The Cloud Dataflow SDK is used to access the Google Cloud Dataflow service, which is currently in Alpha and restricted to whitelisted users. From 9623f20d5e4ae9db781bb82957175eec901e6eb3 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Thu, 22 Jan 2015 15:27:35 -0800 Subject: [PATCH 0077/1541] Update .travis.yml Enable email notification of build status. --- .travis.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4922b290ff69e..8cdc4ca197fe3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,12 @@ language: java +notifications: + email: + recipients: + - dataflow-sdk+travis@google.com + on_success: change + on_failure: always + jdk: - oraclejdk8 - oraclejdk7 @@ -9,5 +16,4 @@ install: - mvn install clean -U -DskipTests=true script: - - mvn -U verify - + - mvn verify -U From 4549ebcbcfef60eba3f8db1c3acea0ac80040b47 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Fri, 9 Jan 2015 14:05:06 -0800 Subject: [PATCH 0078/1541] * High-level API for defining basic custom sources (doesn't support any of the advanced features, like bases and multi-level splitting): see the ReadSource and Source classes. Note that Source.Reader implements the advance/getCurrent model rather than the Java collection hasNext/next model, for reasons outlined in its documentation. * DatastoreIO rewritten using this API. * Propagates PipelineOptions through the worker harness to SourceFormat (renamed from CustomSourceFormat) constructor, so that options such as credentials are available during splitting. 
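
The advance/getCurrent model referred to above can be illustrated with a
stripped-down sketch. The interface and names below are hypothetical, not the
SDK's actual classes: the point is simply that moving to the next record and
fetching the current one are separate calls, unlike the Java collection
hasNext/next pattern.

    // Illustrative only: a toy reader in the advance()/getCurrent() style.
    interface ToyReader<T> extends AutoCloseable {
      /** Moves to the next record; returns false once the input is exhausted. */
      boolean advance() throws Exception;

      /** Returns the record advance() positioned on; cheap and repeatable, no I/O. */
      T getCurrent();
    }

    final class ToyReaderLoop {
      static <T> long countRecords(ToyReader<T> reader) throws Exception {
        long count = 0;
        // Unlike hasNext()/next(), advancing and reading are separate steps.
        while (reader.advance()) {
          T record = reader.getCurrent();  // may be consulted more than once per record
          count++;
        }
        return count;
      }
    }
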
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83633581 --- .../dataflow/examples/DatastoreWordCount.java | 6 +- sdk/pom.xml | 6 + .../cloud/dataflow/sdk/io/DatastoreIO.java | 781 ++++++++++-------- .../dataflow/sdk/io/DatastoreIterator.java | 141 ---- .../cloud/dataflow/sdk/io/ReadSource.java | 123 +++ .../google/cloud/dataflow/sdk/io/Source.java | 174 ++++ .../runners/DataflowPipelineTranslator.java | 7 +- .../BasicSerializableSourceFormat.java | 277 +++++++ .../dataflow/DatastoreIOTranslator.java | 4 +- .../dataflow/ReadSourceTranslator.java | 32 + .../sdk/runners/worker/DataflowWorker.java | 18 +- ...tFactory.java => SourceFormatFactory.java} | 16 +- .../worker/SourceOperationExecutor.java | 15 +- .../SourceOperationExecutorFactory.java | 7 +- .../worker/SourceTranslationUtils.java | 39 +- .../dataflow/sdk/util/CloudSourceUtils.java | 6 +- .../dataflow/sdk/util/PropertyNames.java | 14 +- .../dataflow/sdk/util/TestCredential.java | 4 +- .../sdk/util/common/worker/ReadOperation.java | 17 +- .../sdk/util/common/worker/Reader.java | 6 +- ...tomSourceFormat.java => SourceFormat.java} | 2 +- .../dataflow/sdk/io/DatastoreIOTest.java | 101 ++- .../BasicSerializableSourceFormatTest.java | 218 +++++ .../runners/worker/BigQueryReaderTest.java | 2 +- .../runners/worker/DatastoreReaderTest.java | 143 ++++ 25 files changed, 1571 insertions(+), 588 deletions(-) delete mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadSourceTranslator.java rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{CustomSourceFormatFactory.java => SourceFormatFactory.java} (71%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/{CustomSourceFormat.java => SourceFormat.java} (98%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DatastoreReaderTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java index 1e00589281aad..ab31aae15ab76 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java @@ -148,7 +148,7 @@ public static void writeDataToDatastore(Options options) { Pipeline p = Pipeline.create(PipelineOptionsFactory.create()); p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) .apply(ParDo.of(new CreateEntityFn(options.getKind()))) - .apply(DatastoreIO.Write.to(options.getDataset())); + .apply(DatastoreIO.write().to(options.getDataset())); p.run(); } @@ -163,8 +163,8 @@ public static void readDataFromDatastore(Options options) { Query query = q.build(); Pipeline p = Pipeline.create(options); - p.apply(DatastoreIO.Read.named("ReadShakespeareFromDatastore") - .from(options.getDataset(), query)) + p.apply(DatastoreIO.readFrom(options.getDataset(), query) + .named("ReadShakespeareFromDatastore")) .apply(ParDo.of(new 
GetContentFn())) .apply(new WordCount.CountWords()) .apply(TextIO.Write.named("WriteLines").to(options.getOutput())); diff --git a/sdk/pom.xml b/sdk/pom.xml index 1e9633695d10d..c761197a78b3d 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -270,6 +270,12 @@ + + com.google.http-client + google-http-client + 1.19.0 + + com.google.oauth-client google-oauth-client-java6 diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index 1b66b868c97ed..dfd5041720dd2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -16,7 +16,16 @@ package com.google.cloud.dataflow.sdk.io; +import static com.google.api.services.datastore.DatastoreV1.PropertyFilter.Operator.EQUAL; +import static com.google.api.services.datastore.DatastoreV1.PropertyOrder.Direction.DESCENDING; +import static com.google.api.services.datastore.DatastoreV1.QueryResultBatch.MoreResultsType.NOT_FINISHED; +import static com.google.api.services.datastore.client.DatastoreHelper.getPropertyMap; +import static com.google.api.services.datastore.client.DatastoreHelper.makeFilter; +import static com.google.api.services.datastore.client.DatastoreHelper.makeOrder; +import static com.google.api.services.datastore.client.DatastoreHelper.makeValue; + import com.google.api.client.auth.oauth2.Credential; +import com.google.api.services.datastore.DatastoreV1; import com.google.api.services.datastore.DatastoreV1.BeginTransactionRequest; import com.google.api.services.datastore.DatastoreV1.BeginTransactionResponse; import com.google.api.services.datastore.DatastoreV1.CommitRequest; @@ -27,34 +36,32 @@ import com.google.api.services.datastore.client.DatastoreFactory; import com.google.api.services.datastore.client.DatastoreHelper; import com.google.api.services.datastore.client.DatastoreOptions; -import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.api.services.datastore.client.QuerySplitter; import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.coders.DefaultCoder; import com.google.cloud.dataflow.sdk.coders.EntityCoder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.GcpOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; -import com.google.cloud.dataflow.sdk.transforms.Create; -import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.Flatten; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.util.Credentials; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.RetryHttpRequestInitializer; -import com.google.cloud.dataflow.sdk.values.PBegin; import com.google.cloud.dataflow.sdk.values.PCollection; -import com.google.cloud.dataflow.sdk.values.PCollectionList; import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.HashMap; -import java.util.LinkedList; 
import java.util.List; +import java.util.NoSuchElementException; /** * Transforms for reading and writing @@ -84,10 +91,10 @@ * cache is keyed by the requested scopes. * *
<p>
To read a {@link PCollection} from a query to Datastore, use - * {@link DatastoreIO.Read}, specifying {@link DatastoreIO.Read#from} to specify - * dataset to read, the query to read from, and optionally - * {@link DatastoreIO.Read#named} and {@link DatastoreIO.Read#withHost} to specify - * the name of the pipeline step and the host of Datastore, respectively. + * {@link DatastoreIO#read} and its methods {#link DatastoreIO.Read#withDataset} + * and {#link DatastoreIO.Read#withQuery} to specify dataset to read, the query + * to read from, and optionally {@link DatastoreIO.Source#withHost} to specify + * the host of Datastore. * For example: * *

<pre> {@code
@@ -95,16 +102,27 @@
  * PipelineOptions options =
  *     PipelineOptionsFactory.fromArgs(args).create();
  * Pipeline p = Pipeline.create(options);
- * PCollection<Entity> entities =
- *     p.apply(DatastoreIO.Read
- *             .named("Read Datastore")
- *             .from(datasetId, query)
- *             .withHost(host));
+ * PCollection<Entity> entities = p.apply(
+ *     ReadSource.from(DatastoreIO.read()
+ *         .withDataset(datasetId)
+ *         .withQuery(query)
+ *         .withHost(host)));
+ * p.run();
+ * } </pre>
+ * + * or: + * + *
<pre> {@code
+ * // Read a query from Datastore
+ * PipelineOptions options =
+ *     PipelineOptionsFactory.fromArgs(args).create();
+ * Pipeline p = Pipeline.create(options);
+ * PCollection<Entity> entities = p.apply(DatastoreIO.readFrom(datasetId, query));
  * p.run();
  * } 
* *
<p>
To write a {@link PCollection} to a datastore, use - * {@link DatastoreIO.Write}, specifying {@link DatastoreIO.Write#to} to specify + * {@link DatastoreIO.Sink}, specifying {@link DatastoreIO.Sink#to} to specify * the datastore to write to, and optionally {@link TextIO.Write#named} to specify * the name of the pipeline step. For example: * @@ -119,427 +137,353 @@ */ public class DatastoreIO { - private static final Logger LOG = LoggerFactory.getLogger(DatastoreIO.class); private static final String DEFAULT_HOST = "https://www.googleapis.com"; /** - * A PTransform that reads from a Datastore query and returns a - * {@code PCollection} containing each of the rows of the table. + * Returns an empty {@code DatastoreIO.Read} builder with the default host. + * You'll need to configure the dataset and query using {@link DatastoreIO.Source#withDataset} + * and {@link DatastoreIO.Source#withQuery}. */ - public static class Read { + public static Source read() { + return new Source(DEFAULT_HOST, null, null); + } - /** - * Returns a DatastoreIO.Read PTransform with the given step name. - */ - public static Bound named(String name) { - return new Bound(DEFAULT_HOST).named(name); - } + /** + * Returns a {@code PTransform} which reads Datastore entities from the query + * against the given dataset. + */ + public static ReadSource.Bound readFrom(String datasetId, Query query) { + return ReadSource.from(new Source(DEFAULT_HOST, datasetId, query)); + } - /** - * Reads entities retrieved from the dataset and a given query. - */ - public static Bound from(String datasetId, Query query) { - return new Bound(DEFAULT_HOST).from(datasetId, query); + /** + * Returns a {@code PTransform} which reads Datastore entities from the query + * against the given dataset and host. + */ + public static ReadSource.Bound readFrom(String host, String datasetId, Query query) { + return ReadSource.from(new Source(host, datasetId, query)); + } + + /** + * A source that reads the result rows of a Datastore query as {@code Entity} objects. + */ + @SuppressWarnings("serial") + public static class Source extends com.google.cloud.dataflow.sdk.io.Source { + String host; + String datasetId; + Query query; + /** For testing only. */ + private QuerySplitter mockSplitter; + private Supplier mockEstimateSizeBytes; + + private Source(String host, String datasetId, Query query) { + this.host = host; + this.datasetId = datasetId; + this.query = query; } - /** - * Returns a DatastoreIO.Read PTransform with specified host. - */ - public static Bound withHost(String host) { - return new Bound(host); + public Source withDataset(String datasetId) { + return new Source(host, datasetId, query); } - /** - * A PTransform that reads from a Datastore query and returns a bounded - * {@code PCollection}. - */ - @SuppressWarnings("serial") - public static class Bound extends PTransform> { - String host; - String datasetId; - Query query; - - /** - * Returns a DatastoreIO.Bound object with given query. - * Sets the name, Datastore host, datasetId, query associated - * with this PTransform, and options for this Pipeline. - */ - Bound(String name, String host, String datasetId, Query query) { - super(name); - this.host = host; - this.datasetId = datasetId; - this.query = query; - } + public Source withQuery(Query query) { + return new Source(host, datasetId, query); + } - /** - * Returns a DatastoreIO.Read PTransform with host set up. 
- */ - Bound(String host) { - this.host = host; - } + public Source withHost(String host) { + return new Source(host, datasetId, query); + } - /** - * Returns a new DatastoreIO.Read PTransform with the name - * associated with this transformation. - */ - public Bound named(String name) { - return new Bound(name, host, datasetId, query); - } + @Override + public Coder getDefaultOutputCoder() { + return EntityCoder.of(); + } - /** - * Returns a new DatastoreIO.Read PTransform with datasetId, - * and query associated with this transformation, and options - * associated with this Pipleine. - */ - public Bound from(String datasetId, Query query) { - return new Bound(name, host, datasetId, query); + @Override + public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { + // Datastore provides no way to get a good estimate of how large the result of a query + // will be. As a rough approximation, we attempt to fetch the statistics of the whole + // entity kind being queried, using the __Stat_Kind__ system table, assuming exactly 1 kind + // is specified in the query. + if (mockEstimateSizeBytes != null) { + return mockEstimateSizeBytes.get(); } - /** - * Returns a new DatastoreIO.Read PTransform with the host - * specified. - */ - public Bound withHost(String host) { - return new Bound(name, host, datasetId, query); + Datastore datastore = getDatastore(options); + if (query.getKindCount() != 1) { + throw new UnsupportedOperationException( + "Can only estimate size for queries specifying exactly 1 kind"); } - - @Override - public PCollection apply(PBegin input) { - if (datasetId == null || query == null) { - throw new IllegalStateException( - "need to set datasetId, and query " - + "of a DatastoreIO.Read transform"); - } - - QueryOptions queryOptions = QueryOptions.create(host, datasetId, query); - PCollection output; - try { - DataflowPipelineOptions options = - getPipeline().getOptions().as(DataflowPipelineOptions.class); - PCollection queries = splitQueryOptions(queryOptions, options, input); - - output = queries.apply(ParDo.of(new ReadEntitiesFn())); - getCoderRegistry().registerCoder(Entity.class, EntityCoder.class); - } catch (DatastoreException e) { - LOG.warn("DatastoreException: error while doing Datastore query splitting.", e); - throw new RuntimeException("Error while splitting Datastore query."); - } - - return output; + String ourKind = query.getKind(0).getName(); + long latestTimestamp = queryLatestStatisticsTimestamp(datastore); + Query.Builder query = Query.newBuilder(); + query.addKindBuilder().setName("__Stat_Kind__"); + query.setFilter(makeFilter( + makeFilter("kind_name", EQUAL, makeValue(ourKind)).build(), + makeFilter("timestamp", EQUAL, makeValue(latestTimestamp)).build())); + DatastoreV1.RunQueryRequest request = + DatastoreV1.RunQueryRequest.newBuilder().setQuery(query).build(); + + long now = System.currentTimeMillis(); + DatastoreV1.RunQueryResponse response = datastore.runQuery(request); + LOG.info("Query for per-kind statistics took " + (System.currentTimeMillis() - now) + "ms"); + + DatastoreV1.QueryResultBatch batch = response.getBatch(); + if (batch.getEntityResultCount() == 0) { + throw new NoSuchElementException( + "Datastore statistics for kind " + ourKind + " unavailable"); } - } - } - - ///////////////////// Write Class ///////////////////////////////// - /** - * A {@link PTransform} that writes a {@code PCollection} containing - * entities to a Datastore kind. - * - *

Current version only supports Write operation running on - * {@link DirectPipelineRunner}. If Write is used on {@link DataflowPipelineRunner}, - * it throws {@link UnsupportedOperationException} and won't continue on the - * operation. - * - */ - public static class Write { - /** - * Returns a DatastoreIO.Write PTransform with the name - * associated with this PTransform. - */ - public static Bound named(String name) { - return new Bound(DEFAULT_HOST).named(name); + Entity entity = batch.getEntityResult(0).getEntity(); + return getPropertyMap(entity).get("entity_bytes").getIntegerValue(); } /** - * Returns a DatastoreIO.Write PTransform with given datasetId. + * Datastore system tables with statistics are periodically updated. This method fetches + * the latest timestamp of statistics update using the __Stat_Total__ table. */ - public static Bound to(String datasetId) { - return new Bound(DEFAULT_HOST).to(datasetId); + private long queryLatestStatisticsTimestamp(Datastore datastore) throws DatastoreException { + Query.Builder query = Query.newBuilder(); + query.addKindBuilder().setName("__Stat_Total__"); + query.addOrder(makeOrder("timestamp", DESCENDING)); + query.setLimit(1); + DatastoreV1.RunQueryRequest request = + DatastoreV1.RunQueryRequest.newBuilder().setQuery(query).build(); + + long now = System.currentTimeMillis(); + DatastoreV1.RunQueryResponse response = datastore.runQuery(request); + LOG.info("Query for latest stats timestamp of dataset " + datasetId + " took " + + (System.currentTimeMillis() - now) + "ms"); + DatastoreV1.QueryResultBatch batch = response.getBatch(); + if (batch.getEntityResultCount() == 0) { + throw new NoSuchElementException( + "Datastore total statistics for dataset " + datasetId + " unavailable"); + } + Entity entity = batch.getEntityResult(0).getEntity(); + return getPropertyMap(entity).get("timestamp").getTimestampMicrosecondsValue(); } - /** - * Returns a DatastoreIO.Write PTransform with specified host. - */ - public static Bound withHost(String host) { - return new Bound(host); + @Override + public boolean producesSortedKeys(PipelineOptions options) { + // TODO: Perhaps this can be implemented by inspecting the query. + return false; } - /** - * A PTransform that writes a bounded {@code PCollection} - * to a Datastore. - */ - @SuppressWarnings("serial") - public static class Bound extends PTransform, PDone> { - String host; - String datasetId; - - /** - * Returns a DatastoreIO.Write PTransform with given host. - */ - Bound(String host) { - this.host = host; + @Override + public List splitIntoShards(long desiredShardSizeBytes, PipelineOptions options) + throws Exception { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + long numSplits; + try { + numSplits = getEstimatedSizeBytes(options) / desiredShardSizeBytes; + } catch (Exception e) { + LOG.warn("Estimated size unavailable, using number of workers", e); + // Fallback in case estimated size is unavailable. + numSplits = dataflowOptions.getNumWorkers(); } - - /** - * Returns a DatastoreIO.Write.Bound object. - * Sets the name, datastore agent, and kind associated - * with this transformation. 
- */ - Bound(String name, String host, String datasetId) { - super(name); - this.host = host; - this.datasetId = datasetId; + List splitQueries; + if (mockSplitter == null) { + splitQueries = DatastoreHelper.getQuerySplitter().getSplits( + query, (int) numSplits, getUserDatastore(host, datasetId, options)); + } else { + splitQueries = mockSplitter.getSplits(query, (int) numSplits, null); } - - /** - * Returns a DatastoreIO.Write PTransform with the name - * associated with this PTransform. - */ - public Bound named(String name) { - return new Bound(name, host, datasetId); + List res = new ArrayList<>(); + for (Query splitQuery : splitQueries) { + res.add(new Source(host, datasetId, splitQuery)); } + return res; + } - /** - * Returns a DatastoreIO.Write PTransform with given datasetId. - */ - public Bound to(String datasetId) { - return new Bound(name, host, datasetId); + @Override + public Reader createBasicReader( + PipelineOptions pipelineOptions, Coder coder, ExecutionContext executionContext) + throws IOException { + try { + return new DatastoreReader(query, getDatastore(pipelineOptions)); + } catch (GeneralSecurityException e) { + throw new IOException(e); } + } - /** - * Returns a new DatastoreIO.Write PTransform with specified host. - */ - public Bound withHost(String host) { - return new Bound(name, host, datasetId); + @Override + public void validate() { + Preconditions.checkNotNull(host, "host"); + Preconditions.checkNotNull(query, "query"); + Preconditions.checkNotNull(datasetId, "datasetId"); + } + + private Datastore getDatastore(PipelineOptions pipelineOptions) + throws IOException, GeneralSecurityException { + Datastore datastore = getUserDatastore(host, datasetId, pipelineOptions); + if (datastore == null) { + datastore = getWorkerDatastore(host, datasetId, pipelineOptions); } + return datastore; + } - @Override - public PDone apply(PCollection input) { - if (this.host == null || this.datasetId == null) { - throw new IllegalStateException( - "need to set Datastore host and dataasetId" - + "of a DatastoreIO.Write transform"); - } + /** For testing only. */ + Source withMockSplitter(QuerySplitter splitter) { + Source res = new Source(host, datasetId, query); + res.mockSplitter = splitter; + res.mockEstimateSizeBytes = mockEstimateSizeBytes; + return res; + } - return new PDone(); - } + /** For testing only. 
*/ + public Source withMockEstimateSizeBytes(Supplier estimateSizeBytes) { + Source res = new Source(host, datasetId, query); + res.mockSplitter = mockSplitter; + res.mockEstimateSizeBytes = estimateSizeBytes; + return res; + } + } - @Override - protected String getKindString() { return "DatastoreIO.Write"; } + public static Datastore getWorkerDatastore( + String host, String datasetId, PipelineOptions options) { + DatastoreOptions.Builder builder = + new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( + new RetryHttpRequestInitializer(null)); - @Override - protected Coder getDefaultOutputCoder() { - return VoidCoder.of(); - } + try { + Credential credential = + Credentials.getWorkerCredential(options.as(DataflowPipelineOptions.class)); + builder.credential(credential); + } catch (IOException e) { + LOG.warn("IOException: can't get credential for worker.", e); + throw new RuntimeException("Failed on getting credential for worker."); + } + return DatastoreFactory.get().create(builder.build()); + } - static { - DirectPipelineRunner.registerDefaultTransformEvaluator( - Bound.class, - new DirectPipelineRunner.TransformEvaluator() { - @Override - public void evaluate( - Bound transform, - DirectPipelineRunner.EvaluationContext context) { - evaluateWriteHelper(transform, context); - } - }); - } + public static Datastore getUserDatastore(String host, String datasetId, PipelineOptions options) + throws IOException, GeneralSecurityException { + DatastoreOptions.Builder builder = + new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( + new RetryHttpRequestInitializer(null)); + + Credential credential = Credentials.getUserCredential(options.as(GcpOptions.class)); + if (credential != null) { + builder.credential(credential); } + return DatastoreFactory.get().create(builder.build()); } - /////////////////////////////////////////////////////////////////// + ///////////////////// Write Class ///////////////////////////////// /** - * A DoFn that performs query request to Datastore and converts - * each QueryOptions into Entities. + * Returns a new {@link DatastoreIO.Sink} builder using the default host. + * You need to further configure it using {@link DatastoreIO.Sink#named}, + * {@link DatastoreIO.Sink#to}, and optionally {@link DatastoreIO.Sink#withHost}. */ - @SuppressWarnings("serial") - private static class ReadEntitiesFn extends DoFn { - @Override - public void processElement(ProcessContext c) { - Query query = c.element().getQuery(); - Datastore datastore = c.element().getWorkerDatastore( - c.getPipelineOptions().as(GcpOptions.class)); - DatastoreIterator entityIterator = new DatastoreIterator(query, datastore); - - while (entityIterator.hasNext()) { - c.output(entityIterator.next().getEntity()); - } - } + public static Sink write() { + return new Sink(DEFAULT_HOST); } /** - * A class that stores query and datastore setup environments - * (host and datasetId). + * Returns a new {@link DatastoreIO.Sink} builder using the default host and given dataset. + * You need to further configure it using {@link DatastoreIO.Sink#named}, + * and optionally {@link DatastoreIO.Sink#withHost}. */ - @DefaultCoder(AvroCoder.class) - private static class QueryOptions { - // Query to read in byte array. - public byte[] byteQuery; - - // Datastore host to read from. - public String host; - - // Datastore dataset ID to read from. 
- public String datasetId; + public static Sink writeTo(String datasetId) { + return write().to(datasetId); + } - @SuppressWarnings("unused") - QueryOptions() {} + /** + * A {@link PTransform} that writes a {@code PCollection} containing + * entities to a Datastore kind. + * + *
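+ *
+ * A minimal write might look like the following sketch (the dataset id is a placeholder and
+ * the input {@code PCollection} of entities is assumed to be produced by an earlier step):
+ * {@code
+ *   PCollection<Entity> entities = ...;
+ *   entities.apply(DatastoreIO.writeTo("my-dataset-id").named("WriteEntities"));
+ * }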

Current version only supports Write operation running on + * {@link DirectPipelineRunner}. If Write is used on {@link DataflowPipelineRunner}, + * it throws {@link UnsupportedOperationException} and won't continue on the + * operation. + * + */ + @SuppressWarnings("serial") + public static class Sink extends PTransform, PDone> { + String host; + String datasetId; /** - * Returns a QueryOption object without account and private key file - * (for supporting query on local Datastore). - * - * @param host the host of Datastore to connect - * @param datasetId the dataset ID of Datastore to query - * @param query the query to perform + * Returns a DatastoreIO.Write PTransform with given host. */ - QueryOptions(String host, String datasetId, Query query) { + Sink(String host) { this.host = host; - this.datasetId = datasetId; - this.setQuery(query); } /** - * Creates and returns a QueryOption object for query on local Datastore. - * - * @param host the host of Datastore to connect - * @param datasetId the dataset ID of Datastore to query - * @param query the query to perform + * Returns a DatastoreIO.Write.Bound object. + * Sets the name, datastore agent, and kind associated + * with this transformation. */ - public static QueryOptions create(String host, String datasetId, Query query) { - return new QueryOptions(host, datasetId, query); + Sink(String name, String host, String datasetId) { + super(name); + this.host = host; + this.datasetId = datasetId; } /** - * Sets up a query. - * Stores query in a byte array so that we can use AvroCoder to encode/decode - * QueryOptions. - * - * @param q the query to be addressed + * Returns a DatastoreIO.Write PTransform with the name + * associated with this PTransform. */ - public void setQuery(Query q) { - this.byteQuery = q.toByteArray(); + public Sink named(String name) { + return new Sink(name, host, datasetId); } /** - * Returns query. - * - * @return query in this option. + * Returns a DatastoreIO.Write PTransform with given datasetId. */ - public Query getQuery() { - try { - return Query.parseFrom(this.byteQuery); - } catch (IOException e) { - LOG.warn("IOException: parsing query failed.", e); - throw new RuntimeException("Cannot parse query from byte array."); - } + public Sink to(String datasetId) { + return new Sink(name, host, datasetId); } /** - * Returns the dataset ID. - * - * @return a dataset ID string for Datastore. + * Returns a new DatastoreIO.Write PTransform with specified host. */ - public String getDatasetId() { - return this.datasetId; + public Sink withHost(String host) { + return new Sink(name, host, datasetId); } - /** - * Returns a copy of QueryOptions from current options with given query. - * - * @param query a new query to be set - * @return A QueryOptions object for query - */ - public QueryOptions newQuery(Query query) { - return create(host, datasetId, query); - } + @Override + public PDone apply(PCollection input) { + if (this.host == null || this.datasetId == null) { + throw new IllegalStateException("need to set Datastore host and dataasetId" + + "of a DatastoreIO.Write transform"); + } - /** - * Returns a Datastore object for connecting to Datastore on workers. - * This method will try to get worker credential from Credentials - * library and constructs a Datastore object which is set up and - * ready to communicate with Datastore. - * - * @return a Datastore object setup with host and dataset. 
- */ - public Datastore getWorkerDatastore(GcpOptions options) { - DatastoreOptions.Builder builder = new DatastoreOptions.Builder() - .host(this.host) - .dataset(this.datasetId) - .initializer(new RetryHttpRequestInitializer(null)); + return new PDone(); + } - try { - Credential credential = Credentials.getWorkerCredential(options); - builder.credential(credential); - } catch (IOException e) { - LOG.warn("IOException: can't get credential for worker.", e); - throw new RuntimeException("Failed on getting credential for worker."); - } - return DatastoreFactory.get().create(builder.build()); + @Override + protected String getKindString() { + return "DatastoreIO.Write"; } - /** - * Returns a Datastore object for connecting to Datastore for users. - * This method will use the passed in credentials and construct a Datastore - * object which is set up and ready to communicate with Datastore. - * - * @return a Datastore object setup with host and dataset. - */ - public Datastore getUserDatastore(GcpOptions options) { - DatastoreOptions.Builder builder = new DatastoreOptions.Builder() - .host(this.host) - .dataset(this.datasetId) - .initializer(new RetryHttpRequestInitializer(null)); - - Credential credential = options.getGcpCredential(); - if (credential != null) { - builder.credential(credential); - } - return DatastoreFactory.get().create(builder.build()); + @Override + protected Coder getDefaultOutputCoder() { + return VoidCoder.of(); } - } - /** - * Returns a list of QueryOptions by splitting a QueryOptions into sub-queries. - * This method leverages the QuerySplitter in Datastore to split the - * query into sub-queries for further parallel query in Dataflow service. - * - * @return a PCollection of QueryOptions for split queries - */ - private static PCollection splitQueryOptions( - QueryOptions queryOptions, DataflowPipelineOptions options, - PBegin input) - throws DatastoreException { - Query query = queryOptions.getQuery(); - Datastore datastore = queryOptions.getUserDatastore(options); - - // Get splits from the QuerySplit interface. - List splitQueries = DatastoreHelper.getQuerySplitter() - .getSplits(query, options.getNumWorkers(), datastore); - - List> queryList = new LinkedList<>(); - for (Query q : splitQueries) { - PCollection newQuery = input - .apply(Create.of(queryOptions.newQuery(q))); - queryList.add(newQuery); - } - - // This is a workaround to allow for parallelism of a small collection. - return PCollectionList.of(queryList) - .apply(Flatten.create()); + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Sink.class, new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate( + Sink transform, DirectPipelineRunner.EvaluationContext context) { + evaluateWriteHelper(transform, context); + } + }); + } } - ///////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////// /** * Direct mode write evaluator. * This writes the result to Datastore. */ private static void evaluateWriteHelper( - Write.Bound transform, - DirectPipelineRunner.EvaluationContext context) { + Sink transform, DirectPipelineRunner.EvaluationContext context) { LOG.info("Writing to Datastore"); GcpOptions options = context.getPipelineOptions(); Credential credential = options.getGcpCredential(); @@ -556,8 +500,8 @@ private static void evaluateWriteHelper( // Create a map to put entities with same ancestor for writing in a batch. 
HashMap> map = new HashMap<>(); for (Entity e : entityList) { - String keyOfAncestor = e.getKey().getPathElement(0).getKind() - + e.getKey().getPathElement(0).getName(); + String keyOfAncestor = + e.getKey().getPathElement(0).getKind() + e.getKey().getPathElement(0).getName(); List value = map.get(keyOfAncestor); if (value == null) { value = new ArrayList<>(); @@ -603,4 +547,127 @@ private static void writeBatch(List listOfEntities, Datastore datastore) throw new RuntimeException("Datastore exception", e); } } + + /** + * An iterator over the records from a query of the datastore. + * + *

+   * Usage:
+   * {@code
+   *   DatastoreReader reader = new DatastoreReader(query, datastore);
+   *   while (reader.advance()) {
+   *     Entity e = reader.getCurrent();
+   *     ...
+   *   }
+   * }
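+   *
+   * In a pipeline you typically do not construct this reader yourself; it is created by the
+   * {@code Source} when the query is read. A typical read is expressed with
+   * {@code DatastoreIO.readFrom} instead (the dataset id and the {@code query} and {@code p}
+   * variables below are placeholders):
+   * {@code
+   *   PCollection<Entity> entities =
+   *       p.apply(DatastoreIO.readFrom("my-dataset-id", query).named("ReadMyKind"));
+   * }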
+ */ + public static class DatastoreReader + implements Source.Reader { + /** + * Query to select records. + */ + private Query.Builder query; + + /** + * Datastore to read from. + */ + private Datastore datastore; + + /** + * True if more results may be available. + */ + private boolean moreResults; + + /** + * Iterator over records. + */ + private java.util.Iterator entities; + + /** + * Current batch of query results. + */ + private DatastoreV1.QueryResultBatch currentBatch; + + /** + * Maximum number of results to request per query. + * + *

Must be set, or it may result in an I/O error when querying + * Cloud Datastore. + */ + private static final int QUERY_LIMIT = 5000; + + private Entity currentEntity; + + /** + * Returns a DatastoreIterator with query and Datastore object set. + * + * @param query the query to select records. + * @param datastore a datastore connection to use. + */ + public DatastoreReader(Query query, Datastore datastore) { + this.query = query.toBuilder().clone(); + this.datastore = datastore; + this.query.setLimit(QUERY_LIMIT); + } + + @Override + public Entity getCurrent() { + return currentEntity; + } + + @Override + public boolean advance() throws IOException { + if (entities == null || (!entities.hasNext() && moreResults)) { + try { + entities = getIteratorAndMoveCursor(); + } catch (DatastoreException e) { + throw new IOException(e); + } + } + + if (entities == null || !entities.hasNext()) { + currentEntity = null; + return false; + } + + currentEntity = entities.next().getEntity(); + return true; + } + + @Override + public void close() throws IOException { + // Nothing + } + + /** + * Returns an iterator over the next batch of records for the query + * and updates the cursor to get the next batch as needed. + * Query has specified limit and offset from InputSplit. + */ + private java.util.Iterator getIteratorAndMoveCursor() + throws DatastoreException { + if (currentBatch != null && currentBatch.hasEndCursor()) { + query.setStartCursor(currentBatch.getEndCursor()); + } + + DatastoreV1.RunQueryRequest request = + DatastoreV1.RunQueryRequest.newBuilder().setQuery(query).build(); + DatastoreV1.RunQueryResponse response = datastore.runQuery(request); + + currentBatch = response.getBatch(); + + // MORE_RESULTS_AFTER_LIMIT is not implemented yet: + // https://groups.google.com/forum/#!topic/gcd-discuss/iNs6M1jA2Vw, so + // use result count to determine if more results might exist. + int numFetch = currentBatch.getEntityResultCount(); + moreResults = (numFetch == QUERY_LIMIT) || (currentBatch.getMoreResults() == NOT_FINISHED); + + // May receive a batch of 0 results if the number of records is a multiple + // of the request limit. + if (numFetch == 0) { + return null; + } + + return currentBatch.getEntityResultList().iterator(); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java deleted file mode 100644 index 1b6d92e73c76a..0000000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIterator.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -package com.google.cloud.dataflow.sdk.io; - -import com.google.api.services.datastore.DatastoreV1.EntityResult; -import com.google.api.services.datastore.DatastoreV1.Query; -import com.google.api.services.datastore.DatastoreV1.QueryResultBatch; -import com.google.api.services.datastore.DatastoreV1.RunQueryRequest; -import com.google.api.services.datastore.DatastoreV1.RunQueryResponse; -import com.google.api.services.datastore.client.Datastore; -import com.google.api.services.datastore.client.DatastoreException; -import com.google.common.collect.AbstractIterator; - -import java.util.Iterator; - -/** - * An iterator over the records from a query of the datastore. - * - *

Usage: - *

{@code
- *   // Need to pass query and datastore object.
- *   DatastoreIterator iterator = new DatastoreIterator(query, datastore);
- *   while (iterator.hasNext()) {
- *     Entity e = iterator.next().getEntity();
- *     ...
- *   }
- * }
- */ -class DatastoreIterator extends AbstractIterator { - /** - * Query to select records. - */ - private Query.Builder query; - - /** - * Datastore to read from. - */ - private Datastore datastore; - - /** - * True if more results may be available. - */ - private boolean moreResults; - - /** - * Iterator over records. - */ - private Iterator entities; - - /** - * Current batch of query results. - */ - private QueryResultBatch currentBatch; - - /** - * Maximum number of results to request per query. - * - *

Must be set, or it may result in an I/O error when querying - * Cloud Datastore. - */ - private static final int QUERY_LIMIT = 5000; - - /** - * Returns a DatastoreIterator with query and Datastore object set. - * - * @param query the query to select records. - * @param datastore a datastore connection to use. - */ - public DatastoreIterator(Query query, Datastore datastore) { - this.query = query.toBuilder().clone(); - this.datastore = datastore; - this.query.setLimit(QUERY_LIMIT); - } - - /** - * Returns an iterator over the next batch of records for the query - * and updates the cursor to get the next batch as needed. - * Query has specified limit and offset from InputSplit. - */ - private Iterator getIteratorAndMoveCursor() - throws DatastoreException{ - if (this.currentBatch != null && this.currentBatch.hasEndCursor()) { - this.query.setStartCursor(this.currentBatch.getEndCursor()); - } - - RunQueryRequest request = RunQueryRequest.newBuilder() - .setQuery(this.query) - .build(); - RunQueryResponse response = this.datastore.runQuery(request); - - this.currentBatch = response.getBatch(); - - // MORE_RESULTS_AFTER_LIMIT is not implemented yet: - // https://groups.google.com/forum/#!topic/gcd-discuss/iNs6M1jA2Vw, so - // use result count to determine if more results might exist. - int numFetch = this.currentBatch.getEntityResultCount(); - moreResults = numFetch == QUERY_LIMIT; - - // May receive a batch of 0 results if the number of records is a multiple - // of the request limit. - if (numFetch == 0) { - return null; - } - - return this.currentBatch.getEntityResultList().iterator(); - } - - @Override - public EntityResult computeNext() { - try { - if (entities == null || (!entities.hasNext() && this.moreResults)) { - entities = getIteratorAndMoveCursor(); - } - - if (entities == null || !entities.hasNext()) { - return endOfData(); - } - - return entities.next(); - - } catch (DatastoreException e) { - throw new RuntimeException( - "Datastore error while iterating over entities", e); - } - } -} - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java new file mode 100644 index 0000000000000..fc44e34293382 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.dataflow.BasicSerializableSourceFormat; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.common.base.Preconditions; + +import javax.annotation.Nullable; + +/** + * The {@code PTransform} for reading from a {@code Source}. + *

+ * Usage example: + *

+ * Pipeline p = Pipeline.create();
+ * p.apply(ReadSource.from(new MySource().withFoo("foo").withBar("bar"))
+ *                   .named("foobar"));
+ * 
+ */ +public class ReadSource { + /** + * Returns a new {@code ReadSource.Bound} {@code PTransform} with the given name. + */ + @SuppressWarnings("unchecked") + public static Bound named(String name) { + return new Bound(name, null); + } + + /** + * Returns a new unnamed {@code ReadSource.Bound} {@code PTransform} reading from the given + * {@code Source}. + */ + public static Bound from(Source source) { + return new Bound<>("", source); + } + + /** + * Implementation of the {@code ReadSource} {@code PTransform} builder. + */ + public static class Bound + extends PTransform> { + @Nullable + private Source source; + + private Bound(@Nullable String name, @Nullable Source source) { + super(name); + this.source = source; + } + + /** + * Returns a new {@code ReadSource} {@code PTransform} that's like this one but + * reads from the given {@code Source}. + * + *

Does not modify this object. + */ + public Bound from(Source source) { + return new Bound(getName(), source); + } + + /** + * Returns a new {@code ReadSource} {@code PTransform} that's like this one but + * has the given name. + * + *

Does not modify this object. + */ + public Bound named(String name) { + return new Bound(name, source); + } + + @Override + protected Coder getDefaultOutputCoder() { + Preconditions.checkNotNull(source, "source must be set"); + return source.getDefaultOutputCoder(); + } + + @Override + public final PCollection apply(PInput input) { + Preconditions.checkNotNull(source, "source must be set"); + source.validate(); + return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + .setCoder(getDefaultOutputCoder()); + } + + /** + * Returns the {@code Source} used to create this {@code ReadSource} {@code PTransform}. + */ + @Nullable + public Source getSource() { + return source; + } + + static { + DirectPipelineRunner.registerDefaultTransformEvaluator( + Bound.class, new DirectPipelineRunner.TransformEvaluator() { + @Override + public void evaluate(Bound transform, DirectPipelineRunner.EvaluationContext context) { + BasicSerializableSourceFormat.evaluateReadHelper(transform, context); + } + }); + } + } + +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java new file mode 100644 index 0000000000000..21f36c3b9c0f7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.WindowedValue; + +import java.io.IOException; +import java.io.Serializable; +import java.util.List; +import java.util.NoSuchElementException; + +import javax.annotation.Nullable; + +/** + * Base class for defining input formats, with custom logic for splitting the input + * into shards (parts of the input, each of which may be processed on a different worker) + * and creating a {@code Source} for reading the input. + * + *

To use this class for supporting your custom input type, derive your class + * from it, and override the abstract methods. Also override either + * {@link #createWindowedReader} if your source supports timestamps and windows, + * or {@link #createBasicReader} otherwise. For an example, see {@link DatastoreIO}. + * + *
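+ *
+ * A schematic skeleton of such a subclass is sketched below; the class name is illustrative,
+ * the generic parameters are one plausible spelling of the declarations in this file, and the
+ * elided bodies ({@code ...}) are where your input-specific logic goes:
+ * {@code
+ *   class MyLinesSource extends Source<String> {
+ *     @Override
+ *     public List<? extends Source<String>> splitIntoShards(
+ *         long desiredShardSizeBytes, PipelineOptions options) throws Exception { ... }
+ *     @Override
+ *     public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { ... }
+ *     @Override
+ *     public boolean producesSortedKeys(PipelineOptions options) { return false; }
+ *     @Override
+ *     public void validate() { ... }
+ *     @Override
+ *     public Coder<String> getDefaultOutputCoder() { return StringUtf8Coder.of(); }
+ *     @Override
+ *     protected Reader<String> createBasicReader(
+ *         PipelineOptions options, Coder<String> coder, ExecutionContext executionContext)
+ *         throws IOException { ... }
+ *   }
+ * }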

A {@code Source} passed to a {@code Read} transform must be + * {@code Serializable}. This allows the {@code Source} instance + * created in this "main program" to be sent (in serialized form) to + * remote worker machines and reconstituted for each batch of elements + * of the input {@code PCollection} being processed or for each source splitting + * operation. A {@code Source} can have instance variable state, and + * non-transient instance variable state will be serialized in the main program + * and then deserialized on remote worker machines. + * + *

This API is experimental and subject to change. + * + * @param Type of elements read by the source. + */ +public abstract class Source implements Serializable { + /** + * Splits the source into shards. + * + *

{@code PipelineOptions} can be used to get information such as + * credentials for accessing an external storage. + */ + public abstract List> splitIntoShards( + long desiredShardSizeBytes, PipelineOptions options) throws Exception; + + /** + * An estimate of the total size (in bytes) of the data that would be read from this source. + * This estimate is in terms of external storage size, before any decompression or other + * processing done by the reader. + */ + public abstract long getEstimatedSizeBytes(PipelineOptions options) throws Exception; + + /** + * Whether this source is known to produce key/value pairs with the (encoded) keys in + * lexicographically sorted order. + */ + public abstract boolean producesSortedKeys(PipelineOptions options) throws Exception; + + /** + * Creates a windowed reader for this source. The default implementation wraps + * {@link #createBasicReader}. Override this function if your reader supports timestamps + * and windows; otherwise, override {@link #createBasicReader} instead. + */ + public Reader> createWindowedReader(PipelineOptions options, + Coder> coder, @Nullable ExecutionContext executionContext) + throws IOException { + return new WindowedReaderWrapper(createBasicReader( + options, ((WindowedValue.WindowedValueCoder) coder).getValueCoder(), executionContext)); + } + + /** + * Creates a basic (non-windowed) reader for this source. If you override this method, each value + * returned by this reader will be wrapped into the global window. + */ + protected Reader createBasicReader(PipelineOptions options, Coder coder, + @Nullable ExecutionContext executionContext) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * Checks that this source is valid, before it can be used into a pipeline. + * It is recommended to use {@link com.google.common.base.Preconditions} for implementing + * this method. + */ + public abstract void validate(); + + /** + * Returns the default {@code Coder} to use for the data read from this source. + */ + public abstract Coder getDefaultOutputCoder(); + + /** + * The interface which readers of custom input sources must implement. + *

+ * This interface is deliberately distinct from {@link java.util.Iterator} because + * the current model tends to be easier to program and more efficient in practice + * for iterating over sources such as files, databases etc. (rather than pure collections). + *

+ * To read from a {@code Reader}: + *

+   * while (iterator.advance()) {
+   *   T item = iterator.getCurrent();
+   *   ...
+   * }
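+   *
+   * Since a {@code Reader} extends {@code AutoCloseable}, the same loop is often written with
+   * try-with-resources so that {@link #close} always runs (how the reader is obtained is
+   * elided here):
+   * {@code
+   *   try (Source.Reader<T> reader = ...) {
+   *     while (reader.advance()) {
+   *       T item = reader.getCurrent();
+   *       ...
+   *     }
+   *   }
+   * }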
+   * 
+ *

+ * Note: this interface is work-in-progress and may change. + */ + public interface Reader extends AutoCloseable { + /** + * Advances the iterator to the next valid record. + * Invalidates the result of the previous {@link #getCurrent} call. + * @return {@code true} if a record was read, {@code false} if we're at the end of input. + */ + public boolean advance() throws IOException; + + /** + * Returns the value of the data item which was read by the last {@link #advance} call. + * @throws java.util.NoSuchElementException if the iterator is at the beginning of the input + * and {@link #advance} wasn't called, or if the last {@link #advance} returned {@code false}. + */ + public T getCurrent() throws NoSuchElementException; + + /** + * Closes the iterator. The iterator cannot be used after this method was called. + */ + @Override + public void close() throws IOException; + } + + /** + * An adapter from {@code SourceIterator} to {@code SourceIterator>}. + */ + private static class WindowedReaderWrapper implements Reader> { + private final Reader reader; + + public WindowedReaderWrapper(Reader reader) { + this.reader = reader; + } + + @Override + public boolean advance() throws IOException { + return reader.advance(); + } + + @Override + public WindowedValue getCurrent() throws NoSuchElementException { + return WindowedValue.valueInGlobalWindow(reader.getCurrent()); + } + + @Override + public void close() throws IOException { + reader.close(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index f3ddbd1cb444a..f948f5c689864 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -47,6 +47,7 @@ import com.google.cloud.dataflow.sdk.io.BigQueryIO; import com.google.cloud.dataflow.sdk.io.DatastoreIO; import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.io.ReadSource; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.options.CloudDebuggerOptions.DebuggerConfig; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; @@ -55,6 +56,7 @@ import com.google.cloud.dataflow.sdk.runners.dataflow.BigQueryIOTranslator; import com.google.cloud.dataflow.sdk.runners.dataflow.DatastoreIOTranslator; import com.google.cloud.dataflow.sdk.runners.dataflow.PubsubIOTranslator; +import com.google.cloud.dataflow.sdk.runners.dataflow.ReadSourceTranslator; import com.google.cloud.dataflow.sdk.runners.dataflow.TextIOTranslator; import com.google.cloud.dataflow.sdk.transforms.Combine; import com.google.cloud.dataflow.sdk.transforms.Create; @@ -923,7 +925,7 @@ private void translateSingleHelper( BigQueryIO.Write.Bound.class, new BigQueryIOTranslator.WriteTranslator()); registerTransformTranslator( - DatastoreIO.Write.Bound.class, new DatastoreIOTranslator.WriteTranslator()); + DatastoreIO.Sink.class, new DatastoreIOTranslator.WriteTranslator()); registerTransformTranslator( PubsubIO.Read.Bound.class, new PubsubIOTranslator.ReadTranslator()); @@ -934,6 +936,9 @@ private void translateSingleHelper( TextIO.Read.Bound.class, new TextIOTranslator.ReadTranslator()); registerTransformTranslator( TextIO.Write.Bound.class, new TextIOTranslator.WriteTranslator()); + + registerTransformTranslator( + ReadSource.Bound.class, new ReadSourceTranslator()); } private 
static void translateInputs( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java new file mode 100644 index 0000000000000..cb3f4c237e7d3 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import static com.google.api.client.util.Base64.encodeBase64String; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceOperationResponseToSourceOperationResponse; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceToDictionary; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceOperationRequestToCloudSourceOperationRequest; +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.deserializeFromByteArray; +import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.api.client.util.Base64; +import com.google.api.services.dataflow.model.SourceGetMetadataRequest; +import com.google.api.services.dataflow.model.SourceGetMetadataResponse; +import com.google.api.services.dataflow.model.SourceMetadata; +import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.api.services.dataflow.model.SourceOperationResponse; +import com.google.api.services.dataflow.model.SourceSplitOptions; +import com.google.api.services.dataflow.model.SourceSplitRequest; +import com.google.api.services.dataflow.model.SourceSplitResponse; +import com.google.api.services.dataflow.model.SourceSplitShard; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.ReadSource; +import com.google.cloud.dataflow.sdk.io.Source; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; + +/** + * An helper class for supporting sources defined as {@code Source}. 
+ * + * Provides a bridge between the high-level {@code Source} API and the raw + * API-level {@code SourceFormat} API, by encoding the serialized + * {@code Source} in a parameter of the API {@code Source} message. + *

+ */ +public class BasicSerializableSourceFormat implements SourceFormat { + private static final String SERIALIZED_SOURCE = "serialized_source"; + private static final long DEFAULT_DESIRED_SHARD_SIZE_BYTES = 64 * (1 << 20); + + private final PipelineOptions options; + + public BasicSerializableSourceFormat(PipelineOptions options) { + this.options = options; + } + + /** + * Executes a protocol-level split {@code SourceOperationRequest} by deserializing its source + * to a {@code Source}, splitting it, and serializing results back. + */ + @Override + public OperationResponse performSourceOperation(OperationRequest request) throws Exception { + SourceOperationRequest cloudRequest = + sourceOperationRequestToCloudSourceOperationRequest(request); + SourceOperationResponse cloudResponse = new SourceOperationResponse(); + if (cloudRequest.getGetMetadata() != null) { + cloudResponse.setGetMetadata(performGetMetadata(cloudRequest.getGetMetadata())); + } else if (cloudRequest.getSplit() != null) { + cloudResponse.setSplit(performSplit(cloudRequest.getSplit())); + } else { + throw new UnsupportedOperationException("Unknown source operation request"); + } + return cloudSourceOperationResponseToSourceOperationResponse(cloudResponse); + } + + /** + * Factory method allowing this class to satisfy the implicit contract of {@code SourceFactory}. + */ + @SuppressWarnings("unchecked") + public static com.google.cloud.dataflow.sdk.util.common.worker.Reader create( + final PipelineOptions options, CloudObject spec, + final Coder> coder, final ExecutionContext executionContext) + throws Exception { + final Source source = (Source) deserializeFromCloudSource(spec); + return new com.google.cloud.dataflow.sdk.util.common.worker.Reader() { + @Override + public ReaderIterator iterator() throws IOException { + return new BasicSerializableSourceFormat.ReaderIterator<>( + source.createWindowedReader(options, coder, executionContext)); + } + }; + } + + private SourceSplitResponse performSplit(SourceSplitRequest request) throws Exception { + Source source = deserializeFromCloudSource(request.getSource().getSpec()); + + // Produce simple independent, unsplittable shards with no metadata attached. + SourceSplitResponse response = new SourceSplitResponse(); + response.setShards(new ArrayList()); + SourceSplitOptions splitOptions = request.getOptions(); + Long desiredShardSizeBytes = + (splitOptions == null) ? 
null : splitOptions.getDesiredShardSizeBytes(); + if (desiredShardSizeBytes == null) { + desiredShardSizeBytes = DEFAULT_DESIRED_SHARD_SIZE_BYTES; + } + for (Source split : source.splitIntoShards(desiredShardSizeBytes, options)) { + SourceSplitShard shard = new SourceSplitShard(); + + com.google.api.services.dataflow.model.Source cloudSource = + serializeToCloudSource(split, options); + cloudSource.setDoesNotNeedSplitting(true); + + shard.setDerivationMode("SOURCE_DERIVATION_MODE_INDEPENDENT"); + shard.setSource(cloudSource); + response.getShards().add(shard); + } + response.setOutcome("SOURCE_SPLIT_OUTCOME_SPLITTING_HAPPENED"); + return response; + } + + private SourceGetMetadataResponse performGetMetadata(SourceGetMetadataRequest request) + throws Exception { + Source source = deserializeFromCloudSource(request.getSource().getSpec()); + SourceMetadata metadata = new SourceMetadata(); + metadata.setProducesSortedKeys(source.producesSortedKeys(options)); + metadata.setEstimatedSizeBytes(source.getEstimatedSizeBytes(options)); + SourceGetMetadataResponse response = new SourceGetMetadataResponse(); + response.setMetadata(metadata); + return response; + } + + private static Source deserializeFromCloudSource(Map spec) + throws Exception { + return (Source) deserializeFromByteArray( + Base64.decodeBase64(getString(spec, SERIALIZED_SOURCE)), "Source"); + } + + private static com.google.api.services.dataflow.model.Source serializeToCloudSource( + Source source, PipelineOptions options) throws Exception { + com.google.api.services.dataflow.model.Source cloudSource = + new com.google.api.services.dataflow.model.Source(); + // We ourselves act as the SourceFormat. + cloudSource.setSpec(CloudObject.forClass(BasicSerializableSourceFormat.class)); + addString( + cloudSource.getSpec(), SERIALIZED_SOURCE, encodeBase64String(serializeToByteArray(source))); + + SourceMetadata metadata = new SourceMetadata(); + metadata.setProducesSortedKeys(source.producesSortedKeys(options)); + metadata.setEstimatedSizeBytes(source.getEstimatedSizeBytes(options)); + cloudSource.setMetadata(metadata); + return cloudSource; + } + + public static void evaluateReadHelper( + ReadSource.Bound transform, DirectPipelineRunner.EvaluationContext context) { + try { + List> elems = new ArrayList<>(); + Source source = transform.getSource(); + try (Source.Reader> reader = + source.createWindowedReader(context.getPipelineOptions(), + WindowedValue.getValueOnlyCoder(source.getDefaultOutputCoder()), null)) { + while (reader.advance()) { + elems.add(reader.getCurrent()); + } + } + List> output = new ArrayList<>(); + for (WindowedValue elem : elems) { + output.add(DirectPipelineRunner.ValueWithMetadata.of(elem)); + } + context.setPCollectionValuesWithMetadata(transform.getOutput(), output); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static void translateReadHelper( + ReadSource.Bound transform, DataflowPipelineTranslator.TranslationContext context) { + try { + context.addStep(transform, "ParallelRead"); + context.addInput(PropertyNames.FORMAT, PropertyNames.CUSTOM_SOURCE_FORMAT); + context.addInput( + PropertyNames.SOURCE_STEP_INPUT, + cloudSourceToDictionary( + serializeToCloudSource(transform.getSource(), context.getPipelineOptions()))); + context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Adapter from the {@code Source.Reader} interface to + * {@code Reader.ReaderIterator}. 
+ * + * TODO: Consider changing the API of Reader.ReaderIterator so this adapter wouldn't be needed. + */ + private static class ReaderIterator + implements com.google.cloud.dataflow.sdk.util.common.worker.Reader.ReaderIterator { + private Source.Reader reader; + private boolean hasNext; + private T next; + private boolean advanced; + + private ReaderIterator(Source.Reader reader) { + this.reader = reader; + } + + @Override + public boolean hasNext() throws IOException { + if (!advanced) { + advanceInternal(); + } + return hasNext; + } + + @Override + public T next() throws IOException { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T res = this.next; + advanceInternal(); + return res; + } + + private void advanceInternal() throws IOException { + try { + hasNext = reader.advance(); + if (hasNext) { + next = reader.getCurrent(); + } + advanced = true; + } catch (Exception e) { + throw new IOException(e); + } + } + + @Override + public com.google.cloud.dataflow.sdk.util.common.worker.Reader.ReaderIterator copy() + throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException { + reader.close(); + } + + @Override + public com.google.cloud.dataflow.sdk.util.common.worker.Reader.Progress getProgress() { + return null; + } + + @Override + public com.google.cloud.dataflow.sdk.util.common.worker.Reader.Position updateStopPosition( + com.google.cloud.dataflow.sdk.util.common.worker.Reader.Progress proposedStopPosition) { + return null; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java index 4292199174a14..e809d692387c0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java @@ -28,10 +28,10 @@ public class DatastoreIOTranslator { /** * Implements DatastoreIO Write translation for the Dataflow backend. */ - public static class WriteTranslator implements TransformTranslator { + public static class WriteTranslator implements TransformTranslator { @Override public void translate( - DatastoreIO.Write.Bound transform, + DatastoreIO.Sink transform, TranslationContext context) { // TODO: Not implemented yet. // translateWriteHelper(transform, context); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadSourceTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadSourceTranslator.java new file mode 100644 index 0000000000000..7897c4cd17fac --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadSourceTranslator.java @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import com.google.cloud.dataflow.sdk.io.ReadSource; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator; + +/** + * Translator for the {@code ReadSource} {@code PTransform} for the Dataflow back-end. + */ +public class ReadSourceTranslator + implements DataflowPipelineTranslator.TransformTranslator { + @Override + public void translate( + ReadSource.Bound transform, DataflowPipelineTranslator.TranslationContext context) { + BasicSerializableSourceFormat.translateReadHelper(transform, context); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 499653a11d0a7..555e15a0bc403 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -35,8 +35,8 @@ import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.Metric; -import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; import com.google.cloud.dataflow.sdk.util.common.worker.WorkProgressUpdater; @@ -115,7 +115,7 @@ private boolean doWork(WorkItem workItem) throws IOException { worker = MapTaskExecutorFactory.create(options, workItem.getMapTask(), executionContext); } else if (workItem.getSourceOperationTask() != null) { - worker = SourceOperationExecutorFactory.create(workItem.getSourceOperationTask()); + worker = SourceOperationExecutorFactory.create(options, workItem.getSourceOperationTask()); } else { throw new RuntimeException("unknown kind of work item: " + workItem.toString()); @@ -150,13 +150,13 @@ private boolean doWork(WorkItem workItem) throws IOException { // TODO: Find out a generic way for the WorkExecutor to report work-specific results // into the work update. - CustomSourceFormat.OperationResponse sourceOperationResponse = + SourceFormat.OperationResponse operationResponse = (worker instanceof SourceOperationExecutor) ? 
cloudSourceOperationResponseToSourceOperationResponse( ((SourceOperationExecutor) worker).getResponse()) : null; reportStatus( - options, "Success", workItem, counters, metrics, sourceOperationResponse, null/*errors*/); + options, "Success", workItem, counters, metrics, operationResponse, null/*errors*/); return true; @@ -219,11 +219,11 @@ private static String buildCloudStackTrace(Throwable t) { private void reportStatus(DataflowWorkerHarnessOptions options, String status, WorkItem workItem, @Nullable CounterSet counters, @Nullable Collection> metrics, - @Nullable CustomSourceFormat.OperationResponse sourceOperationResponse, + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) throws IOException { LOG.info("{} processing work item {}", status, uniqueId(workItem)); WorkItemStatus workItemStatus = buildStatus(workItem, true/*completed*/, counters, metrics, - options, null, null, sourceOperationResponse, errors); + options, null, null, operationResponse, errors); workUnitClient.reportWorkItemStatus(workItemStatus); } @@ -231,7 +231,7 @@ static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, @Nullable CounterSet counters, @Nullable Collection> metrics, DataflowWorkerHarnessOptions options, @Nullable Reader.Progress progress, @Nullable Reader.Position stopPosition, - @Nullable CustomSourceFormat.OperationResponse sourceOperationResponse, + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) { WorkItemStatus status = new WorkItemStatus(); status.setWorkItemId(Long.toString(workItem.getId())); @@ -280,7 +280,7 @@ static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, if (workItem.getSourceOperationTask() != null) { status.setSourceOperationResponse( - sourceOperationResponseToCloudSourceOperationResponse(sourceOperationResponse)); + sourceOperationResponseToCloudSourceOperationResponse(operationResponse)); } return status; @@ -304,7 +304,7 @@ public abstract static class WorkUnitClient { * Reports a {@link WorkItemStatus} for an assigned {@link WorkItem}. * * @param workItemStatus the status to report - * @return a {@link WorkServiceState} (e.g. a new stop position) + * @return a {@link WorkItemServiceState} (e.g. 
a new stop position) */ public abstract WorkItemServiceState reportWorkItemStatus(WorkItemStatus workItemStatus) throws IOException; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFormatFactory.java similarity index 71% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFormatFactory.java index 1bb3db228a730..98e6aabd794e5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CustomSourceFormatFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceFormatFactory.java @@ -19,29 +19,31 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getString; import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.InstanceBuilder; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; +import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; import java.util.Map; /** - * Creates {@code CustomSourceFormat} objects from {@code Source}. + * Creates {@code SourceFormat} objects from {@code Source}. */ -public class CustomSourceFormatFactory { - private CustomSourceFormatFactory() {} +public class SourceFormatFactory { + private SourceFormatFactory() {} - public static CustomSourceFormat create(Source source) throws Exception { + public static SourceFormat create(PipelineOptions options, Source source) throws Exception { Map spec = source.getSpec(); try { - return InstanceBuilder.ofType(CustomSourceFormat.class) + return InstanceBuilder.ofType(SourceFormat.class) .fromClassName(getString(spec, PropertyNames.OBJECT_TYPE_NAME)) + .withArg(PipelineOptions.class, options) .build(); } catch (ClassNotFoundException exn) { throw new Exception( - "unable to create a custom source format from " + source, exn); + "unable to create a source format from " + source, exn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java index 34bd27966bfc7..3ad4528ee977d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java @@ -22,6 +22,7 @@ import com.google.api.services.dataflow.model.Source; import com.google.api.services.dataflow.model.SourceOperationRequest; import com.google.api.services.dataflow.model.SourceOperationResponse; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; @@ -36,12 +37,15 @@ public class SourceOperationExecutor extends WorkExecutor { private static final Logger LOG = LoggerFactory.getLogger(MapTaskExecutor.class); + private final PipelineOptions options; private final SourceOperationRequest request; private SourceOperationResponse response; - public SourceOperationExecutor(SourceOperationRequest request, + public SourceOperationExecutor(PipelineOptions options, + SourceOperationRequest 
request, CounterSet counters) { super(counters); + this.options = options; this.request = request; } @@ -58,11 +62,10 @@ public void execute() throws Exception { throw new UnsupportedOperationException("Unknown source operation"); } - this.response = - sourceOperationResponseToCloudSourceOperationResponse( - CustomSourceFormatFactory.create(sourceSpec) - .performSourceOperation( - cloudSourceOperationRequestToSourceOperationRequest(request))); + this.response = sourceOperationResponseToCloudSourceOperationResponse( + SourceFormatFactory.create(options, sourceSpec) + .performSourceOperation( + cloudSourceOperationRequestToSourceOperationRequest(request))); LOG.debug("Source operation execution complete"); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java index 10c862e464875..3da3d6ab94e08 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutorFactory.java @@ -17,15 +17,16 @@ package com.google.cloud.dataflow.sdk.runners.worker; import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.common.CounterSet; /** * Creates a SourceOperationExecutor from a SourceOperation. */ public class SourceOperationExecutorFactory { - public static SourceOperationExecutor create(SourceOperationRequest request) - throws Exception { + public static SourceOperationExecutor create( + PipelineOptions options, SourceOperationRequest request) throws Exception { CounterSet counters = new CounterSet(); - return new SourceOperationExecutor(request, counters); + return new SourceOperationExecutor(options, request, counters); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java index 0f7ce18c480f5..07229941a82af 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java @@ -28,8 +28,8 @@ import com.google.api.services.dataflow.model.SourceOperationRequest; import com.google.api.services.dataflow.model.SourceOperationResponse; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.worker.CustomSourceFormat; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; import java.util.HashMap; import java.util.Map; @@ -50,19 +50,19 @@ public static Reader.Position cloudPositionToReaderPosition(@Nullable Position c return cloudPosition == null ? null : new DataflowReaderPosition(cloudPosition); } - public static CustomSourceFormat.OperationRequest + public static SourceFormat.OperationRequest cloudSourceOperationRequestToSourceOperationRequest( @Nullable SourceOperationRequest request) { return request == null ? null : new DataflowSourceOperationRequest(request); } - public static CustomSourceFormat.OperationResponse + public static SourceFormat.OperationResponse cloudSourceOperationResponseToSourceOperationResponse( @Nullable SourceOperationResponse response) { return response == null ? 
null : new DataflowSourceOperationResponse(response); } - public static CustomSourceFormat.SourceSpec cloudSourceToSourceSpec( + public static SourceFormat.SourceSpec cloudSourceToSourceSpec( @Nullable Source cloudSource) { return cloudSource == null ? null : new DataflowSourceSpec(cloudSource); } @@ -76,17 +76,18 @@ public static Position sourcePositionToCloudPosition(@Nullable Reader.Position s return sourcePosition == null ? null : ((DataflowReaderPosition) sourcePosition).cloudPosition; } + public static SourceOperationRequest sourceOperationRequestToCloudSourceOperationRequest( - @Nullable CustomSourceFormat.OperationRequest request) { + @Nullable SourceFormat.OperationRequest request) { return (request == null) ? null : ((DataflowSourceOperationRequest) request).cloudRequest; } public static SourceOperationResponse sourceOperationResponseToCloudSourceOperationResponse( - @Nullable CustomSourceFormat.OperationResponse response) { + @Nullable SourceFormat.OperationResponse response) { return (response == null) ? null : ((DataflowSourceOperationResponse) response).cloudResponse; } - public static Source sourceSpecToCloudSource(@Nullable CustomSourceFormat.SourceSpec spec) { + public static Source sourceSpecToCloudSource(@Nullable SourceFormat.SourceSpec spec) { return (spec == null) ? null : ((DataflowSourceSpec) spec).cloudSource; } @@ -104,40 +105,40 @@ public DataflowReaderPosition(Position cloudPosition) { } } - static class DataflowSourceOperationRequest implements CustomSourceFormat.OperationRequest { + static class DataflowSourceOperationRequest implements SourceFormat.OperationRequest { public final SourceOperationRequest cloudRequest; public DataflowSourceOperationRequest(SourceOperationRequest cloudRequest) { this.cloudRequest = cloudRequest; } } - static class DataflowSourceOperationResponse implements CustomSourceFormat.OperationResponse { + static class DataflowSourceOperationResponse implements SourceFormat.OperationResponse { public final SourceOperationResponse cloudResponse; public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { this.cloudResponse = cloudResponse; } } - static class DataflowSourceSpec implements CustomSourceFormat.SourceSpec { + static class DataflowSourceSpec implements SourceFormat.SourceSpec { public final Source cloudSource; public DataflowSourceSpec(Source cloudSource) { this.cloudSource = cloudSource; } } - // Represents a cloud Source as a dictionary for encoding inside the CUSTOM_SOURCE + // Represents a cloud Source as a dictionary for encoding inside the {@code SOURCE_STEP_INPUT} // property of CloudWorkflowStep.input. public static Map cloudSourceToDictionary(Source source) { // Do not translate encoding - the source's encoding is translated elsewhere // to the step's output info. 
Map res = new HashMap<>(); - addDictionary(res, PropertyNames.CUSTOM_SOURCE_SPEC, source.getSpec()); + addDictionary(res, PropertyNames.SOURCE_SPEC, source.getSpec()); if (source.getMetadata() != null) { - addDictionary(res, PropertyNames.CUSTOM_SOURCE_METADATA, + addDictionary(res, PropertyNames.SOURCE_METADATA, cloudSourceMetadataToDictionary(source.getMetadata())); } if (source.getDoesNotNeedSplitting() != null) { - addBoolean(res, PropertyNames.CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING, + addBoolean(res, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, source.getDoesNotNeedSplitting()); } return res; @@ -147,22 +148,22 @@ private static Map cloudSourceMetadataToDictionary(SourceMetadat Map res = new HashMap<>(); if (metadata.getProducesSortedKeys() != null) { addBoolean( - res, PropertyNames.CUSTOM_SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys()); + res, PropertyNames.SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys()); } if (metadata.getEstimatedSizeBytes() != null) { addLong( - res, PropertyNames.CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes()); + res, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes()); } if (metadata.getInfinite() != null) { - addBoolean(res, PropertyNames.CUSTOM_SOURCE_IS_INFINITE, metadata.getInfinite()); + addBoolean(res, PropertyNames.SOURCE_IS_INFINITE, metadata.getInfinite()); } return res; } public static Source dictionaryToCloudSource(Map params) throws Exception { Source res = new Source(); - res.setSpec(getDictionary(params, PropertyNames.CUSTOM_SOURCE_SPEC)); - // CUSTOM_SOURCE_METADATA and CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING do not have to be + res.setSpec(getDictionary(params, PropertyNames.SOURCE_SPEC)); + // SOURCE_METADATA and SOURCE_DOES_NOT_NEED_SPLITTING do not have to be // translated, because they only make sense in cloud Source objects produced by the user. return res; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java index bcb4d343a35a4..e2d6ee6b6a72a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.api.services.dataflow.model.Source; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.runners.worker.ReaderFactory; import java.util.HashMap; @@ -54,9 +55,10 @@ public static Source flattenBaseSpecs(Source source) { * Creates a {@link com.google.cloud.dataflow.sdk.util.common.worker.Reader} * from the given Dataflow Source API definition and reads all elements from it. 
*/ - public static List readElemsFromSource(Source source) { + + public static List readElemsFromSource(PipelineOptions options, Source source) { try { - return ReaderUtils.readElemsFromReader(ReaderFactory.create(null, source, null)); + return ReaderUtils.readElemsFromReader(ReaderFactory.create(options, source, null)); } catch (Exception e) { throw new RuntimeException("Failed to read from source: " + source.toString(), e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index c7a5a307f1965..26b260bd50f2d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -31,13 +31,13 @@ public class PropertyNames { public static final String COMBINE_FN = "combine_fn"; public static final String COMPONENT_ENCODINGS = "component_encodings"; public static final String CUSTOM_SOURCE_FORMAT = "custom_source"; - public static final String CUSTOM_SOURCE_STEP_INPUT = "custom_source_step_input"; - public static final String CUSTOM_SOURCE_SPEC = "spec"; - public static final String CUSTOM_SOURCE_METADATA = "metadata"; - public static final String CUSTOM_SOURCE_DOES_NOT_NEED_SPLITTING = "does_not_need_splitting"; - public static final String CUSTOM_SOURCE_PRODUCES_SORTED_KEYS = "produces_sorted_keys"; - public static final String CUSTOM_SOURCE_IS_INFINITE = "is_infinite"; - public static final String CUSTOM_SOURCE_ESTIMATED_SIZE_BYTES = "estimated_size_bytes"; + public static final String SOURCE_STEP_INPUT = "custom_source_step_input"; + public static final String SOURCE_SPEC = "spec"; + public static final String SOURCE_METADATA = "metadata"; + public static final String SOURCE_DOES_NOT_NEED_SPLITTING = "does_not_need_splitting"; + public static final String SOURCE_PRODUCES_SORTED_KEYS = "produces_sorted_keys"; + public static final String SOURCE_IS_INFINITE = "is_infinite"; + public static final String SOURCE_ESTIMATED_SIZE_BYTES = "estimated_size_bytes"; public static final String ELEMENT = "element"; public static final String ELEMENTS = "elements"; public static final String ENCODING = "encoding"; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java index fa02a6bf3185b..6b9bc3b53ecb1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java @@ -19,6 +19,7 @@ import com.google.api.client.auth.oauth2.BearerToken; import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.auth.oauth2.TokenResponse; +import com.google.api.client.testing.http.MockHttpTransport; import java.io.IOException; @@ -35,7 +36,8 @@ public TestCredential() { public TestCredential(String token) { super(new Builder( - BearerToken.authorizationHeaderAccessMethod())); + BearerToken.authorizationHeaderAccessMethod()) + .setTransport(new MockHttpTransport())); this.token = token; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index acd5d6468b691..e97cf2acb3daf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java 
@@ -157,7 +157,7 @@ public void run() { try { // Force a progress update at the beginning and at the end. synchronized (sourceIteratorLock) { - progress.set(readerIterator.getProgress()); + setProgressFromIterator(); } while (true) { Object value; @@ -172,14 +172,14 @@ public void run() { value = readerIterator.next(); if (isProgressUpdateRequested.getAndSet(false) || progressUpdatePeriodMs == 0) { - progress.set(readerIterator.getProgress()); + setProgressFromIterator(); } } } receiver.process(value); } synchronized (sourceIteratorLock) { - progress.set(readerIterator.getProgress()); + setProgressFromIterator(); } } finally { synchronized (sourceIteratorLock) { @@ -193,6 +193,17 @@ public void run() { } } + private void setProgressFromIterator() { + try { + progress.set(readerIterator.getProgress()); + } catch (UnsupportedOperationException e) { + // Ignore: same semantics as null. + } catch (Exception e) { + // This is not a normal situation, but should not kill the task. + LOG.warn("Progress estimation failed", e); + } + } + /** * Returns a (possibly slightly stale) value of the progress of the task. * Guaranteed to not block indefinitely. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java index 2ecef5b4cc869..2b805c6f1d1fd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java @@ -71,7 +71,8 @@ public interface ReaderIterator extends AutoCloseable { * called concurrently to any other methods. * * @return the progress, or {@code null} if no progress measure - * can be provided + * can be provided (implementors are discouraged from throwing + * {@code UnsupportedOperationException} in this case). */ public Progress getProgress(); @@ -91,7 +92,8 @@ public interface ReaderIterator extends AutoCloseable { * @param proposedStopPosition a proposed position to stop * iterating through the source * @return the new stop position, or {@code null} on failure if the - * implementation does not support position updates. + * implementation does not support position updates(implementors are discouraged + * from throwing {@code UnsupportedOperationException} in this case). */ public Position updateStopPosition(Progress proposedStopPosition); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/SourceFormat.java similarity index 98% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/SourceFormat.java index 12cdf30ac468a..8b65c90877cb6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/CustomSourceFormat.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/SourceFormat.java @@ -20,7 +20,7 @@ * An interface for sources which can perform operations on source specifications, such as * splitting the source and computing its metadata. See {@code SourceOperationRequest} for details. */ -public interface CustomSourceFormat { +public interface SourceFormat { /** * Performs an operation on the specification of a source. * See {@code SourceOperationRequest} for details. 
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java index fba8ec20b3fd0..279a4f16b9047 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java @@ -17,13 +17,25 @@ package com.google.cloud.dataflow.sdk.io; import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import com.google.api.services.datastore.DatastoreV1; import com.google.api.services.datastore.DatastoreV1.Entity; import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.api.services.datastore.client.Datastore; +import com.google.api.services.datastore.client.DatastoreHelper; +import com.google.api.services.datastore.client.QuerySplitter; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.EntityCoder; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.DirectPipeline; import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.common.base.Supplier; import org.junit.Before; import org.junit.Test; @@ -31,12 +43,14 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.util.ArrayList; +import java.util.List; + /** * Tests for DatastoreIO Read and Write transforms. */ @RunWith(JUnit4.class) public class DatastoreIOTest { - private String host; private String datasetId; private Query query; @@ -72,9 +86,8 @@ public void setUp() { @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testBuildRead() throws Exception { - DatastoreIO.Read.Bound readQuery = DatastoreIO.Read - .withHost(this.host) - .from(this.datasetId, this.query); + DatastoreIO.Source readQuery = + DatastoreIO.read().withHost(this.host).withDataset(this.datasetId).withQuery(this.query); assertEquals(this.query, readQuery.query); assertEquals(this.datasetId, readQuery.datasetId); assertEquals(this.host, readQuery.host); @@ -82,45 +95,87 @@ public void testBuildRead() throws Exception { @Test public void testBuildReadAlt() throws Exception { - DatastoreIO.Read.Bound readQuery = DatastoreIO.Read - .from(this.datasetId, this.query) - .withHost(this.host); + DatastoreIO.Source readQuery = + DatastoreIO.read().withDataset(this.datasetId).withQuery(this.query).withHost(this.host); assertEquals(this.query, readQuery.query); assertEquals(this.datasetId, readQuery.datasetId); assertEquals(this.host, readQuery.host); } - @Test(expected = IllegalStateException.class) - public void testBuildReadWithoutDatastoreSettingToCatchException() - throws Exception { + @Test(expected = NullPointerException.class) + public void testBuildReadWithoutDatastoreSettingToCatchException() throws Exception { // create pipeline and run the pipeline to get result Pipeline p = DirectPipeline.createForTest(); - p.apply(DatastoreIO.Read.named("ReadDatastore")); + p.apply(ReadSource.from(DatastoreIO.read().withHost(null))); + } + + @Test + public void testQuerySplitWithMockSplitter() throws Exception { + String dataset = "mydataset"; + DatastoreV1.KindExpression mykind = + 
DatastoreV1.KindExpression.newBuilder().setName("mykind").build(); + Query query = Query.newBuilder().addKind(mykind).build(); + + DataflowPipelineOptions options = PipelineOptionsFactory.create() + .as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + List mockSplits = new ArrayList<>(); + for (int i = 0; i < 8; ++i) { + mockSplits.add( + Query.newBuilder() + .addKind(mykind) + .setFilter( + DatastoreHelper.makeFilter("foo", DatastoreV1.PropertyFilter.Operator.EQUAL, + DatastoreV1.Value.newBuilder().setIntegerValue(i).build())) + .build()); + } + + QuerySplitter splitter = mock(QuerySplitter.class); + when(splitter.getSplits(any(Query.class), eq(8), any(Datastore.class))).thenReturn(mockSplits); + + DatastoreIO.Source io = + DatastoreIO.read() + .withDataset(dataset) + .withQuery(query) + .withMockSplitter(splitter) + .withMockEstimateSizeBytes(new Supplier() { + @Override + public Long get() { + return 8 * 1024L; + } + }); + + List shards = io.splitIntoShards(1024, options); + assertEquals(8, shards.size()); + for (int i = 0; i < 8; ++i) { + DatastoreIO.Source shard = shards.get(i); + Query shardQuery = shard.query; + assertEquals("mykind", shardQuery.getKind(0).getName()); + assertEquals(i, shardQuery.getFilter().getPropertyFilter().getValue().getIntegerValue()); + } } @Test public void testBuildWrite() throws Exception { - DatastoreIO.Write.Bound write = DatastoreIO.Write - .to(this.datasetId) - .withHost(this.host); - assertEquals(this.host, write.host); - assertEquals(this.datasetId, write.datasetId); + DatastoreIO.Sink sink = DatastoreIO.write().to(this.datasetId).withHost(this.host); + assertEquals(this.host, sink.host); + assertEquals(this.datasetId, sink.datasetId); } @Test public void testBuildWriteAlt() throws Exception { - DatastoreIO.Write.Bound write = DatastoreIO.Write - .withHost(this.host) - .to(this.datasetId); - assertEquals(this.host, write.host); - assertEquals(this.datasetId, write.datasetId); + DatastoreIO.Sink sink = DatastoreIO.write().withHost(this.host).to(this.datasetId); + assertEquals(this.host, sink.host); + assertEquals(this.datasetId, sink.datasetId); } @Test(expected = IllegalStateException.class) public void testBuildWriteWithoutDatastoreToCatchException() throws Exception { // create pipeline and run the pipeline to get result Pipeline p = DirectPipeline.createForTest(); - p.apply(Create.of()).setCoder(EntityCoder.of()) - .apply(DatastoreIO.Write.named("WriteDatastore")); + p.apply(Create.of()) + .setCoder(EntityCoder.of()) + .apply(DatastoreIO.write().named("WriteDatastore")); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java new file mode 100644 index 0000000000000..91d0003eea20c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.dataflow; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceOperationRequestToSourceOperationRequest; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.dictionaryToCloudSource; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceOperationResponseToCloudSourceOperationResponse; +import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; +import static com.google.cloud.dataflow.sdk.util.Structs.getObject; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.api.services.dataflow.model.Job; +import com.google.api.services.dataflow.model.SourceOperationRequest; +import com.google.api.services.dataflow.model.SourceSplitRequest; +import com.google.api.services.dataflow.model.SourceSplitResponse; +import com.google.api.services.dataflow.model.SourceSplitShard; +import com.google.api.services.dataflow.model.Step; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.ReadSource; +import com.google.cloud.dataflow.sdk.io.Source; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CloudSourceUtils; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nullable; + +/** + * Tests for {@code BasicSerializableSourceFormat}. 
+ */ +@RunWith(JUnit4.class) +public class BasicSerializableSourceFormatTest { + static class TestIO { + public static Read fromRange(int from, int to) { + return new Read(from, to); + } + + static class Read extends Source { + final int from; + final int to; + + Read(int from, int to) { + this.from = from; + this.to = to; + } + + @Override + public List splitIntoShards(long desiredShardSizeBytes, PipelineOptions options) + throws Exception { + List res = new ArrayList<>(); + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + float step = 1.0f * (to - from) / dataflowOptions.getNumWorkers(); + for (int i = 0; i < dataflowOptions.getNumWorkers(); ++i) { + res.add(new Read(Math.round(from + i * step), Math.round(from + (i + 1) * step))); + } + return res; + } + + @Override + public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { + return 8 * (to - from); + } + + @Override + public boolean producesSortedKeys(PipelineOptions options) throws Exception { + return true; + } + + @Override + public Reader createBasicReader( + PipelineOptions options, Coder coder, + @Nullable ExecutionContext executionContext) throws IOException { + return new RangeReader(from, to); + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return BigEndianIntegerCoder.of(); + } + + private class RangeReader implements Reader { + private int to; + private int current; + + public RangeReader(int from, int to) { + this.to = to; + this.current = from - 1; + } + + @Override + public boolean advance() throws IOException { + current++; + return (current < to); + } + + @Override + public Integer getCurrent() { + return current; + } + + @Override + public void close() throws IOException { + // Nothing + } + } + } + } + + @Test + public void testSplitAndReadShardsBack() throws Exception { + DataflowPipelineOptions options = + PipelineOptionsFactory.create().as(DataflowPipelineOptions.class); + options.setNumWorkers(5); + com.google.api.services.dataflow.model.Source source = translateIOToCloudSource( + TestIO.fromRange(10, 20), options); + List> elems = CloudSourceUtils.readElemsFromSource(options, source); + assertEquals(10, elems.size()); + for (int i = 0; i < 10; ++i) { + assertEquals(WindowedValue.valueInGlobalWindow(10 + i), elems.get(i)); + } + SourceSplitResponse response = performSplit(source, options); + assertEquals("SOURCE_SPLIT_OUTCOME_SPLITTING_HAPPENED", response.getOutcome()); + List shards = response.getShards(); + assertEquals(5, shards.size()); + for (int i = 0; i < 5; ++i) { + SourceSplitShard shard = shards.get(i); + assertEquals("SOURCE_DERIVATION_MODE_INDEPENDENT", shard.getDerivationMode()); + com.google.api.services.dataflow.model.Source shardSource = shard.getSource(); + assertTrue(shardSource.getDoesNotNeedSplitting()); + shardSource.setCodec(source.getCodec()); + List> xs = CloudSourceUtils.readElemsFromSource(options, shardSource); + assertThat( + xs, + contains( + WindowedValue.valueInGlobalWindow(10 + 2 * i), + WindowedValue.valueInGlobalWindow(11 + 2 * i))); + } + } + + private static com.google.api.services.dataflow.model.Source translateIOToCloudSource( + TestIO.Read io, DataflowPipelineOptions options) throws Exception { + DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options); + Pipeline p = Pipeline.create(options); + p.begin().apply(ReadSource.from(io)); + + Job workflow = translator.translate(p, new ArrayList()); + Step step = workflow.getSteps().get(0); 
+ + return stepToCloudSource(step); + } + + private static com.google.api.services.dataflow.model.Source stepToCloudSource(Step step) + throws Exception { + com.google.api.services.dataflow.model.Source res = dictionaryToCloudSource( + getDictionary(step.getProperties(), PropertyNames.SOURCE_STEP_INPUT)); + // Encoding is specified in the step, not in the source itself. This is + // normal: incoming Dataflow API Source objects in map tasks will have the + // encoding filled in from the step's output encoding. + CloudObject encoding = CloudObject.fromSpec(getObject( + // TODO: This should be done via a Structs accessor. + ((List>) step.getProperties().get(PropertyNames.OUTPUT_INFO)).get(0), + PropertyNames.ENCODING)); + res.setCodec(encoding); + return res; + } + + private SourceSplitResponse performSplit( + com.google.api.services.dataflow.model.Source source, PipelineOptions options) + throws Exception { + SourceSplitRequest splitRequest = new SourceSplitRequest(); + splitRequest.setSource(source); + SourceOperationRequest request = new SourceOperationRequest(); + request.setSplit(splitRequest); + SourceFormat.OperationRequest request1 = + cloudSourceOperationRequestToSourceOperationRequest(request); + SourceFormat.OperationResponse response = + new BasicSerializableSourceFormat(options).performSourceOperation(request1); + return sourceOperationResponseToCloudSourceOperationResponse(response).getSplit(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java index 18248ef3183ad..f524e74744bfe 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java @@ -129,7 +129,7 @@ private TableDataList rawDataList(TableRow... rows) { } @Test - public void testRead() throws IOException { + public void testRead() throws Exception { onTableGet(basicTableSchema()); // BQ API data is always encoded as a string diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DatastoreReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DatastoreReaderTest.java new file mode 100644 index 0000000000000..93eb27cb57036 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DatastoreReaderTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.api.services.datastore.client.DatastoreHelper.makeProperty; +import static com.google.api.services.datastore.client.DatastoreHelper.makeValue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.argThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.api.services.datastore.DatastoreV1.Entity; +import com.google.api.services.datastore.DatastoreV1.EntityResult; +import com.google.api.services.datastore.DatastoreV1.EntityResult.ResultType; +import com.google.api.services.datastore.DatastoreV1.Query; +import com.google.api.services.datastore.DatastoreV1.QueryResultBatch; +import com.google.api.services.datastore.DatastoreV1.QueryResultBatch.MoreResultsType; +import com.google.api.services.datastore.DatastoreV1.RunQueryRequest; +import com.google.api.services.datastore.DatastoreV1.RunQueryResponse; +import com.google.api.services.datastore.client.Datastore; +import com.google.api.services.datastore.client.DatastoreException; +import com.google.cloud.dataflow.sdk.io.DatastoreIO; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.ArgumentMatcher; + +import java.util.ArrayList; +import java.util.List; + +/** + * Unit tests for {@code DatastoreSource}. + */ +@RunWith(JUnit4.class) +public class DatastoreReaderTest { + private static final String TEST_HOST = "http://localhost:8080"; + private static final String TEST_KIND = "mykind"; + private static final String TEST_DATASET = "mydataset"; + private static final String TEST_PROPERTY = "myproperty"; + + private static class IsValidRequest extends ArgumentMatcher { + @Override + public boolean matches(Object o) { + RunQueryRequest request = (RunQueryRequest) o; + return request.hasQuery(); + } + } + + private EntityResult createEntityResult(String kind, String val) { + Entity entity = Entity.newBuilder().addProperty( + makeProperty(TEST_PROPERTY, makeValue(val))).build(); + return EntityResult.newBuilder().setEntity(entity).build(); + } + + private Datastore buildMockDatastore() throws DatastoreException { + Datastore datastore = mock(Datastore.class); + RunQueryResponse.Builder firstResponseBuilder = RunQueryResponse.newBuilder(); + RunQueryResponse.Builder secondResponseBuilder = RunQueryResponse.newBuilder(); + RunQueryResponse.Builder thirdResponseBuilder = RunQueryResponse.newBuilder(); + { + QueryResultBatch.Builder resultsBatch = QueryResultBatch.newBuilder(); + resultsBatch.addEntityResult(0, createEntityResult(TEST_KIND, "val0")); + resultsBatch.addEntityResult(1, createEntityResult(TEST_KIND, "val1")); + resultsBatch.addEntityResult(2, createEntityResult(TEST_KIND, "val2")); + resultsBatch.addEntityResult(3, createEntityResult(TEST_KIND, "val3")); + resultsBatch.addEntityResult(4, createEntityResult(TEST_KIND, "val4")); + resultsBatch.setEntityResultType(ResultType.FULL); + + resultsBatch.setMoreResults(MoreResultsType.NOT_FINISHED); + + firstResponseBuilder.setBatch(resultsBatch.build()); + } + { + QueryResultBatch.Builder resultsBatch = QueryResultBatch.newBuilder(); + resultsBatch.addEntityResult(0, createEntityResult(TEST_KIND, "val5")); + resultsBatch.addEntityResult(1, createEntityResult(TEST_KIND, "val6")); + resultsBatch.addEntityResult(2, createEntityResult(TEST_KIND, "val7")); + 
resultsBatch.addEntityResult(3, createEntityResult(TEST_KIND, "val8")); + resultsBatch.addEntityResult(4, createEntityResult(TEST_KIND, "val9")); + resultsBatch.setEntityResultType(ResultType.FULL); + + resultsBatch.setMoreResults(MoreResultsType.NOT_FINISHED); + + secondResponseBuilder.setBatch(resultsBatch.build()); + } + { + QueryResultBatch.Builder resultsBatch = QueryResultBatch.newBuilder(); + resultsBatch.setEntityResultType(ResultType.FULL); + + resultsBatch.setMoreResults(MoreResultsType.NO_MORE_RESULTS); + + thirdResponseBuilder.setBatch(resultsBatch.build()); + } + when(datastore.runQuery(argThat(new IsValidRequest()))) + .thenReturn(firstResponseBuilder.build()) + .thenReturn(secondResponseBuilder.build()) + .thenReturn(thirdResponseBuilder.build()); + return datastore; + } + + + @Test + public void testRead() throws Exception { + Datastore datastore = buildMockDatastore(); + + Query.Builder q = Query.newBuilder(); + q.addKindBuilder().setName(TEST_KIND); + Query query = q.build(); + + DatastoreIO.DatastoreReader iterator = new DatastoreIO.DatastoreReader(query, datastore); + + List entityResults = new ArrayList(); + while (iterator.advance()) { + entityResults.add(iterator.getCurrent()); + } + + assertEquals(10, entityResults.size()); + for (int i = 0; i < 10; i++) { + assertNotNull(entityResults.get(i).getPropertyList()); + assertEquals(entityResults.get(i).getPropertyList().size(), 1); + assertTrue(entityResults.get(i).getPropertyList().get(0).hasValue()); + assertTrue(entityResults.get(i).getPropertyList().get(0).getValue().hasStringValue()); + assertEquals( + entityResults.get(i).getPropertyList().get(0).getValue().getStringValue(), "val" + i); + } + } +} From 6cd24e2bc7ce82bd60710633a1791b48b253efe7 Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 12 Jan 2015 09:28:58 -0800 Subject: [PATCH 0079/1541] Update the worker harness to request one task for each available processor. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83762900 --- .../runners/worker/DataflowWorkerHarness.java | 52 ++++++++++++++++--- .../worker/DataflowWorkerHarnessTest.java | 17 +++--- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index f46a8d4a00039..ed197b3b15147 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -19,6 +19,7 @@ import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; +import com.google.api.client.util.Lists; import com.google.api.client.util.Preconditions; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.dataflow.model.LeaseWorkItemRequest; @@ -40,7 +41,9 @@ import com.google.common.collect.ImmutableList; import org.joda.time.DateTime; +import org.joda.time.DateTimeUtils; import org.joda.time.Duration; +import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; @@ -49,6 +52,9 @@ import java.lang.Thread.UncaughtExceptionHandler; import java.util.Collections; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; import javax.annotation.concurrent.ThreadSafe; @@ -92,17 +98,49 @@ public static void main(String[] args) throws Exception { Thread.currentThread().setUncaughtExceptionHandler(WorkerUncaughtExceptionHandler.INSTANCE); new DataflowWorkerLoggingInitializer().initialize(); - DataflowWorker worker = createFromSystemProperties(); - processWork(worker); + DataflowWorkerHarnessOptions pipelineOptions = + PipelineOptionsFactory.createFromSystemProperties(); + final DataflowWorker worker = create(pipelineOptions); + processWork(pipelineOptions, worker); } // Visible for testing. - static void processWork(DataflowWorker worker) throws IOException { - worker.getAndPerformWork(); - } + static void processWork(DataflowWorkerHarnessOptions pipelineOptions, + final DataflowWorker worker) { + + long startTime = DateTimeUtils.currentTimeMillis(); + int numThreads = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); + CompletionService completionService = + new ExecutorCompletionService<>(pipelineOptions.getExecutorService()); + for (int i = 0; i < numThreads; ++i) { + completionService.submit(new Callable() { + @Override + public Boolean call() throws Exception { + return worker.getAndPerformWork(); + } + }); + } + + List completionTimes = Lists.newArrayList(); + for (int i = 0; i < numThreads; ++i) { + try { + // CompletionService returns the tasks in the order in which the completed at. 
+ completionService.take().get(); + } catch (Exception e) { + LOG.error("Failed waiting on thread to process work.", e); + } + completionTimes.add(DateTimeUtils.currentTimeMillis()); + } - static DataflowWorker createFromSystemProperties() { - return create(PipelineOptionsFactory.createFromSystemProperties()); + long endTime = DateTimeUtils.currentTimeMillis(); + LOG.info("processWork() start time: {}, end time: {}", + ISODateTimeFormat.dateTime().print(startTime), + ISODateTimeFormat.dateTime().print(endTime)); + for (long completionTime : completionTimes) { + LOG.info("Duration: {}ms Wasted Time: {}ms", + completionTime - startTime, + endTime - completionTime); + } } static DataflowWorker create(DataflowWorkerHarnessOptions options) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java index d1d369fe99acd..e167aab7cf1ba 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertNull; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; @@ -64,6 +65,7 @@ public class DataflowWorkerHarnessTest { @Mock private MockHttpTransport transport; @Mock private MockLowLevelHttpRequest request; @Mock private DataflowWorker mockDataflowWorker; + private DataflowWorkerHarnessOptions pipelineOptions; private Dataflow service; @@ -74,21 +76,24 @@ public void setUp() throws Exception { doCallRealMethod().when(request).getContentAsString(); service = new Dataflow(transport, Transport.getJsonFactory(), null); + pipelineOptions = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); } @Test - public void testThatWeOnlyProcessWorkOnce() throws Exception { + public void testThatWeOnlyProcessWorkOncePerAvailableProcessor() throws Exception { + int numWorkers = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); when(mockDataflowWorker.getAndPerformWork()).thenReturn(true); - DataflowWorkerHarness.processWork(mockDataflowWorker); - verify(mockDataflowWorker).getAndPerformWork(); + DataflowWorkerHarness.processWork(pipelineOptions, mockDataflowWorker); + verify(mockDataflowWorker, times(numWorkers)).getAndPerformWork(); verifyNoMoreInteractions(mockDataflowWorker); } @Test - public void testThatWeOnlyProcessWorkOnceEvenWhenFailing() throws Exception { + public void testThatWeOnlyProcessWorkOncePerAvailableProcessorEvenWhenFailing() throws Exception { + int numWorkers = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); when(mockDataflowWorker.getAndPerformWork()).thenReturn(false); - DataflowWorkerHarness.processWork(mockDataflowWorker); - verify(mockDataflowWorker).getAndPerformWork(); + DataflowWorkerHarness.processWork(pipelineOptions, mockDataflowWorker); + verify(mockDataflowWorker, times(numWorkers)).getAndPerformWork(); verifyNoMoreInteractions(mockDataflowWorker); } From e8d903972989f950b8c174248e5f2577fb903575 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 14 Jan 2015 10:01:06 -0800 Subject: [PATCH 0080/1541] Updated command line parsing such that unrecognized or malformed command line options will 
result in an error. To ignore unknown or malformed command line options instead, call withoutStrictParsing() within the PipelineOptionsFactory when constructing PipelineOptions. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83947457 --- .../sdk/options/PipelineOptionsFactory.java | 134 ++++++++++++------ .../options/PipelineOptionsFactoryTest.java | 80 ++++++++++- 2 files changed, 164 insertions(+), 50 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 250baa62af277..5de1fa06cbbfe 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -128,6 +128,10 @@ public static T as(Class klass) { * List style properties are able to be bound to {@code boolean[]}, {@code char[]}, * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, * {@code String[]} and {@code List}. + *

+ * By default, strict parsing is enabled and arguments must conform to be either + * {@code --booleanArgName} or {@code --argName=argValue}. Strict parsing can be disabled with + * {@link Builder#withoutStrictParsing()}. */ public static Builder fromArgs(String[] args) { return new Builder(getAppName(3)).fromArgs(args); @@ -148,16 +152,19 @@ public static class Builder { private final String defaultAppName; private final String[] args; private final boolean validation; + private final boolean strictParsing; // Do not allow direct instantiation private Builder(String defaultAppName) { - this(defaultAppName, null, false); + this(defaultAppName, null, false, true); } - private Builder(String defaultAppName, String[] args, boolean validation) { + private Builder(String defaultAppName, String[] args, boolean validation, + boolean strictParsing) { this.defaultAppName = defaultAppName; this.args = args; this.validation = validation; + this.strictParsing = strictParsing; } /** @@ -178,10 +185,14 @@ private Builder(String defaultAppName, String[] args, boolean validation) { * List style properties are able to be bound to {@code boolean[]}, {@code char[]}, * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, * {@code String[]} and {@code List}. + *

+ * By default, strict parsing is enabled and arguments must conform to be either + * {@code --booleanArgName} or {@code --argName=argValue}. Strict parsing can be disabled with + * {@link Builder#withoutStrictParsing()}. */ public Builder fromArgs(String[] args) { Preconditions.checkNotNull(args, "Arguments should not be null."); - return new Builder(defaultAppName, args, validation); + return new Builder(defaultAppName, args, validation, strictParsing); } /** @@ -191,7 +202,15 @@ public Builder fromArgs(String[] args) { * validation. */ public Builder withValidation() { - return new Builder(defaultAppName, args, true); + return new Builder(defaultAppName, args, true, strictParsing); + } + + /** + * During parsing of the arguments, we will skip over improperly formatted and unknown + * arguments. + */ + public Builder withoutStrictParsing() { + return new Builder(defaultAppName, args, validation, false); } /** @@ -218,9 +237,9 @@ public T as(Class klass) { // Attempt to parse the arguments into the set of initial options to use if (args != null) { - ListMultimap options = parseCommandLine(args); + ListMultimap options = parseCommandLine(args, strictParsing); LOG.debug("Provided Arguments: {}", options); - initialOptions = parseObjects(klass, options); + initialOptions = parseObjects(klass, options, strictParsing); } // Create our proxy @@ -814,20 +833,35 @@ public boolean apply(Method input) { *

List style properties are able to be bound to {@code boolean[]}, {@code char[]}, * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]}, * {@code String[]}, and {@code List}. + * + *

If strict parsing is enabled, options must start with '--', and not have an empty argument + * name or value based upon the positioning of the '='. */ - private static ListMultimap parseCommandLine(String[] args) { + private static ListMultimap parseCommandLine( + String[] args, boolean strictParsing) { ImmutableListMultimap.Builder builder = ImmutableListMultimap.builder(); for (String arg : args) { - Preconditions.checkArgument(arg.startsWith("--"), - "Unknown argument %s in command line %s", arg, Arrays.toString(args)); - int index = arg.indexOf("="); - // Make sure that '=' isn't the first character after '--' or the last character - Preconditions.checkArgument(index != 2 && index != arg.length() - 1, - "Unknown argument %s in command line %s", arg, Arrays.toString(args)); - if (index > 0) { - builder.put(arg.substring(2, index), arg.substring(index + 1, arg.length())); - } else { - builder.put(arg.substring(2), "true"); + try { + Preconditions.checkArgument(arg.startsWith("--"), + "Argument '%s' does not begin with '--'", arg); + int index = arg.indexOf("="); + // Make sure that '=' isn't the first character after '--' or the last character + Preconditions.checkArgument(index != 2, + "Argument '%s' starts with '--=', empty argument name not allowed", arg); + Preconditions.checkArgument(index != arg.length() - 1, + "Argument '%s' ends with '=', empty argument value not allowed", arg); + if (index > 0) { + builder.put(arg.substring(2, index), arg.substring(index + 1, arg.length())); + } else { + builder.put(arg.substring(2), "true"); + } + } catch (IllegalArgumentException e) { + if (strictParsing) { + throw e; + } else { + LOG.warn("Strict parsing is disabled, ignoring option '{}' because {}", + arg, e.getMessage()); + } } } return builder.build(); @@ -842,9 +876,12 @@ private static ListMultimap parseCommandLine(String[] args) { *

* We special case the "runner" option. It is mapped to the class of the {@link PipelineRunner} * based off of the {@link PipelineRunner}s simple class name. + *

+ * If strict parsing is enabled, unknown options or options which can not be converted to + * the expected java type using an {@link ObjectMapper} will be ignored. */ private static Map parseObjects( - Class klass, ListMultimap options) { + Class klass, ListMultimap options, boolean strictParsing) { Map propertyNamesToGetters = Maps.newHashMap(); PipelineOptionsFactory.validateWellFormed(klass, getRegisteredOptions()); @SuppressWarnings("unchecked") @@ -856,33 +893,40 @@ private static Map parseObjects( } Map convertedOptions = Maps.newHashMap(); for (Map.Entry> entry : options.asMap().entrySet()) { - if (!propertyNamesToGetters.containsKey(entry.getKey())) { - LOG.warn("Ignoring argument {}={}", entry.getKey(), entry.getValue()); - continue; - } - - Method method = propertyNamesToGetters.get(entry.getKey()); - JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType()); - if ("runner".equals(entry.getKey())) { - String runner = Iterables.getOnlyElement(entry.getValue()); - Preconditions.checkArgument(SUPPORTED_PIPELINE_RUNNERS.containsKey(runner), - "Unknown 'runner' specified %s, supported pipeline runners %s", - runner, Sets.newTreeSet(SUPPORTED_PIPELINE_RUNNERS.keySet())); - convertedOptions.put("runner", SUPPORTED_PIPELINE_RUNNERS.get(runner)); - } else if (method.getReturnType().isArray() - || Collection.class.isAssignableFrom(method.getReturnType())) { - // Split any strings with "," - List values = FluentIterable.from(entry.getValue()) - .transformAndConcat(new Function>() { - @Override - public Iterable apply(String input) { - return Arrays.asList(input.split(",")); - } - }).toList(); - convertedOptions.put(entry.getKey(), MAPPER.convertValue(values, type)); - } else { - String value = Iterables.getOnlyElement(entry.getValue()); - convertedOptions.put(entry.getKey(), MAPPER.convertValue(value, type)); + try { + Preconditions.checkArgument(propertyNamesToGetters.containsKey(entry.getKey()), + "Class %s missing a property named '%s'", klass, entry.getKey()); + + Method method = propertyNamesToGetters.get(entry.getKey()); + JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType()); + if ("runner".equals(entry.getKey())) { + String runner = Iterables.getOnlyElement(entry.getValue()); + Preconditions.checkArgument(SUPPORTED_PIPELINE_RUNNERS.containsKey(runner), + "Unknown 'runner' specified '%s', supported pipeline runners %s", + runner, Sets.newTreeSet(SUPPORTED_PIPELINE_RUNNERS.keySet())); + convertedOptions.put("runner", SUPPORTED_PIPELINE_RUNNERS.get(runner)); + } else if (method.getReturnType().isArray() + || Collection.class.isAssignableFrom(method.getReturnType())) { + // Split any strings with "," + List values = FluentIterable.from(entry.getValue()) + .transformAndConcat(new Function>() { + @Override + public Iterable apply(String input) { + return Arrays.asList(input.split(",")); + } + }).toList(); + convertedOptions.put(entry.getKey(), MAPPER.convertValue(values, type)); + } else { + String value = Iterables.getOnlyElement(entry.getValue()); + convertedOptions.put(entry.getKey(), MAPPER.convertValue(value, type)); + } + } catch (IllegalArgumentException e) { + if (strictParsing) { + throw e; + } else { + LOG.warn("Strict parsing is disabled, ignoring option '{}' with value '{}' because {}", + entry.getKey(), entry.getValue(), e.getMessage()); + } } } return convertedOptions; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index d5fb27d9203c1..8ec5eb25e125b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -24,6 +24,7 @@ import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.testing.ExpectedLogs; import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -44,6 +45,7 @@ public class PipelineOptionsFactoryTest { @Rule public ExpectedException expectedException = ExpectedException.none(); @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + @Rule public ExpectedLogs expectedLogs = ExpectedLogs.none(PipelineOptionsFactory.class); @Test public void testAutomaticRegistrationOfPipelineOptions() { @@ -494,6 +496,15 @@ public void testSetASingularAttributeUsingAListThrowsAnError() { PipelineOptionsFactory.fromArgs(args).create(); } + @Test + public void testSetASingularAttributeUsingAListIsIgnoredWithoutStrictParsing() { + String[] args = new String[] { + "--diskSizeGb=100", + "--diskSizeGb=200"}; + expectedLogs.expectWarn("Strict parsing is disabled, ignoring option"); + PipelineOptionsFactory.fromArgs(args).withoutStrictParsing().create(); + } + @Test public void testSettingRunner() { String[] args = new String[] {"--runner=BlockingDataflowPipelineRunner"}; @@ -504,12 +515,71 @@ public void testSettingRunner() { @Test public void testSettingUnknownRunner() { - expectedException.expect(IllegalArgumentException.class); - expectedException.expectMessage("Unknown 'runner' specified UnknownRunner, supported pipeline " - + "runners [BlockingDataflowPipelineRunner, DataflowPipelineRunner, DirectPipelineRunner]"); String[] args = new String[] {"--runner=UnknownRunner"}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Unknown 'runner' specified 'UnknownRunner', supported " + + "pipeline runners [BlockingDataflowPipelineRunner, DataflowPipelineRunner, " + + "DirectPipelineRunner]"); + PipelineOptionsFactory.fromArgs(args).create(); + } - PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); - options.getRunner(); + @Test + public void testUsingArgumentWithUnknownPropertyIsNotAllowed() { + String[] args = new String[] {"--unknownProperty=value"}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("missing a property named 'unknownProperty'"); + PipelineOptionsFactory.fromArgs(args).create(); + } + + @Test + public void testUsingArgumentWithUnknownPropertyIsIgnoredWithoutStrictParsing() { + String[] args = new String[] {"--unknownProperty=value"}; + expectedLogs.expectWarn("missing a property named 'unknownProperty'"); + PipelineOptionsFactory.fromArgs(args).withoutStrictParsing().create(); + } + + @Test + public void testUsingArgumentWithoutValueIsNotAllowed() { + String[] args = new String[] {"--diskSizeGb="}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Argument '--diskSizeGb=' ends with '='"); + PipelineOptionsFactory.fromArgs(args).create(); + } + + @Test + public void testUsingArgumentWithoutValueIsIgnoredWithoutStrictParsing() { + String[] args = new String[] 
{"--diskSizeGb="}; + expectedLogs.expectWarn("Strict parsing is disabled, ignoring option"); + PipelineOptionsFactory.fromArgs(args).withoutStrictParsing().create(); + } + + @Test + public void testUsingArgumentStartingWithIllegalCharacterIsNotAllowed() { + String[] args = new String[] {" --diskSizeGb=100"}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Argument ' --diskSizeGb=100' does not begin with '--'"); + PipelineOptionsFactory.fromArgs(args).create(); + } + + @Test + public void testUsingArgumentStartingWithIllegalCharacterIsIgnoredWithoutStrictParsing() { + String[] args = new String[] {" --diskSizeGb=100"}; + expectedLogs.expectWarn("Strict parsing is disabled, ignoring option"); + PipelineOptionsFactory.fromArgs(args).withoutStrictParsing().create(); + } + + @Test + public void testUsingArgumentWithInvalidNameIsNotAllowed() { + String[] args = new String[] {"--=100"}; + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Argument '--=100' starts with '--='"); + PipelineOptionsFactory.fromArgs(args).create(); + } + + @Test + public void testUsingArgumentWithInvalidNameIsIgnoredWithoutStrictParsing() { + String[] args = new String[] {"--=100"}; + expectedLogs.expectWarn("Strict parsing is disabled, ignoring option"); + PipelineOptionsFactory.fromArgs(args).withoutStrictParsing().create(); } } From b7acea01f5c0d99ef096854b5fe18618dd8ca9aa Mon Sep 17 00:00:00 2001 From: amyu Date: Wed, 14 Jan 2015 15:05:07 -0800 Subject: [PATCH 0081/1541] Change join example to use non-static tags. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=83977086 --- .../cloud/dataflow/examples/JoinExamples.java | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java index 5f910d6dfbcf4..803de8aad7f71 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/JoinExamples.java @@ -60,27 +60,6 @@ public class JoinExamples { // A table that maps country codes to country names. private static final String COUNTRY_CODES = "gdelt-bq:full.crosswalk_geocountrycodetohuman"; - private static final TupleTag eventInfoTag = new TupleTag(); - private static final TupleTag countryInfoTag = new TupleTag(); - - /** - * Process the CoGbkResult elements generated by the CoGroupByKey transform. - */ - static class ProcessJoinFn extends DoFn, KV> { - - @Override - public void processElement(ProcessContext c) { - KV e = c.element(); - CoGbkResult val = e.getValue(); - String countryCode = e.getKey(); - String countryName = "none"; - countryName = e.getValue().getOnly(countryInfoTag); - for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) { - // Generate a string that combines information from both collection values - c.output(KV.of(countryCode, "Country name: " + countryName + ", Event info: " + eventInfo)); - } - } - } /** * Join two collections, using country code as the key. 
@@ -88,6 +67,9 @@ public void processElement(ProcessContext c) { static PCollection joinEvents(PCollection eventsTable, PCollection countryCodes) throws Exception { + final TupleTag eventInfoTag = new TupleTag(); + final TupleTag countryInfoTag = new TupleTag(); + // transform both input collections to tuple collections, where the keys are country // codes in both cases. PCollection> eventInfo = eventsTable.apply( @@ -101,9 +83,24 @@ static PCollection joinEvents(PCollection eventsTable, .and(countryInfoTag, countryInfo) .apply(CoGroupByKey.create()); + // Process the CoGbkResult elements generated by the CoGroupByKey transform. // country code 'key' -> string of , PCollection> finalResultCollection = - kvpCollection.apply(ParDo.of(new ProcessJoinFn())); + kvpCollection.apply(ParDo.of(new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + CoGbkResult val = e.getValue(); + String countryCode = e.getKey(); + String countryName = "none"; + countryName = e.getValue().getOnly(countryInfoTag); + for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) { + // Generate a string that combines information from both collection values + c.output(KV.of(countryCode, "Country name: " + countryName + + ", Event info: " + eventInfo)); + } + } + })); // write to GCS PCollection formattedResults = finalResultCollection From 0d62c34e098543838f73c7e0f6474c654fcdfffa Mon Sep 17 00:00:00 2001 From: klk Date: Thu, 15 Jan 2015 10:08:16 -0800 Subject: [PATCH 0082/1541] Undo deprecation of PTransform.getInput and PTransform.getOutput. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84051898 --- .../com/google/cloud/dataflow/sdk/transforms/PTransform.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java index 6fc2618dcc021..d7df4977a3992 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java @@ -237,9 +237,7 @@ public Pipeline getPipeline() { * Returns the input of this transform. * * @throws IllegalStateException if this PTransform hasn't been applied yet - * @deprecated Use pipeline.getInput(transform) */ - @Deprecated public Input getInput() { @SuppressWarnings("unchecked") Input input = (Input) getPipeline().getInput(this); @@ -250,9 +248,7 @@ public Input getInput() { * Returns the output of this transform. * * @throws IllegalStateException if this PTransform hasn't been applied yet - * #deprecated use pipeline.getOutput(transform) */ - @Deprecated public Output getOutput() { @SuppressWarnings("unchecked") Output output = (Output) getPipeline().getOutput(this); From d828500122b3fcd4382ab1bc8f29ec1a7b775665 Mon Sep 17 00:00:00 2001 From: lcwik Date: Thu, 15 Jan 2015 11:23:11 -0800 Subject: [PATCH 0083/1541] Add support for Google API tracing to the SDK. To help debug communication issues between the SDK and Google services, a token may be given to you by a Googler which will record the inputs/outputs of some or all API calls between the SDK and Google services. 
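A minimal sketch of the intended usage (the trace token below is a made-up
placeholder; real tokens are issued by Google) is to pass the new
--googleApiTrace pipeline option when constructing pipeline options:

  // Hypothetical token value, for illustration only.
  String[] args = new String[] {"--googleApiTrace=Dataflow#PlaceholderToken"};
  DataflowPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  // Clients built from these options attach trace=PlaceholderToken to every
  // Dataflow API request they issue.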
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84058790 --- .../dataflow/sdk/options/GcpOptions.java | 2 +- .../sdk/options/GoogleApiDebugOptions.java | 103 ++++++++++++++ .../cloud/dataflow/sdk/util/Transport.java | 48 ++++++- .../options/GoogleApiDebugOptionsTest.java | 134 ++++++++++++++++++ 4 files changed, 280 insertions(+), 7 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 959a4fc77f3fb..52354d92a0c35 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -56,7 +56,7 @@ * application default credentials falling back to gcloud. The other options can be * used by setting the corresponding properties. */ -public interface GcpOptions extends PipelineOptions { +public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { /** * Project id to use when launching jobs. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java new file mode 100644 index 0000000000000..72e4bae0032d8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import com.google.api.client.googleapis.services.AbstractGoogleClient; +import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; +import com.google.api.client.googleapis.services.GoogleClientRequestInitializer; +import com.google.common.base.Preconditions; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * These options configure debug settings for Google API clients generated by the Dataflow SDK. + */ +public interface GoogleApiDebugOptions extends PipelineOptions { + /** + * This option enables tracing of API calls to Google services used within the Dataflow SDK. + * A tracing token must be requested from Google to be able to use this option. + * An invalid tracing token will result in 400 errors from Google when the API is invoked. + */ + GoogleApiTracer[] getGoogleApiTrace(); + void setGoogleApiTrace(GoogleApiTracer[] commands); + + /** + * A {@link GoogleClientRequestInitializer} which adds the 'trace' token to Google API calls. 
+ */ + public static class GoogleApiTracer implements GoogleClientRequestInitializer { + private static final Pattern COMMAND_LINE_PATTERN = Pattern.compile("([^#]*)#(.*)"); + /** + * Creates a {@link GoogleApiTracer} which sets the trace {@code token} on all + * calls which match the given client type. + */ + public static GoogleApiTracer create(AbstractGoogleClient client, String token) { + return new GoogleApiTracer(client.getClass().getCanonicalName(), token); + } + + /** + * Creates a {@link GoogleApiTracer} which sets the trace {@code token} on all + * calls which match for the given request type. + */ + public static GoogleApiTracer create(AbstractGoogleClientRequest request, String token) { + return new GoogleApiTracer(request.getClass().getCanonicalName(), token); + } + + /** + * Creates a {@link GoogleClientRequestInitializer} which adds the trace token + * based upon the passed in value. + *

+ * The {@code value} represents a string containing {@code ApiName#TracingToken}. + * The {@code ApiName} is used to match against the request classes + * {@link Class#getCanonicalName() canonical name} for which to add the {@code TracingToken} to. + * For example, to match: + *

+ * <ul>
+ *   <li>all Google API calls: {@code #TracingToken}
+ *   <li>all Dataflow API calls: {@code Dataflow#TracingToken}
+ *   <li>all Dataflow V1B3 API calls: {@code Dataflow.V1b3#TracingToken}
+ *   <li>all Dataflow V1B3 Jobs API calls: {@code Dataflow.V1b3.Projects.Jobs#TracingToken}
+ *   <li>all Dataflow V1B3 Jobs Get calls: {@code Dataflow.V1b3.Projects.Jobs.Get#TracingToken}
+ *   <li>all Job creation calls in any version: {@code Jobs.Create#TracingToken}
+ * </ul>
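+ *
+ * <p> Matching is a substring check against the request class's
+ * {@link Class#getCanonicalName() canonical name}, so a more specific
+ * {@code ApiName} narrows tracing to a smaller set of calls.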
+ */ + @JsonCreator + public static GoogleApiTracer create(String value) { + Matcher matcher = COMMAND_LINE_PATTERN.matcher(value); + Preconditions.checkArgument(matcher.find() && matcher.groupCount() == 2, + "Unable to parse '%s', expected format 'ClientRequestName#Token'", value); + return new GoogleApiTracer(matcher.group(1), matcher.group(2)); + } + + private final String clientRequestName; + private final String token; + + private GoogleApiTracer(String clientRequestName, String token) { + this.clientRequestName = clientRequestName; + this.token = token; + } + + @Override + public void initialize(AbstractGoogleClientRequest request) throws IOException { + if (request.getClass().getCanonicalName().contains(clientRequestName)) { + request.set("trace", token); + } + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java index d1accb2ac191f..c0c9485247f66 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java @@ -17,6 +17,9 @@ package com.google.cloud.dataflow.sdk.util; import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; +import com.google.api.client.googleapis.services.AbstractGoogleClient.Builder; +import com.google.api.client.googleapis.services.AbstractGoogleClientRequest; +import com.google.api.client.googleapis.services.GoogleClientRequestInitializer; import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; @@ -31,6 +34,7 @@ import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.GcsOptions; import com.google.cloud.dataflow.sdk.options.StreamingOptions; +import com.google.common.base.MoreObjects; import java.io.IOException; import java.net.MalformedURLException; @@ -78,10 +82,12 @@ public static JsonFactory getJsonFactory() { newBigQueryClient(BigQueryOptions options) { return new Bigquery.Builder(getTransport(), getJsonFactory(), new RetryHttpRequestInitializer(options.getGcpCredential())) - .setApplicationName(options.getAppName()); + .setApplicationName(options.getAppName()) + .setGoogleClientRequestInitializer( + new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); } -/** + /** * Returns a Pubsub client builder. *

* Note: this client's endpoint is not modified by the @@ -91,7 +97,9 @@ public static JsonFactory getJsonFactory() { newPubsubClient(StreamingOptions options) { return new Pubsub.Builder(getTransport(), getJsonFactory(), new RetryHttpRequestInitializer(options.getGcpCredential())) - .setApplicationName(options.getAppName()); + .setApplicationName(options.getAppName()) + .setGoogleClientRequestInitializer( + new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); } /** @@ -116,7 +124,9 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options new RetryHttpRequestInitializer(options.getGcpCredential())) .setApplicationName(options.getAppName()) .setRootUrl(rootUrl) - .setServicePath(servicePath); + .setServicePath(servicePath) + .setGoogleClientRequestInitializer( + new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); } /** @@ -126,7 +136,9 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options public static Dataflow.Builder newRawDataflowClient(DataflowPipelineOptions options) { return newDataflowClient(options) - .setHttpRequestInitializer(options.getGcpCredential()); + .setHttpRequestInitializer(options.getGcpCredential()) + .setGoogleClientRequestInitializer( + new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); } /** @@ -142,6 +154,30 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options // Do not log the code 404. Code up the stack will deal with 404's if needed, and // logging it by default clutters the output during file staging. options.getGcpCredential(), NanoClock.SYSTEM, Sleeper.DEFAULT, Arrays.asList(404))) - .setApplicationName(options.getAppName()); + .setApplicationName(options.getAppName()) + .setGoogleClientRequestInitializer( + new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); + } + + /** + * Allows multiple {@link GoogleClientRequestInitializer}s to be chained together for use with + * {@link Builder}. + */ + private static final class ChainedGoogleClientRequestInitializer + implements GoogleClientRequestInitializer { + private static final GoogleClientRequestInitializer[] EMPTY_ARRAY = + new GoogleClientRequestInitializer[]{}; + private final GoogleClientRequestInitializer[] chain; + + private ChainedGoogleClientRequestInitializer(GoogleClientRequestInitializer... initializer) { + this.chain = MoreObjects.firstNonNull(initializer, EMPTY_ARRAY); + } + + @Override + public void initialize(AbstractGoogleClientRequest request) throws IOException { + for (GoogleClientRequestInitializer initializer : chain) { + initializer.initialize(request); + } + } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java new file mode 100644 index 0000000000000..147021a710e12 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import com.google.api.services.bigquery.Bigquery.Datasets.Delete; +import com.google.api.services.dataflow.Dataflow.V1b3.Projects.Jobs.Create; +import com.google.api.services.dataflow.Dataflow.V1b3.Projects.Jobs.Get; +import com.google.cloud.dataflow.sdk.options.GoogleApiDebugOptions.GoogleApiTracer; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.util.Transport; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link GoogleApiDebugOptions}. */ +@RunWith(JUnit4.class) +public class GoogleApiDebugOptionsTest { + @Test + public void testWhenTracingMatches() throws Exception { + String[] args = new String[] {"--googleApiTrace=Projects.Jobs.Get#GetTestToken"}; + DataflowPipelineOptions options = + PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + assertNotNull(options.getGoogleApiTrace()); + + Get request = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertEquals("GetTestToken", request.get("trace")); + } + + @Test + public void testWhenTracingDoesNotMatch() throws Exception { + String[] args = new String[] {"--googleApiTrace=Projects.Jobs.Create#testToken"}; + DataflowPipelineOptions options = + PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + assertNotNull(options.getGoogleApiTrace()); + + Get request = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertNull(request.get("trace")); + } + + @Test + public void testWithMultipleTraces() throws Exception { + String[] args = new String[] { + "--googleApiTrace=Projects.Jobs.Create#CreateTestToken,Projects.Jobs.Get#GetTestToken"}; + DataflowPipelineOptions options = + PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + assertNotNull(options.getGoogleApiTrace()); + + Get getRequest = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertEquals("GetTestToken", getRequest.get("trace")); + + Create createRequest = + options.getDataflowClient().v1b3().projects().jobs().create("testProjectId", null); + assertEquals("CreateTestToken", createRequest.get("trace")); + } + + @Test + public void testMatchingAllDataflowV1b3Calls() throws Exception { + String[] args = new String[] {"--googleApiTrace=Dataflow.V1b3#TestToken"}; + DataflowPipelineOptions options = + PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + assertNotNull(options.getGoogleApiTrace()); + + Get getRequest = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertEquals("TestToken", getRequest.get("trace")); + + Create createRequest = + options.getDataflowClient().v1b3().projects().jobs().create("testProjectId", null); + assertEquals("TestToken", createRequest.get("trace")); + } + + @Test + public void testMatchingAgainstClient() throws Exception { + DataflowPipelineOptions options = 
PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + options.setGoogleApiTrace(new GoogleApiTracer[] { + GoogleApiTracer.create(Transport.newDataflowClient(options).build(), "TestToken")}); + + Get getRequest = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertEquals("TestToken", getRequest.get("trace")); + + Delete deleteRequest = Transport.newBigQueryClient(options).build().datasets() + .delete("testProjectId", "testDatasetId"); + assertNull(deleteRequest.get("trace")); + } + + @Test + public void testMatchingAgainstRequestType() throws Exception { + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + options.setGoogleApiTrace(new GoogleApiTracer[] {GoogleApiTracer.create( + Transport.newDataflowClient(options).build().v1b3().projects().jobs() + .get("aProjectId", "aJobId"), "TestToken")}); + + Get getRequest = + options.getDataflowClient().v1b3().projects().jobs().get("testProjectId", "testJobId"); + assertEquals("TestToken", getRequest.get("trace")); + + Create createRequest = + options.getDataflowClient().v1b3().projects().jobs().create("testProjectId", null); + assertNull(createRequest.get("trace")); + } +} From 20d27ee687576cdb046181f2545b7f59cdae2730 Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 15 Jan 2015 17:34:51 -0800 Subject: [PATCH 0084/1541] Pick windows for elements produced in start/finishBundle based on the WindowingFn of the input PCollection [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84093449 --- .../runners/DataflowPipelineTranslator.java | 12 ++-- .../runners/worker/AssignWindowsParDoFn.java | 3 +- .../sdk/runners/worker/CombineValuesFn.java | 3 +- .../worker/GroupAlsoByWindowsParDoFn.java | 3 +- .../sdk/runners/worker/NormalParDoFn.java | 21 +++--- .../cloud/dataflow/sdk/transforms/DoFn.java | 50 +++++++------ .../dataflow/sdk/transforms/DoFnTester.java | 4 +- .../cloud/dataflow/sdk/transforms/ParDo.java | 3 +- .../sdk/transforms/windowing/Window.java | 3 +- .../cloud/dataflow/sdk/util/DoFnContext.java | 68 +++++++++++++----- .../cloud/dataflow/sdk/util/DoFnInfo.java | 47 ++++++++++++ .../cloud/dataflow/sdk/util/DoFnRunner.java | 16 +++-- .../dataflow/sdk/util/WindowedValue.java | 3 + .../worker/MapTaskExecutorFactoryTest.java | 8 ++- .../sdk/runners/worker/NormalParDoFnTest.java | 14 ++-- .../runners/worker/ParDoFnFactoryTest.java | 7 +- .../dataflow/sdk/transforms/ParDoTest.java | 71 +++++++++++++++++++ .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 4 +- .../StreamingGroupAlsoByWindowsDoFnTest.java | 4 +- 19 files changed, 270 insertions(+), 74 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index f948f5c689864..e116965897d3b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -66,7 +66,9 @@ import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import 
com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.OutputReference; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.SerializableUtils; @@ -88,7 +90,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -886,7 +887,7 @@ private void translateMultiHelper( TranslationContext context) { context.addStep(transform, "ParallelDo"); translateInputs(transform.getInput(), transform.getSideInputs(), context); - translateFn(transform.getFn(), context); + translateFn(transform.getFn(), transform.getInput().getWindowingFn(), context); translateOutputs(transform.getOutput(), context); } }); @@ -906,7 +907,7 @@ private void translateSingleHelper( TranslationContext context) { context.addStep(transform, "ParallelDo"); translateInputs(transform.getInput(), transform.getSideInputs(), context); - translateFn(transform.getFn(), context); + translateFn(transform.getFn(), transform.getInput().getWindowingFn(), context); context.addOutput("out", transform.getOutput()); } }); @@ -965,12 +966,13 @@ private static void translateSideInputs( } private static void translateFn( - Serializable fn, + DoFn fn, + WindowingFn windowingFn, TranslationContext context) { context.addInput(PropertyNames.USER_FN, fn.getClass().getName()); context.addInput( PropertyNames.SERIALIZED_FN, - byteArrayToJsonString(serializeToByteArray(fn))); + byteArrayToJsonString(serializeToByteArray(new DoFnInfo(fn, windowingFn)))); if (fn instanceof DoFn.RequiresKeyedState) { context.addInput(PropertyNames.USES_KEYED_STATE, "true"); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java index 00558a306ead6..058fba3d2d4f4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -25,6 +25,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.PropertyNames; @@ -77,7 +78,7 @@ private AssignWindowsParDoFn( CounterSet.AddCounterMutator addCounterMutator) { super( options, - fn, + new DoFnInfo(fn, null), PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 63c4089f616a2..79ee768263bc3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -25,6 +25,7 @@ import com.google.cloud.dataflow.sdk.transforms.Combine; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import 
com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.PropertyNames; @@ -118,7 +119,7 @@ private CombineValuesFn( CounterSet.AddCounterMutator addCounterMutator) { super( options, - doFn, + new DoFnInfo(doFn, null), PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index adf0435e6e981..1903e44a66e5a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -28,6 +28,7 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.PropertyNames; @@ -109,7 +110,7 @@ private GroupAlsoByWindowsParDoFn( CounterSet.AddCounterMutator addCounterMutator) { super( options, - fn, + new DoFnInfo(fn, null), PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index 79d729b974d87..31bef7542cdbc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -21,8 +21,8 @@ import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptions; -import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.DoFnRunner; import com.google.cloud.dataflow.sdk.util.DoFnRunner.OutputManager; import com.google.cloud.dataflow.sdk.util.ExecutionContext; @@ -65,10 +65,10 @@ public static NormalParDoFn create( SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized user fn"); - if (!(deserializedFn instanceof DoFn)) { - throw new Exception("unexpected kind of DoFn: " + deserializedFn.getClass().getName()); + if (!(deserializedFn instanceof DoFnInfo)) { + throw new Exception("unexpected kind of DoFnInfo: " + deserializedFn.getClass().getName()); } - DoFn fn = (DoFn) deserializedFn; + DoFnInfo fnInfo = (DoFnInfo) deserializedFn; PTuple sideInputValues = PTuple.empty(); if (sideInputInfos != null) { @@ -96,12 +96,12 @@ public static NormalParDoFn create( "unexpected number of outputTags for DoFn"); } - return new NormalParDoFn(options, fn, sideInputValues, outputTags, + return new NormalParDoFn(options, fnInfo, sideInputValues, outputTags, stepName, executionContext, addCounterMutator); } public final PipelineOptions options; - public final DoFn fn; + public final DoFnInfo fnInfo; public final PTuple sideInputValues; public final TupleTag mainOutputTag; public final List> sideOutputTags; @@ -113,14 +113,14 @@ public static NormalParDoFn create( DoFnRunner fnRunner; public NormalParDoFn(PipelineOptions options, - DoFn fn, + 
DoFnInfo fnInfo, PTuple sideInputValues, List outputTags, String stepName, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { this.options = options; - this.fn = (DoFn) fn; + this.fnInfo = fnInfo; this.sideInputValues = sideInputValues; if (outputTags.size() < 1) { throw new AssertionError("expected at least one output"); @@ -151,7 +151,7 @@ public void startBundle(final Receiver... receivers) throws Exception { fnRunner = DoFnRunner.create( options, - fn, + fnInfo.getDoFn(), sideInputValues, new OutputManager() { final Map, OutputReceiver> undeclaredOutputs = @@ -197,7 +197,8 @@ public void output(Receiver receiver, WindowedValue output) { mainOutputTag, sideOutputTags, stepContext, - addCounterMutator); + addCounterMutator, + fnInfo.getWindowingFn()); fnRunner.startBundle(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index d6057d5b29c65..a62982ceff2db 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -81,13 +81,13 @@ public abstract class Context { * element will have the same timestamp and be in the same windows * as the input element passed to {@link DoFn#processElement}). * - *

Is is illegal to invoke this from {@link #startBundle} or - * {@link #finishBundle} unless the input {@code PCollection} is - * windowed by the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. - * If this is the case, the output element will have a timestamp - * of negative infinity and be in the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + *

 If invoked from {@link #startBundle} or {@link #finishBundle}, + * this will attempt to use the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * of the input {@code PCollection} to determine what windows the element + * should be in, throwing an exception if the {@code WindowingFn} attempts + * to access any information about the input element. The output element + * will have a timestamp of negative infinity. */ public abstract void output(O output); @@ -100,13 +100,13 @@ public abstract class Context { * {@link DoFn#getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

Is is illegal to invoke this from {@link #startBundle} or - * {@link #finishBundle} unless the input {@code PCollection} is - * windowed by the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. - * If this is the case, the output element's timestamp will be - * the given timestamp and its window will be the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + *

 If invoked from {@link #startBundle} or {@link #finishBundle}, + * this will attempt to use the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * of the input {@code PCollection} to determine what windows the element + * should be in, throwing an exception if the {@code WindowingFn} attempts + * to access any information about the input element except for the + * timestamp. */ public abstract void outputWithTimestamp(O output, Instant timestamp); @@ -122,6 +122,14 @@ public abstract class Context { *

The output element will have the same timestamp and be in the same * windows as the input element passed to {@link DoFn#processElement}). * + *

If invoked from {@link #startBundle} or {@link #finishValue}, + * this will attempt to use the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * of the input {@code PCollection} to determine what windows the element + * should be in, throwing an exception if the {@code WindowingFn} attempts + * to access any information about the input element. The output element + * will have a timestamp of negative infinity. + * * @throws IllegalArgumentException if the number of outputs exceeds * the limit of 1,000 outputs per DoFn * @see ParDo#withOutputTags @@ -137,13 +145,13 @@ public abstract class Context { * {@link DoFn#getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

Is is illegal to invoke this from {@link #startBundle} or - * {@link #finishBundle} unless the input {@code PCollection} is - * windowed by the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. - * If this is the case, the output element's timestamp will be - * the given timestamp and its window will be the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow}. + *

If invoked from {@link #startBundle} or {@link #finishValue}, + * this will attempt to use the + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * of the input {@code PCollection} to determine what windows the element + * should be in, throwing an exception if the {@code WindowingFn} attempts + * to access any information about the input element except for the + * timestamp. * * @throws IllegalArgumentException if the number of outputs exceeds * the limit of 1,000 outputs per DoFn diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java index 70ae664a6fc20..55d0f8b18b964 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java @@ -18,6 +18,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.DoFnRunner; import com.google.cloud.dataflow.sdk.util.PTuple; @@ -353,6 +354,7 @@ void initializeState() { mainOutputTag, sideOutputTags, (new BatchModeExecutionContext()).createStepContext("stepName"), - counterSet.getAddCounterMutator()); + counterSet.getAddCounterMutator(), + new GlobalWindow()); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 4ddca9ef7dd12..0af3c3baf8971 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -1039,7 +1039,8 @@ private static DoFnRunner evaluateHelper( mainOutputTag, sideOutputTags, executionContext.getStepContext(name), - context.getAddCounterMutator()); + context.getAddCounterMutator(), + input.getWindowingFn()); fnRunner.startBundle(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 71e8b0be3e36d..702476362614b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -273,7 +273,8 @@ private static void evaluateHelper( outputTag, new ArrayList>(), executionContext.getStepContext(context.getStepName(transform)), - context.getAddCounterMutator()); + context.getAddCounterMutator(), + transform.fn); addWindowsRunner.startBundle(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 7eca7d5f510bf..115b3f15f5540 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -22,7 +22,8 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import 
com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn.AssignContext; import com.google.cloud.dataflow.sdk.util.DoFnRunner.OutputManager; import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; import com.google.cloud.dataflow.sdk.util.common.CounterSet; @@ -31,7 +32,6 @@ import org.joda.time.Instant; -import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -56,6 +56,7 @@ class DoFnContext extends DoFn.Context { final TupleTag mainOutputTag; final StepContext stepContext; final CounterSet.AddCounterMutator addCounterMutator; + final WindowingFn windowingFn; public DoFnContext(PipelineOptions options, DoFn fn, @@ -64,7 +65,8 @@ public DoFnContext(PipelineOptions options, TupleTag mainOutputTag, List> sideOutputTags, StepContext stepContext, - CounterSet.AddCounterMutator addCounterMutator) { + CounterSet.AddCounterMutator addCounterMutator, + WindowingFn windowingFn) { fn.super(); this.options = options; this.fn = fn; @@ -78,6 +80,7 @@ public DoFnContext(PipelineOptions options, } this.stepContext = stepContext; this.addCounterMutator = addCounterMutator; + this.windowingFn = windowingFn; } public R getReceiver(TupleTag tag) { @@ -109,11 +112,51 @@ public T sideInput(PCollectionView view) { return view.fromIterableInternal((Iterable>) sideInputs.get(tag)); } + WindowedValue makeWindowedValue( + T output, Instant timestamp, Collection windows) { + final Instant inputTimestamp = timestamp; + + if (timestamp == null) { + timestamp = new Instant(Long.MIN_VALUE); + } + + if (windows == null) { + try { + windows = windowingFn.assignWindows(windowingFn.new AssignContext() { + @Override + public Object element() { + throw new UnsupportedOperationException( + "WindowingFn attemped to access input element when none was available"); + } + + @Override + public Instant timestamp() { + if (inputTimestamp == null) { + throw new UnsupportedOperationException( + "WindowingFn attemped to access input timestamp when none was available"); + } + return inputTimestamp; + } + + @Override + public Collection windows() { + throw new UnsupportedOperationException( + "WindowingFn attemped to access input windows when none were available"); + } + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + return WindowedValue.of(output, timestamp, windows); + } + void outputWindowedValue( O output, Instant timestamp, Collection windows) { - WindowedValue windowedElem = WindowedValue.of(output, timestamp, windows); + WindowedValue windowedElem = makeWindowedValue(output, timestamp, windows); outputManager.output(outputMap.get(mainOutputTag), windowedElem); if (stepContext != null) { stepContext.noteOutput(windowedElem); @@ -143,7 +186,7 @@ protected void sideOutputWindowedValue(TupleTag tag, outputMap.put(tag, receiver); } - WindowedValue windowedElem = WindowedValue.of(output, timestamp, windows); + WindowedValue windowedElem = makeWindowedValue(output, timestamp, windows); outputManager.output(receiver, windowedElem); if (stepContext != null) { stepContext.noteSideOutput(tag, windowedElem); @@ -153,31 +196,24 @@ protected void sideOutputWindowedValue(TupleTag tag, // Following implementations of output, outputWithTimestamp, and sideOutput // are only accessible in DoFn.startBundle and DoFn.finishBundle, and will be shadowed by // ProcessContext's versions in DoFn.processElement. - // TODO: it seems wrong to use Long.MIN_VALUE, since it will violate all our rules about - // DoFns preserving watermarks. 
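+ // Passing null for the timestamp and windows defers to makeWindowedValue above,
+ // which assigns windows through the input PCollection's WindowingFn.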
@Override public void output(O output) { - outputWindowedValue(output, - new Instant(Long.MIN_VALUE), - Arrays.asList(GlobalWindow.Window.INSTANCE)); + outputWindowedValue(output, null, null); } @Override public void outputWithTimestamp(O output, Instant timestamp) { - outputWindowedValue(output, timestamp, Arrays.asList(GlobalWindow.Window.INSTANCE)); + outputWindowedValue(output, timestamp, null); } @Override public void sideOutput(TupleTag tag, T output) { - sideOutputWindowedValue(tag, - output, - new Instant(Long.MIN_VALUE), - Arrays.asList(GlobalWindow.Window.INSTANCE)); + sideOutputWindowedValue(tag, output, null, null); } @Override public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { - sideOutputWindowedValue(tag, output, timestamp, Arrays.asList(GlobalWindow.Window.INSTANCE)); + sideOutputWindowedValue(tag, output, timestamp, null); } private String generateInternalAggregatorName(String userName) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java new file mode 100644 index 0000000000000..1af14d7a130ea --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; + +import java.io.Serializable; + +/** + * Wrapper class holding the necessary information to serialize a DoFn. 
+ * + * @param the type of the (main) input elements of the DoFn + * @param the type of the (main) output elements of the DoFn + */ +public class DoFnInfo implements Serializable { + private static final long serialVersionUID = 0; + private DoFn doFn; + private WindowingFn windowingFn; + + public DoFnInfo(DoFn doFn, WindowingFn windowingFn) { + this.doFn = doFn; + this.windowingFn = windowingFn; + } + + public DoFn getDoFn() { + return doFn; + } + + public WindowingFn getWindowingFn() { + return windowingFn; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index d4d788d89af7f..09320962a3f2d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -18,6 +18,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -58,11 +59,12 @@ private DoFnRunner(PipelineOptions options, TupleTag mainOutputTag, List> sideOutputTags, StepContext stepContext, - CounterSet.AddCounterMutator addCounterMutator) { + CounterSet.AddCounterMutator addCounterMutator, + WindowingFn windowingFn) { this.fn = fn; this.context = new DoFnContext<>(options, fn, sideInputs, outputManager, mainOutputTag, sideOutputTags, stepContext, - addCounterMutator); + addCounterMutator, windowingFn); } public static DoFnRunner create( @@ -73,10 +75,11 @@ public static DoFnRunner create( TupleTag mainOutputTag, List> sideOutputTags, StepContext stepContext, - CounterSet.AddCounterMutator addCounterMutator) { + CounterSet.AddCounterMutator addCounterMutator, + WindowingFn windowingFn) { return new DoFnRunner<>( options, fn, sideInputs, outputManager, - mainOutputTag, sideOutputTags, stepContext, addCounterMutator); + mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowingFn); } @SuppressWarnings({"rawtypes", "unchecked"}) @@ -87,7 +90,8 @@ public static DoFnRunner createWithListOutputs( TupleTag mainOutputTag, List> sideOutputTags, StepContext stepContext, - CounterSet.AddCounterMutator addCounterMutator) { + CounterSet.AddCounterMutator addCounterMutator, + WindowingFn windowingFn) { return create( options, fn, sideInputs, new OutputManager() { @@ -100,7 +104,7 @@ public void output(List list, WindowedValue output) { list.add(output); } }, - mainOutputTag, sideOutputTags, stepContext, addCounterMutator); + mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowingFn); } /** Calls {@link DoFn#startBundle}. 
*/ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 67daaf0de246a..29f6078baa277 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -87,6 +87,9 @@ public static WindowedValue valueInEmptyWindows(V value) { private WindowedValue(V value, Instant timestamp, Collection windows) { + checkNotNull(timestamp); + checkNotNull(windows); + this.value = value; this.timestamp = timestamp; this.windows = windows; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index 445755130db5d..cfd89466d845d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -42,9 +42,11 @@ import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSink; import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSinkFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.SerializableUtils; @@ -291,7 +293,8 @@ static ParallelInstruction createParDoInstruction( TestDoFn fn = new TestDoFn(); String serializedFn = - StringUtils.byteArrayToJsonString(SerializableUtils.serializeToByteArray(fn)); + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindow()))); CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); addString(cloudUserFn, PropertyNames.SERIALIZED_FN, serializedFn); @@ -339,7 +342,8 @@ public void testCreateParDoOperation() throws Exception { assertThat(parDoOperation.fn, new IsInstanceOf(NormalParDoFn.class)); NormalParDoFn normalParDoFn = (NormalParDoFn) parDoOperation.fn; - assertThat(normalParDoFn.fn, new IsInstanceOf(TestDoFn.class)); + assertThat(normalParDoFn.fnInfo.getDoFn(), + new IsInstanceOf(TestDoFn.class)); assertSame( parDoOperation, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java index 2c6811c27b5ea..f4a02679ce838 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java @@ -29,7 +29,9 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.UserCodeException; import 
com.google.cloud.dataflow.sdk.util.WindowedValue; @@ -139,6 +141,7 @@ public void testNormalParDoFn() throws Exception { List sideOutputTags = Arrays.asList("tag1", "tag2", "tag3"); TestDoFn fn = new TestDoFn(sideOutputTags); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); TestReceiver receiver = new TestReceiver(); TestReceiver receiver1 = new TestReceiver(); TestReceiver receiver2 = new TestReceiver(); @@ -151,7 +154,7 @@ public void testNormalParDoFn() throws Exception { outputTags.addAll(sideOutputTags); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fn, sideInputValues, outputTags, "doFn", + fnInfo, sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new CounterSet()).getAddCounterMutator()); @@ -203,13 +206,14 @@ public void testNormalParDoFn() throws Exception { @Test public void testUnexpectedNumberOfReceivers() throws Exception { TestDoFn fn = new TestDoFn(Collections.emptyList()); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); TestReceiver receiver = new TestReceiver(); PTuple sideInputValues = PTuple.empty(); List outputTags = Arrays.asList("output"); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fn, sideInputValues, outputTags, "doFn", + fnInfo, sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new CounterSet()).getAddCounterMutator()); @@ -241,13 +245,14 @@ private List stackTraceFrameStrings(Throwable t) { @Test public void testErrorPropagation() throws Exception { TestErrorDoFn fn = new TestErrorDoFn(); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); TestReceiver receiver = new TestReceiver(); PTuple sideInputValues = PTuple.empty(); List outputTags = Arrays.asList("output"); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fn, sideInputValues, outputTags, "doFn", + fnInfo, sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new CounterSet()).getAddCounterMutator()); @@ -308,9 +313,10 @@ public void testErrorPropagation() throws Exception { @Test public void testUndeclaredSideOutputs() throws Exception { TestDoFn fn = new TestDoFn(Arrays.asList("declared", "undecl1", "undecl2", "undecl3")); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); CounterSet counters = new CounterSet(); NormalParDoFn normalParDoFn = - new NormalParDoFn(PipelineOptionsFactory.create(), fn, PTuple.empty(), + new NormalParDoFn(PipelineOptionsFactory.create(), fnInfo, PTuple.empty(), Arrays.asList("output", "declared"), "doFn", new BatchModeExecutionContext(), counters.getAddCounterMutator()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index 6ceb61f267708..be31ece009e4b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -21,8 +21,10 @@ import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; import 
com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.util.StringUtils; import com.google.cloud.dataflow.sdk.util.common.CounterSet; @@ -69,7 +71,7 @@ public void testCreateNormalParDoFn() throws Exception { String serializedFn = StringUtils.byteArrayToJsonString( - SerializableUtils.serializeToByteArray(fn)); + SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindow()))); CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); addString(cloudUserFn, "serialized_fn", serializedFn); @@ -92,8 +94,9 @@ public void testCreateNormalParDoFn() throws Exception { Assert.assertThat(parDoFn, new IsInstanceOf(NormalParDoFn.class)); NormalParDoFn normalParDoFn = (NormalParDoFn) parDoFn; - DoFn actualDoFn = normalParDoFn.fn; + DoFn actualDoFn = normalParDoFn.fnInfo.getDoFn(); Assert.assertThat(actualDoFn, new IsInstanceOf(TestDoFn.class)); + Assert.assertThat(normalParDoFn.fnInfo.getWindowingFn(), new IsInstanceOf(GlobalWindow.class)); TestDoFn actualTestDoFn = (TestDoFn) actualDoFn; Assert.assertEquals(stringState, actualTestDoFn.stringState); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index c5e699216707b..39494e9647c2a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -40,12 +40,15 @@ import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.google.cloud.dataflow.sdk.values.CodedTupleTag; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionTuple; import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.cloud.dataflow.sdk.values.TupleTagList; @@ -1136,4 +1139,72 @@ public void testParDoShiftTimestampInvalid() { // expected } } + + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testWindowingInStartAndFinishBundle() { + Pipeline p = TestPipeline.create(); + + PCollection output = p + .apply(Create.timestamped(TimestampedValue.of("elem", new Instant(1)))) + .apply(Window.into(FixedWindows.of(Duration.millis(1)))) + .apply(ParDo.of(new DoFn() { + @Override + public void startBundle(Context c) { + c.outputWithTimestamp("start", new Instant(2)); + System.out.println("Start: 2"); + } + + @Override + public void processElement(ProcessContext c) { + c.output(c.element()); + System.out.println("Process: " + c.element() + ":" + c.timestamp().getMillis()); + } + + @Override + public void finishBundle(Context c) { + c.outputWithTimestamp("finish", new Instant(3)); + System.out.println("Finish: 3"); + } + })) + .apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element() + ":" + c.timestamp().getMillis() + + ":" + c.windows().iterator().next().maxTimestamp().getMillis()); + } + })); + + 
DataflowAssert.that(output).containsInAnyOrder("elem:1:1", "start:2:2", "finish:3:3"); + + p.run(); + } + + @Test + public void testWindowingInStartBundleException() { + Pipeline p = TestPipeline.create(); + + PCollection output = p + .apply(Create.timestamped(TimestampedValue.of("elem", new Instant(1)))) + .apply(Window.into(FixedWindows.of(Duration.millis(1)))) + .apply(ParDo.of(new DoFn() { + @Override + public void startBundle(Context c) { + c.output("start"); + } + + @Override + public void processElement(ProcessContext c) { + c.output(c.element()); + } + })); + + try { + p.run(); + fail("should have failed"); + } catch (Exception e) { + assertThat(e.toString(), containsString( + "WindowingFn attemped to access input timestamp when none was available")); + } + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index 37c0f8edbff4a..20f24c2ebcd30 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -221,7 +222,8 @@ KV>, List> makeRunner( outputTag, new ArrayList>(), execContext.createStepContext("merge"), - counters.getAddCounterMutator()); + counters.getAddCounterMutator(), + new GlobalWindow()); return runner; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index f5895037e4fcb..c199b9190cfa6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -25,6 +25,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -272,7 +273,8 @@ KV>, List> makeRunner( outputTag, new ArrayList>(), execContext.createStepContext("merge"), - counters.getAddCounterMutator()); + counters.getAddCounterMutator(), + new GlobalWindow()); return runner; } From 6f521f91290f016b46ffbecd60aeaffae54ee8e1 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 16 Jan 2015 14:15:02 -0800 Subject: [PATCH 0085/1541] Allow MapTaskExecutor to be run multiple times [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84162247 --- .../runners/worker/AssignWindowsParDoFn.java | 18 ++++--- .../sdk/runners/worker/CombineValuesFn.java | 52 +++++++++++-------- 
.../worker/GroupAlsoByWindowsParDoFn.java | 24 +++++---- .../sdk/runners/worker/NormalParDoFn.java | 44 ++++++++++------ .../util/common/worker/FlattenOperation.java | 5 ++ .../sdk/util/common/worker/Operation.java | 11 +++- .../util/common/worker/ParDoOperation.java | 5 ++ .../sdk/util/common/worker/ReadOperation.java | 8 ++- .../sdk/util/common/worker/Reader.java | 7 +++ .../dataflow/sdk/util/common/worker/Sink.java | 7 +++ .../util/common/worker/WriteOperation.java | 5 ++ .../worker/MapTaskExecutorFactoryTest.java | 4 +- .../sdk/runners/worker/NormalParDoFnTest.java | 27 +++++++--- .../runners/worker/ParDoFnFactoryTest.java | 6 ++- 14 files changed, 155 insertions(+), 68 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java index 058fba3d2d4f4..2a22793e81c4a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -21,7 +21,6 @@ import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptions; -import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; import com.google.cloud.dataflow.sdk.util.CloudObject; @@ -55,7 +54,7 @@ public static AssignWindowsParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler sampler /* unused */) throws Exception { - Object windowingFn = + final Object windowingFn = SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized window fn"); @@ -64,21 +63,28 @@ public static AssignWindowsParDoFn create( "unexpected kind of WindowingFn: " + windowingFn.getClass().getName()); } - DoFn assignWindowsDoFn = new AssignWindowsDoFn((WindowingFn) windowingFn); + final AssignWindowsDoFn assignFn = new AssignWindowsDoFn((WindowingFn) windowingFn); + + DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() { + return new DoFnInfo(assignFn, null); + } + }; return new AssignWindowsParDoFn( - options, assignWindowsDoFn, stepName, executionContext, addCounterMutator); + options, fnFactory, stepName, executionContext, addCounterMutator); } private AssignWindowsParDoFn( PipelineOptions options, - DoFn fn, + DoFnInfoFactory fnFactory, String stepName, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { super( options, - new DoFnInfo(fn, null), + fnFactory, PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 79ee768263bc3..31e87649a5d27 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -78,11 +78,11 @@ public static CombineValuesFn create( "serialized user fn"); Preconditions.checkArgument( deserializedFn instanceof Combine.KeyedCombineFn); - Combine.KeyedCombineFn combineFn = (Combine.KeyedCombineFn) deserializedFn; + final Combine.KeyedCombineFn combineFn = (Combine.KeyedCombineFn) 
deserializedFn; // Get the combine phase, default to ALL. (The implementation // doesn't have to split the combiner). - String phase = getString(cloudUserFn, PropertyNames.PHASE, CombinePhase.ALL); + final String phase = getString(cloudUserFn, PropertyNames.PHASE, CombinePhase.ALL); Preconditions.checkArgument( sideInputInfos == null || sideInputInfos.size() == 0, @@ -90,36 +90,42 @@ public static CombineValuesFn create( Preconditions.checkArgument( numOutputs == 1, "expected exactly one output for CombineValuesFn"); - DoFn doFn = null; - switch (phase) { - case CombinePhase.ALL: - doFn = new CombineValuesDoFn(combineFn); - break; - case CombinePhase.ADD: - doFn = new AddInputsDoFn(combineFn); - break; - case CombinePhase.MERGE: - doFn = new MergeAccumulatorsDoFn(combineFn); - break; - case CombinePhase.EXTRACT: - doFn = new ExtractOutputDoFn(combineFn); - break; - default: - throw new IllegalArgumentException( - "phase must be one of 'all', 'add', 'merge', 'extract'"); - } - return new CombineValuesFn(options, doFn, stepName, executionContext, addCounterMutator); + DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() { + DoFn doFn = null; + switch (phase) { + case CombinePhase.ALL: + doFn = new CombineValuesDoFn(combineFn); + break; + case CombinePhase.ADD: + doFn = new AddInputsDoFn(combineFn); + break; + case CombinePhase.MERGE: + doFn = new MergeAccumulatorsDoFn(combineFn); + break; + case CombinePhase.EXTRACT: + doFn = new ExtractOutputDoFn(combineFn); + break; + default: + throw new IllegalArgumentException( + "phase must be one of 'all', 'add', 'merge', 'extract'"); + } + return new DoFnInfo(doFn, null); + } + }; + return new CombineValuesFn(options, fnFactory, stepName, executionContext, addCounterMutator); } private CombineValuesFn( PipelineOptions options, - DoFn doFn, + DoFnInfoFactory fnFactory, String stepName, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { super( options, - new DoFnInfo(doFn, null), + fnFactory, PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index 1903e44a66e5a..a8ba7e840b4f5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -25,7 +25,6 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; -import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; @@ -61,7 +60,7 @@ public static GroupAlsoByWindowsParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler sampler /* unused */) throws Exception { - Object windowingFn = + final Object windowingFn = SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized window fn"); @@ -88,29 +87,34 @@ public static GroupAlsoByWindowsParDoFn create( "Expected WindowedValueCoder for inputCoder, got: " + inputCoder.getClass().getName()); } - Coder elemCoder = ((WindowedValueCoder) inputCoder).getValueCoder(); + 
final Coder elemCoder = ((WindowedValueCoder) inputCoder).getValueCoder(); if (!(elemCoder instanceof KvCoder)) { throw new Exception( "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName()); } - DoFn windowingDoFn = StreamingGroupAlsoByWindowsDoFn.create( - (WindowingFn) windowingFn, - ((KvCoder) elemCoder).getValueCoder()); - + DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() { + return new DoFnInfo(StreamingGroupAlsoByWindowsDoFn.create( + (WindowingFn) windowingFn, + ((KvCoder) elemCoder).getValueCoder()), + null); + } + }; return new GroupAlsoByWindowsParDoFn( - options, windowingDoFn, stepName, executionContext, addCounterMutator); + options, fnFactory, stepName, executionContext, addCounterMutator); } private GroupAlsoByWindowsParDoFn( PipelineOptions options, - DoFn fn, + DoFnInfoFactory fnFactory, String stepName, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { super( options, - new DoFnInfo(fn, null), + fnFactory, PTuple.empty(), Arrays.asList("output"), stepName, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index 31bef7542cdbc..ea9ea4a970d87 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -50,9 +50,17 @@ */ @SuppressWarnings({"rawtypes", "unchecked"}) public class NormalParDoFn extends ParDoFn { + + /** + * Factory for creating DoFn instances. + */ + protected static interface DoFnInfoFactory { + public DoFnInfo createDoFnInfo() throws Exception; + } + public static NormalParDoFn create( PipelineOptions options, - CloudObject cloudUserFn, + final CloudObject cloudUserFn, String stepName, @Nullable List sideInputInfos, @Nullable List multiOutputInfos, @@ -61,14 +69,20 @@ public static NormalParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler /* ignored */) throws Exception { - Object deserializedFn = - SerializableUtils.deserializeFromByteArray( - getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), - "serialized user fn"); - if (!(deserializedFn instanceof DoFnInfo)) { - throw new Exception("unexpected kind of DoFnInfo: " + deserializedFn.getClass().getName()); - } - DoFnInfo fnInfo = (DoFnInfo) deserializedFn; + DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() throws Exception { + Object deserializedFn = + SerializableUtils.deserializeFromByteArray( + getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), + "serialized user fn"); + if (!(deserializedFn instanceof DoFnInfo)) { + throw new Exception( + "unexpected kind of DoFnInfo: " + deserializedFn.getClass().getName()); + } + return (DoFnInfo) deserializedFn; + } + }; PTuple sideInputValues = PTuple.empty(); if (sideInputInfos != null) { @@ -96,12 +110,12 @@ public static NormalParDoFn create( "unexpected number of outputTags for DoFn"); } - return new NormalParDoFn(options, fnInfo, sideInputValues, outputTags, + return new NormalParDoFn(options, fnFactory, sideInputValues, outputTags, stepName, executionContext, addCounterMutator); } public final PipelineOptions options; - public final DoFnInfo fnInfo; + public final DoFnInfoFactory fnFactory; public final PTuple sideInputValues; public final TupleTag mainOutputTag; public final List> sideOutputTags; @@ -113,14 +127,14 @@ 
public static NormalParDoFn create( DoFnRunner fnRunner; public NormalParDoFn(PipelineOptions options, - DoFnInfo fnInfo, + DoFnInfoFactory fnFactory, PTuple sideInputValues, List outputTags, String stepName, ExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator) { this.options = options; - this.fnInfo = fnInfo; + this.fnFactory = fnFactory; this.sideInputValues = sideInputValues; if (outputTags.size() < 1) { throw new AssertionError("expected at least one output"); @@ -151,7 +165,7 @@ public void startBundle(final Receiver... receivers) throws Exception { fnRunner = DoFnRunner.create( options, - fnInfo.getDoFn(), + fnFactory.createDoFnInfo().getDoFn(), sideInputValues, new OutputManager() { final Map, OutputReceiver> undeclaredOutputs = @@ -198,7 +212,7 @@ public void output(Receiver receiver, WindowedValue output) { sideOutputTags, stepContext, addCounterMutator, - fnInfo.getWindowingFn()); + fnFactory.createDoFnInfo().getWindowingFn()); fnRunner.startBundle(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java index 6325d1ac5cdb8..8076216211425 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/FlattenOperation.java @@ -51,4 +51,9 @@ public void process(Object elem) throws Exception { } } } + + @Override + public boolean supportsRestart() { + return true; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java index bedc081cec99d..207d034d4f3d6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java @@ -84,7 +84,9 @@ public Operation(String operationName, * exception otherwise. */ void checkUnstarted() { - if (initializationState != InitializationState.UNSTARTED) { + if (!(initializationState == InitializationState.UNSTARTED + || (initializationState == InitializationState.FINISHED + && supportsRestart()))) { throw new AssertionError( "expecting this instruction to not yet be started"); } @@ -129,4 +131,11 @@ public void finish() throws Exception { checkStarted(); initializationState = InitializationState.FINISHED; } + + /** + * Returns true if this Operation can be started again after it is finished. 
+ */ + public boolean supportsRestart() { + return false; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java index 7a620983476f0..684d11a9cb23c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ParDoOperation.java @@ -62,4 +62,9 @@ public void finish() throws Exception { super.finish(); } } + + @Override + public boolean supportsRestart() { + return true; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index e97cf2acb3daf..b727961692ba6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -86,6 +86,7 @@ public ReadOperation(String operationName, Reader reader, OutputReceiver[] re this.byteCount = addCounterMutator.addCounter( Counter.longs(bytesCounterName(counterPrefix, operationName), SUM)); readState = stateSampler.stateForName(operationName + "-read"); + reader.addObserver(new ReaderObserver()); } /** Invoked by tests. */ @@ -120,6 +121,11 @@ public void start() throws Exception { } } + @Override + public boolean supportsRestart() { + return reader.supportsRestart(); + } + protected void runReadLoop() throws Exception { Receiver receiver = receivers[0]; if (receiver == null) { @@ -127,8 +133,6 @@ protected void runReadLoop() throws Exception { return; } - reader.addObserver(new ReaderObserver()); - try (StateSampler.ScopedState process = stateSampler.scopedState(processState)) { assert process != null; synchronized (sourceIteratorLock) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java index 2b805c6f1d1fd..7c9b783511c01 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java @@ -153,4 +153,11 @@ protected void notifyElementRead(long byteSize) { setChanged(); notifyObservers(byteSize); } + + /** + * Returns whether this Reader can be restarted. + */ + public boolean supportsRestart() { + return false; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java index 829fd1a391535..3a23982f97c72 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Sink.java @@ -44,4 +44,11 @@ public interface SinkWriter extends AutoCloseable { @Override public void close() throws IOException; } + + /** + * Returns whether this Sink can be restarted. 
+ */ + public boolean supportsRestart() { + return false; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java index 3965e0d0a19c5..917d285fab9e0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WriteOperation.java @@ -102,6 +102,11 @@ public void finish() throws Exception { } } + @Override + public boolean supportsRestart() { + return sink.supportsRestart(); + } + public Counter getByteCount() { return byteCount; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index cfd89466d845d..6667ef0c78c06 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -342,8 +342,8 @@ public void testCreateParDoOperation() throws Exception { assertThat(parDoOperation.fn, new IsInstanceOf(NormalParDoFn.class)); NormalParDoFn normalParDoFn = (NormalParDoFn) parDoOperation.fn; - assertThat(normalParDoFn.fnInfo.getDoFn(), - new IsInstanceOf(TestDoFn.class)); + assertThat(normalParDoFn.fnFactory.createDoFnInfo().getDoFn(), + new IsInstanceOf(TestDoFn.class)); assertSame( parDoOperation, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java index f4a02679ce838..a6c4238f4c19b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java @@ -136,6 +136,18 @@ public void process(Object outputElem) { } } + static class TestDoFnInfoFactory implements NormalParDoFn.DoFnInfoFactory { + DoFnInfo fnInfo; + + TestDoFnInfoFactory(DoFnInfo fnInfo) { + this.fnInfo = fnInfo; + } + + public DoFnInfo createDoFnInfo() { + return fnInfo; + } + } + @Test public void testNormalParDoFn() throws Exception { List sideOutputTags = Arrays.asList("tag1", "tag2", "tag3"); @@ -154,7 +166,7 @@ public void testNormalParDoFn() throws Exception { outputTags.addAll(sideOutputTags); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fnInfo, sideInputValues, outputTags, "doFn", + new TestDoFnInfoFactory(fnInfo), sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new CounterSet()).getAddCounterMutator()); @@ -213,7 +225,7 @@ public void testUnexpectedNumberOfReceivers() throws Exception { List outputTags = Arrays.asList("output"); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fnInfo, sideInputValues, outputTags, "doFn", + new TestDoFnInfoFactory(fnInfo), sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new CounterSet()).getAddCounterMutator()); @@ -252,7 +264,7 @@ public void testErrorPropagation() throws Exception { List outputTags = Arrays.asList("output"); NormalParDoFn normalParDoFn = new NormalParDoFn(PipelineOptionsFactory.create(), - fnInfo, sideInputValues, outputTags, "doFn", + new TestDoFnInfoFactory(fnInfo), sideInputValues, outputTags, "doFn", new BatchModeExecutionContext(), (new 
CounterSet()).getAddCounterMutator()); @@ -316,10 +328,11 @@ public void testUndeclaredSideOutputs() throws Exception { DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); CounterSet counters = new CounterSet(); NormalParDoFn normalParDoFn = - new NormalParDoFn(PipelineOptionsFactory.create(), fnInfo, PTuple.empty(), - Arrays.asList("output", "declared"), "doFn", - new BatchModeExecutionContext(), - counters.getAddCounterMutator()); + new NormalParDoFn( + PipelineOptionsFactory.create(), new TestDoFnInfoFactory(fnInfo), PTuple.empty(), + Arrays.asList("output", "declared"), "doFn", + new BatchModeExecutionContext(), + counters.getAddCounterMutator()); normalParDoFn.startBundle(new TestReceiver(), new TestReceiver()); normalParDoFn.processElement(WindowedValue.valueInGlobalWindow(5)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index be31ece009e4b..8ae07b516de95 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -94,9 +94,11 @@ public void testCreateNormalParDoFn() throws Exception { Assert.assertThat(parDoFn, new IsInstanceOf(NormalParDoFn.class)); NormalParDoFn normalParDoFn = (NormalParDoFn) parDoFn; - DoFn actualDoFn = normalParDoFn.fnInfo.getDoFn(); + DoFn actualDoFn = normalParDoFn.fnFactory.createDoFnInfo().getDoFn(); Assert.assertThat(actualDoFn, new IsInstanceOf(TestDoFn.class)); - Assert.assertThat(normalParDoFn.fnInfo.getWindowingFn(), new IsInstanceOf(GlobalWindow.class)); + Assert.assertThat( + normalParDoFn.fnFactory.createDoFnInfo().getWindowingFn(), + new IsInstanceOf(GlobalWindow.class)); TestDoFn actualTestDoFn = (TestDoFn) actualDoFn; Assert.assertEquals(stringState, actualTestDoFn.stringState); From 6972706246a2eeb0aca47a43324f7e10b64d9d2d Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 16 Jan 2015 15:13:33 -0800 Subject: [PATCH 0086/1541] The option machineType is a duplicate of workerMachineType. Remove the redundant machineType option. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84167494 --- .../sdk/options/DataflowPipelineWorkerPoolOptions.java | 7 ------- .../dataflow/sdk/runners/DataflowPipelineTranslator.java | 3 --- 2 files changed, 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index a9be13a16df4a..25653495cc0ea 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -122,13 +122,6 @@ public String getApiServiceName() { String getWorkerMachineType(); void setWorkerMachineType(String value); - /** - * Machine type to create VMs as. - */ - @Description("Dataflow VM machine type.") - String getMachineType(); - void setMachineType(String value); - /** * The policy for tearing down the workers spun up by the service. 
*/ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index e116965897d3b..3bcca5e8b3afb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -402,9 +402,6 @@ public Job translate(List packages) { workerPool.setDiskSourceImage(options.getDiskSourceImage()); } - if (options.getMachineType() != null) { - workerPool.setMachineType(options.getMachineType()); - } if (options.isStreaming()) { // Use separate data disk for streaming. Disk disk = new Disk(); From 800a77730ac4eb5fa3a7a7b54f3829ca2ffd16fb Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 21 Jan 2015 08:46:23 -0800 Subject: [PATCH 0087/1541] Update PipelineOptionsFactory to improve readability. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84438386 --- .../sdk/options/PipelineOptionsFactory.java | 61 +++++++++++++------ 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 5de1fa06cbbfe..b20771a7c45f0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -31,6 +31,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -56,6 +57,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Queue; @@ -93,7 +95,7 @@ public class PipelineOptionsFactory { * @return An object which implements {@link PipelineOptions}. */ public static PipelineOptions create() { - return new Builder(getAppName(3)).as(PipelineOptions.class); + return new Builder().as(PipelineOptions.class); } /** @@ -107,7 +109,7 @@ public static PipelineOptions create() { * @return An object which implements {@code }. */ public static T as(Class klass) { - return new Builder(getAppName(3)).as(klass); + return new Builder().as(klass); } /** @@ -134,7 +136,7 @@ public static T as(Class klass) { * {@link Builder#withoutStrictParsing()}. */ public static Builder fromArgs(String[] args) { - return new Builder(getAppName(3)).fromArgs(args); + return new Builder().fromArgs(args); } /** @@ -144,7 +146,7 @@ public static Builder fromArgs(String[] args) { * validation. */ public Builder withValidation() { - return new Builder(getAppName(3)).withValidation(); + return new Builder().withValidation(); } /** A fluent PipelineOptions builder. 
*/ @@ -155,13 +157,13 @@ public static class Builder { private final boolean strictParsing; // Do not allow direct instantiation - private Builder(String defaultAppName) { - this(defaultAppName, null, false, true); + private Builder() { + this(null, false, true); } - private Builder(String defaultAppName, String[] args, boolean validation, + private Builder(String[] args, boolean validation, boolean strictParsing) { - this.defaultAppName = defaultAppName; + this.defaultAppName = findCallersClassName(); this.args = args; this.validation = validation; this.strictParsing = strictParsing; @@ -192,7 +194,7 @@ private Builder(String defaultAppName, String[] args, boolean validation, */ public Builder fromArgs(String[] args) { Preconditions.checkNotNull(args, "Arguments should not be null."); - return new Builder(defaultAppName, args, validation, strictParsing); + return new Builder(args, validation, strictParsing); } /** @@ -202,7 +204,7 @@ public Builder fromArgs(String[] args) { * validation. */ public Builder withValidation() { - return new Builder(defaultAppName, args, true, strictParsing); + return new Builder(args, true, strictParsing); } /** @@ -210,7 +212,7 @@ public Builder withValidation() { * arguments. */ public Builder withoutStrictParsing() { - return new Builder(defaultAppName, args, validation, false); + return new Builder(args, validation, false); } /** @@ -260,15 +262,31 @@ public T as(Class klass) { } /** - * Returns the simple name of calling class at the stack trace {@code level}. + * Returns the simple name of the calling class using the current threads stack. */ - private static String getAppName(int level) { - StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); - try { - return Class.forName(stackTrace[level].getClassName()).getSimpleName(); - } catch (ClassNotFoundException e) { - return "unknown"; + private static String findCallersClassName() { + Iterator elements = + Iterators.forArray(Thread.currentThread().getStackTrace()); + // First find the PipelineOptionsFactory/Builder class in the stack trace. + while (elements.hasNext()) { + StackTraceElement next = elements.next(); + if (PIPELINE_OPTIONS_FACTORY_CLASSES.contains(next.getClassName())) { + break; + } } + // Then find the first instance after which is not the PipelineOptionsFactory/Builder class. + while (elements.hasNext()) { + StackTraceElement next = elements.next(); + if (!PIPELINE_OPTIONS_FACTORY_CLASSES.contains(next.getClassName())) { + try { + return Class.forName(next.getClassName()).getSimpleName(); + } catch (ClassNotFoundException e) { + break; + } + } + } + + return "unknown"; } /** @@ -301,6 +319,11 @@ Class getProxyClass() { private static final ObjectMapper MAPPER = new ObjectMapper(); private static final Map>> SUPPORTED_PIPELINE_RUNNERS; + /** Classes which are used as the boundary in the stack trace to find the callers class name. */ + private static final Set PIPELINE_OPTIONS_FACTORY_CLASSES = ImmutableSet.of( + PipelineOptionsFactory.class.getName(), + Builder.class.getName()); + /** Methods which are ignored when validating the proxy class. */ private static final Set IGNORED_METHODS; @@ -334,7 +357,7 @@ Class getProxyClass() { // Store the list of all available pipeline runners. 
ImmutableMap.Builder>> builder = - new ImmutableMap.Builder<>(); + ImmutableMap.builder(); Set pipelineRunnerRegistrars = Sets.newTreeSet(ObjectsClassComparator.INSTANCE); pipelineRunnerRegistrars.addAll( From 24d0020c91086d40f6fa1296b1630156ef5bc0b9 Mon Sep 17 00:00:00 2001 From: bchambers Date: Wed, 21 Jan 2015 14:17:56 -0800 Subject: [PATCH 0088/1541] Update the BigQueryReaderTest to use fake JSON data rather than constructed Java wrappers. Fix two ClassCastExceptions that occurred when dealing with nested RECORDs and with REPEATED fields. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84468071 --- .../sdk/util/BigQueryTableRowIterator.java | 24 +- .../runners/worker/BigQueryReaderTest.java | 381 ++++++++++++++---- 2 files changed, 317 insertions(+), 88 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java index a6ea658ae3f42..1dde22d8ca0b8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java @@ -20,7 +20,6 @@ import com.google.api.client.util.Preconditions; import com.google.api.services.bigquery.Bigquery; import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableCell; import com.google.api.services.bigquery.model.TableDataList; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableReference; @@ -33,6 +32,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; @@ -99,16 +99,19 @@ private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) { if (Objects.equals(fieldSchema.getMode(), "REPEATED")) { TableFieldSchema elementSchema = fieldSchema.clone().setMode("REQUIRED"); - List rawValues = (List) v; + @SuppressWarnings("unchecked") + List> rawValues = (List>) v; List values = new ArrayList(rawValues.size()); - for (Object element : rawValues) { - values.add(getTypedCellValue(elementSchema, element)); + for (Map element : rawValues) { + values.add(getTypedCellValue(elementSchema, element.get("v"))); } return values; } if (fieldSchema.getType().equals("RECORD")) { - return getTypedTableRow(fieldSchema.getFields(), (TableRow) v); + @SuppressWarnings("unchecked") + Map typedV = (Map) v; + return getTypedTableRow(fieldSchema.getFields(), typedV); } if (fieldSchema.getType().equals("FLOAT")) { @@ -122,18 +125,19 @@ private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) { return v; } - private TableRow getTypedTableRow(List fields, TableRow rawRow) { - List cells = rawRow.getF(); + private TableRow getTypedTableRow(List fields, Map rawRow) { + @SuppressWarnings("unchecked") + List> cells = (List>) rawRow.get("f"); Preconditions.checkState(cells.size() == fields.size()); - Iterator cellIt = cells.iterator(); + Iterator> cellIt = cells.iterator(); Iterator fieldIt = fields.iterator(); TableRow row = new TableRow(); while (cellIt.hasNext()) { - TableCell cell = cellIt.next(); + Map cell = cellIt.next(); TableFieldSchema fieldSchema = fieldIt.next(); - row.set(fieldSchema.getName(), getTypedCellValue(fieldSchema, cell.getV())); + row.set(fieldSchema.getName(), getTypedCellValue(fieldSchema, cell.get("v"))); } return row; } diff --git 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java index f524e74744bfe..5c73190d0b4fe 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java @@ -16,23 +16,24 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static org.mockito.Matchers.anyLong; -import static org.mockito.Matchers.anyString; -import static org.mockito.Mockito.atLeast; +import static org.mockito.Matchers.endsWith; +import static org.mockito.Matchers.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; +import com.google.api.client.http.LowLevelHttpRequest; +import com.google.api.client.json.Json; +import com.google.api.client.testing.http.MockHttpTransport; +import com.google.api.client.testing.http.MockLowLevelHttpRequest; +import com.google.api.client.testing.http.MockLowLevelHttpResponse; import com.google.api.services.bigquery.Bigquery; -import com.google.api.services.bigquery.model.Table; -import com.google.api.services.bigquery.model.TableCell; -import com.google.api.services.bigquery.model.TableDataList; -import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableReference; import com.google.api.services.bigquery.model.TableRow; -import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.util.Transport; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import com.google.common.collect.Lists; import org.junit.After; import org.junit.Assert; @@ -42,107 +43,308 @@ import org.junit.runners.JUnit4; import org.mockito.Mock; import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import java.io.IOException; -import java.util.Arrays; -import java.util.LinkedList; import java.util.List; /** * Tests for BigQueryReader. * - *
<p>
The tests just make sure a basic scenario of reading works because the class itself is a - * thin wrapper over {@code BigQueryTableRowIterator}. The tests for the wrapped class have - * comprehensive coverage. + *
<p>
The tests make sure a simple scenario (reading two rows) work for the various kinds of fields + * and modes. */ @RunWith(JUnit4.class) public class BigQueryReaderTest { + + private static final String PROJECT_ID = "project"; + private static final String DATASET = "dataset"; + private static final String TABLE = "table"; + + private static final String GET_TABLE_REQUEST_PATH = + String.format("projects/%s/datasets/%s/tables/%s", PROJECT_ID, DATASET, TABLE); + + // This is a real response (with some unused fields removed) for the table created from this + // schema: + // [ + // {"name":"name","type":"STRING"}, + // {"name":"integer", "type":"INTEGER"}, + // {"name":"float", "type":"FLOAT"}, + // {"name":"bool", "type":"BOOLEAN"}, + // {"name":"record", "type":"RECORD", "fields":[ + // {"name": "nestedInt","type":"INTEGER"}, + // {"name": "nestedFloat","type":"FLOAT"} + // ]}, + // {"name":"repeatedInt", "type":"INTEGER", "mode":"REPEATED"}, + // {"name":"repeatedFloat", "type":"FLOAT", "mode":"REPEATED"}, + // + // {"name":"repeatedRecord", "type":"RECORD", "mode":"REPEATED", "fields":[ + // {"name": "bool", "type": "BOOLEAN"}, + // {"name": "string", "type": "STRING"} + // ]} + //] + private static final String GET_TABLE_RESPONSE_JSON = "{\n" + + " \"schema\": {\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"STRING\"\n" + + " },\n" + + " {\n" + + " \"name\": \"integer\",\n" + + " \"type\": \"INTEGER\"\n" + + " },\n" + + " {\n" + + " \"name\": \"float\",\n" + + " \"type\": \"FLOAT\"\n" + + " },\n" + + " {\n" + + " \"name\": \"bool\",\n" + + " \"type\": \"BOOLEAN\"\n" + + " },\n" + + " {\n" + + " \"name\": \"record\",\n" + + " \"type\": \"RECORD\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"nestedInt\",\n" + + " \"type\": \"INTEGER\"\n" + + " },\n" + + " {\n" + + " \"name\": \"nestedFloat\",\n" + + " \"type\": \"FLOAT\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"name\": \"repeatedInt\",\n" + + " \"type\": \"INTEGER\",\n" + + " \"mode\": \"REPEATED\"\n" + + " },\n" + + " {\n" + + " \"name\": \"repeatedFloat\",\n" + + " \"type\": \"FLOAT\",\n" + + " \"mode\": \"REPEATED\"\n" + + " },\n" + + " {\n" + + " \"name\": \"repeatedRecord\",\n" + + " \"type\": \"RECORD\",\n" + + " \"mode\": \"REPEATED\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"bool\",\n" + + " \"type\": \"BOOLEAN\"\n" + + " },\n" + + " {\n" + + " \"name\": \"string\",\n" + + " \"type\": \"STRING\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " ]\n" + + " },\n" + + " \"numRows\": \"2\",\n" + + " \"type\": \"TABLE\"\n" + + "}"; + + private static final String LIST_TABLE_DATA_REQUEST_PATH = + String.format("projects/%s/datasets/%s/tables/%s/data", PROJECT_ID, DATASET, TABLE); + + // This is a real response (with some unused fields removed) for the table listed above, populated + // with the following data: + //{"name": "Arthur", "integer": 42, "float": 3.14159, "bool": "false", + // "record": {"nestedInt": 43, "nestedFloat": "4.14159"}, + // "repeatedInt":[42, 43, 79]}, + // + //{"name": "Allison", "integer": 79, "float": 2.71828, "bool": "true", + // "record": {"nestedInt": 80, "nestedFloat": "3.71828"}, + // "repeatedFloat":[3.14159, 2.71828], + // "repeatedRecord":[{"bool":"true","string":"hello"}, + // {"bool":"false","string":"world"}]} + private static final String LIST_TABLEDATA_RESPONSE_JSON = "{\n" + + " \"totalRows\": \"2\",\n" + + " \"rows\": [\n" + + " {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"Arthur\"\n" + + " },\n" + + " {\n" + + " \"v\": 
\"42\"\n" + + " },\n" + + " {\n" + + " \"v\": \"3.14159\"\n" + + " },\n" + + " {\n" + + " \"v\": \"false\"\n" + + " },\n" + + " {\n" + + " \"v\": {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"43\"\n" + + " },\n" + + " {\n" + + " \"v\": \"4.14159\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " {\n" + + " \"v\": \"42\"\n" + + " },\n" + + " {\n" + + " \"v\": \"43\"\n" + + " },\n" + + " {\n" + + " \"v\": \"79\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " ]\n" + + " }\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"Allison\"\n" + + " },\n" + + " {\n" + + " \"v\": \"79\"\n" + + " },\n" + + " {\n" + + " \"v\": \"2.71828\"\n" + + " },\n" + + " {\n" + + " \"v\": \"true\"\n" + + " },\n" + + " {\n" + + " \"v\": {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"80\"\n" + + " },\n" + + " {\n" + + " \"v\": \"3.71828\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " {\n" + + " \"v\": \"3.14159\"\n" + + " },\n" + + " {\n" + + " \"v\": \"2.71828\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " {\n" + + " \"v\": [\n" + + " {\n" + + " \"v\": {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"true\"\n" + + " },\n" + + " {\n" + + " \"v\": \"hello\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"v\": {\n" + + " \"f\": [\n" + + " {\n" + + " \"v\": \"false\"\n" + + " },\n" + + " {\n" + + " \"v\": \"world\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " ]\n" + + " }\n" + + " ]\n" + + "}"; + @Mock - private Bigquery mockClient; - @Mock - private Bigquery.Tables mockTables; - @Mock - private Bigquery.Tables.Get mockTablesGet; - @Mock - private Bigquery.Tabledata mockTabledata; - @Mock - private Bigquery.Tabledata.List mockTabledataList; + private MockHttpTransport mockTransport; + + private Bigquery bigQueryClient; @Before - public void setUp() { + public void setUp() throws Exception { MockitoAnnotations.initMocks(this); - } + when(mockTransport.buildRequest(eq("GET"), endsWith(GET_TABLE_REQUEST_PATH))) + .thenAnswer(new Answer() { + @Override + public LowLevelHttpRequest answer(InvocationOnMock invocation) throws Throwable { + MockLowLevelHttpResponse response = new MockLowLevelHttpResponse() + .setContentType(Json.MEDIA_TYPE) + .setContent(GET_TABLE_RESPONSE_JSON); + return new MockLowLevelHttpRequest((String) invocation.getArguments()[1]) + .setResponse(response); + } + }); + when(mockTransport.buildRequest(eq("GET"), endsWith(LIST_TABLE_DATA_REQUEST_PATH))) + .thenAnswer(new Answer() { + @Override + public LowLevelHttpRequest answer(InvocationOnMock invocation) throws Throwable { + MockLowLevelHttpResponse response = new MockLowLevelHttpResponse() + .setContentType(Json.MEDIA_TYPE) + .setContent(LIST_TABLEDATA_RESPONSE_JSON); + return new MockLowLevelHttpRequest((String) invocation.getArguments()[1]) + .setResponse(response); + } + }); + when(mockTransport.supportsMethod("GET")).thenReturn(true); - @After - public void tearDown() { - verifyNoMoreInteractions(mockClient); - verifyNoMoreInteractions(mockTables); - verifyNoMoreInteractions(mockTablesGet); - verifyNoMoreInteractions(mockTabledata); - verifyNoMoreInteractions(mockTabledataList); + bigQueryClient = new Bigquery(mockTransport, Transport.getJsonFactory(), null); } - private void onTableGet(Table table) throws IOException { - when(mockClient.tables()).thenReturn(mockTables); - 
when(mockTables.get(anyString(), anyString(), anyString())).thenReturn(mockTablesGet); - when(mockTablesGet.execute()).thenReturn(table); + @After + public void tearDown() throws IOException { + verify(mockTransport, atLeastOnce()).supportsMethod("GET"); + verifyNoMoreInteractions(mockTransport); } private void verifyTableGet() throws IOException { - verify(mockClient).tables(); - verify(mockTables).get("project", "dataset", "table"); - verify(mockTablesGet).execute(); - } - - private void onTableList(TableDataList result) throws IOException { - when(mockClient.tabledata()).thenReturn(mockTabledata); - when(mockTabledata.list(anyString(), anyString(), anyString())).thenReturn(mockTabledataList); - when(mockTabledataList.execute()).thenReturn(result); + verify(mockTransport).buildRequest(eq("GET"), endsWith(GET_TABLE_REQUEST_PATH)); } private void verifyTabledataList() throws IOException { - verify(mockClient, atLeastOnce()).tabledata(); - verify(mockTabledata, atLeastOnce()).list("project", "dataset", "table"); - verify(mockTabledataList, atLeastOnce()).execute(); - // Max results may be set when testing for an empty table. - verify(mockTabledataList, atLeast(0)).setMaxResults(anyLong()); - } - - private Table basicTableSchema() { - return new Table().setSchema(new TableSchema().setFields(Arrays.asList( - new TableFieldSchema().setName("name").setType("STRING"), - new TableFieldSchema().setName("integer").setType("INTEGER"), - new TableFieldSchema().setName("float").setType("FLOAT"), - new TableFieldSchema().setName("bool").setType("BOOLEAN")))); - } - - private TableRow rawRow(Object... args) { - List cells = new LinkedList<>(); - for (Object a : args) { - cells.add(new TableCell().setV(a)); - } - return new TableRow().setF(cells); - } - - private TableDataList rawDataList(TableRow... 
rows) { - return new TableDataList().setRows(Arrays.asList(rows)); + verify(mockTransport).buildRequest(eq("GET"), endsWith(LIST_TABLE_DATA_REQUEST_PATH)); } @Test public void testRead() throws Exception { - onTableGet(basicTableSchema()); - - // BQ API data is always encoded as a string - TableDataList dataList = rawDataList( - rawRow("Arthur", "42", "3.14159", "false"), rawRow("Allison", "79", "2.71828", "true")); - onTableList(dataList); - BigQueryReader reader = new BigQueryReader( - mockClient, - new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table")); + bigQueryClient, + new TableReference().setProjectId(PROJECT_ID).setDatasetId(DATASET).setTableId(TABLE)); Reader.ReaderIterator iterator = reader.iterator(); Assert.assertTrue(iterator.hasNext()); + TableRow row = iterator.next(); Assert.assertEquals("Arthur", row.get("name")); @@ -150,6 +352,14 @@ public void testRead() throws Exception { Assert.assertEquals(3.14159, row.get("float")); Assert.assertEquals(false, row.get("bool")); + TableRow nested = (TableRow) row.get("record"); + Assert.assertEquals("43", nested.get("nestedInt")); + Assert.assertEquals(4.14159, nested.get("nestedFloat")); + + Assert.assertEquals(Lists.newArrayList("42", "43", "79"), row.get("repeatedInt")); + Assert.assertTrue(((List) row.get("repeatedFloat")).isEmpty()); + Assert.assertTrue(((List) row.get("repeatedRecord")).isEmpty()); + row = iterator.next(); Assert.assertEquals("Allison", row.get("name")); @@ -157,6 +367,21 @@ public void testRead() throws Exception { Assert.assertEquals(2.71828, row.get("float")); Assert.assertEquals(true, row.get("bool")); + nested = (TableRow) row.get("record"); + Assert.assertEquals("80", nested.get("nestedInt")); + Assert.assertEquals(3.71828, nested.get("nestedFloat")); + + Assert.assertTrue(((List) row.get("repeatedInt")).isEmpty()); + Assert.assertEquals(Lists.newArrayList(3.14159, 2.71828), row.get("repeatedFloat")); + + @SuppressWarnings("unchecked") + List nestedRecords = (List) row.get("repeatedRecord"); + Assert.assertEquals(2, nestedRecords.size()); + Assert.assertEquals("hello", nestedRecords.get(0).get("string")); + Assert.assertEquals(true, nestedRecords.get(0).get("bool")); + Assert.assertEquals("world", nestedRecords.get(1).get("string")); + Assert.assertEquals(false, nestedRecords.get(1).get("bool")); + Assert.assertFalse(iterator.hasNext()); verifyTableGet(); From 6616fd636f289d52f255b1252434099838f7b685 Mon Sep 17 00:00:00 2001 From: vgough Date: Thu, 22 Jan 2015 12:05:35 -0800 Subject: [PATCH 0089/1541] Fixes a randomization bug and removes some unused checks in ProxyInvocationHandler. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84553881 --- .../sdk/options/ProxyInvocationHandler.java | 16 +++++----------- .../sdk/options/ProxyInvocationHandlerTest.java | 13 ++++++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java index 39ab988182370..e74e5a8f6e820 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java @@ -60,8 +60,8 @@ * Represents and {@link InvocationHandler} for a {@link Proxy}. The invocation handler uses bean * introspection of the proxy class to store and retrieve values based off of the property name. *
<p>
- * Unset properties use the {@Default} metadata on the getter to return values. If there - * is no {@Default} annotation on the getter, then a default as * per the Java Language Specification for the expected return type is returned. *
<p>
@@ -75,7 +75,7 @@ class ProxyInvocationHandler implements InvocationHandler { * No two instances of this class are considered equivalent hence we generate a random hash code * between 0 and {@link Integer#MAX_VALUE}. */ - private final int hashCode = (int) Math.random() * Integer.MAX_VALUE; + private final int hashCode = (int) (Math.random() * Integer.MAX_VALUE); private final Set> knownInterfaces; private final ClassToInstanceMap interfaceToProxyCache; private final Map options; @@ -214,8 +214,8 @@ private Object getValueFromJson(String propertyName, Method method) { } /** - * Returns a default value for the method based upon {@Default} metadata on the getter - * to return values. If there is no {@Default} annotation on the getter, then a default as * per the Java Language Specification for the expected return type is returned. * @@ -246,12 +246,6 @@ private Object getDefault(PipelineOptions proxy, Method method) { return ((Default.Float) annotation).value(); } else if (annotation instanceof Default.Double) { return ((Default.Double) annotation).value(); - } else if (annotation instanceof Default.String) { - return ((Default.String) annotation).value(); - } else if (annotation instanceof Default.String) { - return ((Default.String) annotation).value(); - } else if (annotation instanceof Default.String) { - return ((Default.String) annotation).value(); } else if (annotation instanceof Default.Enum) { return Enum.valueOf((Class) method.getReturnType(), ((Default.Enum) annotation).value()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index 01085322f26e4..d893ba4d08d30 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -209,14 +209,17 @@ public void testHashCode() throws Exception { ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); Simple proxy = handler.as(Simple.class); JLSDefaults sameAsProxy = proxy.as(JLSDefaults.class); + ProxyInvocationHandler handler2 = new ProxyInvocationHandler(Maps.newHashMap()); - Simple proxy2 = handler.as(Simple.class); - JLSDefaults sameAsProxy2 = proxy.as(JLSDefaults.class); + Simple proxy2 = handler2.as(Simple.class); + JLSDefaults sameAsProxy2 = proxy2.as(JLSDefaults.class); + + // Hashcode comparisons below depend on random numbers, so could fail if seed changes. assertTrue(handler.hashCode() == proxy.hashCode()); assertTrue(proxy.hashCode() == sameAsProxy.hashCode()); - assertFalse(handler.hashCode() != handler2.hashCode()); - assertFalse(proxy.hashCode() != proxy2.hashCode()); - assertFalse(proxy.hashCode() != sameAsProxy2.hashCode()); + assertFalse(handler.hashCode() == handler2.hashCode()); + assertFalse(proxy.hashCode() == proxy2.hashCode()); + assertFalse(proxy.hashCode() == sameAsProxy2.hashCode()); } @Test From 1942ce0a21d65885ce6be91a6746c3e747933bb0 Mon Sep 17 00:00:00 2001 From: robertwb Date: Thu, 22 Jan 2015 12:06:11 -0800 Subject: [PATCH 0090/1541] Use deterministic tag ids for tags created in static initializers. These tags are often assigned to static members variables, which are re-created rather than serialized on remote machines, yielding surprising behavior. 
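For illustration, a minimal sketch of the pattern this change targets (the class and tag names below are made up, not taken from this patch): a tag held in a static field is rebuilt by the class's static initializer on every worker JVM, so without a deterministic id each JVM would mint a different random id for what is logically the same tag.

    import com.google.cloud.dataflow.sdk.values.TupleTag;

    class MyDoFns {
      // Re-created by the static initializer on each worker rather than being
      // serialized with the DoFn, so its id must be deterministic for the main
      // program and the workers to agree on which output it names.
      static final TupleTag<String> MISSPELLED_WORDS = new TupleTag<String>();
    }

With this change, tags constructed in static initializers derive their id from the initializing class name plus a counter, while other tags keep a random nonce prefixed with the caller's location.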
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84553936 --- .../cloud/dataflow/sdk/values/TupleTag.java | 33 +++++++-- .../dataflow/sdk/values/TupleTagTest.java | 69 +++++++++++++++++++ 2 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/values/TupleTagTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java index 4b47e80fdbabc..3a0fa15434216 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java @@ -21,6 +21,8 @@ import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; import com.google.common.reflect.TypeToken; import com.fasterxml.jackson.annotation.JsonCreator; @@ -125,17 +127,38 @@ public TypeToken getTypeToken() { // Internal details below here. static final Random RANDOM = new Random(0); + private static final Multiset staticInits = HashMultiset.create(); final String id; final boolean generated; /** Generates and returns a fresh unique id for a TupleTag's id. */ - static String genId() { - long randomLong; - synchronized (RANDOM) { - randomLong = RANDOM.nextLong(); + static synchronized String genId() { + // It is a common pattern to store tags that are shared between the main + // program and workers in static variables, but such references are not + // serialized as part of the *Fns state. Fortunately, most such tags + // are constructed in static class initializers, e.g. + // + // static final TupleTag MY_TAG = new TupleTag<>(); + // + // and class initialization order is well defined by the JVM spec, so in + // this case we can assign deterministic ids. + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + for (StackTraceElement frame : stackTrace) { + if (frame.getMethodName().equals("")) { + int counter = staticInits.add(frame.getClassName(), 1); + return frame.getClassName() + "#" + counter; + } } - return Long.toHexString(randomLong); + // Otherwise, assume it'll be serialized and choose a random value to reduce + // the chance of collision. + String nonce = Long.toHexString(RANDOM.nextLong()); + // [Thread.getStackTrace, TupleTag.getId, TupleTag., caller, ...] + String caller = stackTrace.length >= 4 + ? stackTrace[3].getClassName() + "." + stackTrace[3].getMethodName() + + ":" + stackTrace[3].getLineNumber() + : "unknown"; + return caller + "#" + nonce; } @JsonCreator diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/TupleTagTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/TupleTagTest.java new file mode 100644 index 0000000000000..dfdae4df5907d --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/values/TupleTagTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.values; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for {@link TupleTag}. + */ +@RunWith(JUnit4.class) +public class TupleTagTest { + + private static TupleTag staticTag = new TupleTag<>(); + private static TupleTag staticBlockTag; + private static TupleTag staticMethodTag = createTag(); + private static TupleTag instanceMethodTag = new AnotherClass().createAnotherTag(); + + static { + staticBlockTag = new TupleTag<>(); + } + + private static TupleTag createTag() { + return new TupleTag<>(); + } + + private static class AnotherClass { + private static TupleTag anotherTag = new TupleTag<>(); + private TupleTag createAnotherTag() { + return new TupleTag<>(); + } + } + + @Test + public void testStaticTupleTag() { + assertEquals("com.google.cloud.dataflow.sdk.values.TupleTagTest#0", staticTag.getId()); + assertEquals("com.google.cloud.dataflow.sdk.values.TupleTagTest#3", staticBlockTag.getId()); + assertEquals("com.google.cloud.dataflow.sdk.values.TupleTagTest#1", staticMethodTag.getId()); + assertEquals("com.google.cloud.dataflow.sdk.values.TupleTagTest#2", instanceMethodTag.getId()); + assertEquals( + "com.google.cloud.dataflow.sdk.values.TupleTagTest$AnotherClass#0", + AnotherClass.anotherTag.getId()); + } + + @Test + public void testNonstaticTupleTag() { + assertEquals("com.google.cloud.dataflow.sdk.values.TupleTagTest.testNonstaticTupleTag:65", + new TupleTag().getId().split("#")[0]); + assertNotEquals(new TupleTag().getId(), new TupleTag().getId()); + } +} From e09e6430c3c0ed584081ef048ab8e899863f42ce Mon Sep 17 00:00:00 2001 From: vgough Date: Thu, 22 Jan 2015 16:45:28 -0800 Subject: [PATCH 0091/1541] Fixes a couple window comparison issues in CalendarWindows. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84579045 --- .../dataflow/sdk/transforms/windowing/CalendarWindows.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java index 70590665be5ac..cdbc1a64232b3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java @@ -212,7 +212,7 @@ public boolean isCompatible(WindowingFn other) { } MonthsWindows that = (MonthsWindows) other; return number == that.number - && dayOfMonth == dayOfMonth + && dayOfMonth == that.dayOfMonth && startDate == that.startDate && timeZone == that.timeZone; } @@ -291,8 +291,8 @@ public boolean isCompatible(WindowingFn other) { } YearsWindows that = (YearsWindows) other; return number == that.number - && monthOfYear == monthOfYear - && dayOfMonth == dayOfMonth + && monthOfYear == that.monthOfYear + && dayOfMonth == that.dayOfMonth && startDate == that.startDate && timeZone == that.timeZone; } From 00dc9415d8b2c7b2ceceab87b7c45a2541736c77 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 23 Jan 2015 09:39:52 -0800 Subject: [PATCH 0092/1541] Fix parts of flakiness in RateLimitingTest. runWithRate() returns the number of milliseconds that it took at execute a DoFn. 
The callers are expecting that the execution time is greater than the sum of the delay intervals during execution. This test is flaky, in part by, the system time resolution. If the rest of execution took very little time, say, less than the timer resolution, it is certainly possible that the execution time is equal to the sum of the delay intervals. Now, Thread.sleep() calls will, in most cases, sleep for *at least* the given time. But, this is also subject to precision and accuracy of system timers and schedulers, and not guaranteed by Java. This is possible to cause further issues, but unlikely to actually happen. The problem tends to be bigger on Windows, where timer resolution is 15 or 50 ms, whereas on Linux is usually by an order of magnitude smaller. It was still hit on both platforms, however. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84630029 --- .../cloud/dataflow/sdk/transforms/RateLimitingTest.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java index 5a1c2a8f1340c..ee6e17a66810d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java @@ -18,7 +18,6 @@ import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThan; @@ -132,7 +131,7 @@ public void testRateLimitingMax() { long perElementPause = (long) (1000L / rate); long minDuration = (n - 1) * perElementPause; - Assert.assertThat(duration, greaterThan(minDuration)); + Assert.assertThat(duration, greaterThanOrEqualTo(minDuration)); } @Test(timeout = 5000L) @@ -185,10 +184,10 @@ public void testBacklogLimiter() { long duration = runWithRate(2 * RateLimiting.DEFAULT_MAX_PARALLELISM, -1.0 /* unlimited */, new DelayFn()); - // Should take > 2x the delay interval, since no more than half the elements - // can be scheduled at once. + // Should take >= 2x the delay interval, since no more than half the + // elements can be scheduled at once. Assert.assertThat(duration, - greaterThan(2 * DelayFn.DELAY_MS)); + greaterThanOrEqualTo(2 * DelayFn.DELAY_MS)); } private long runWithRate(int numElements, double rateLimit, From 006401cd8c27a7c02f99001bd6acef5e72efb154 Mon Sep 17 00:00:00 2001 From: ccy Date: Fri, 23 Jan 2015 11:43:20 -0800 Subject: [PATCH 0093/1541] Don't rewrap RuntimeException in NormalParDoFn$1.output for cleaner error reporting. 
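For context, a minimal sketch of the Guava idiom this change and the follow-up patch settle on; the Receiver type below is a placeholder, not the SDK's worker interface, and Guava is assumed to be on the classpath.

    import com.google.common.base.Throwables;

    // Illustrative sketch: propagate a failure without re-wrapping RuntimeExceptions.
    public class PropagateSketch {
      interface Receiver { void process(Object value) throws Exception; }

      static void output(Receiver receiver, Object value) {
        try {
          receiver.process(value);
        } catch (Throwable t) {
          // Throwables.propagate rethrows RuntimeExceptions and Errors unchanged and
          // wraps only checked Throwables, so user exceptions keep their original type.
          // The "throw" only satisfies compiler flow analysis; propagate never returns
          // normally, which the follow-up patch below makes explicit.
          throw Throwables.propagate(t);
        }
      }

      public static void main(String[] args) {
        try {
          output(v -> { throw new IllegalStateException("user code failed"); }, "elem");
        } catch (IllegalStateException e) {
          System.out.println("Original exception type preserved: " + e.getMessage());
        }
      }
    }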
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84640341 --- .../cloud/dataflow/sdk/runners/worker/NormalParDoFn.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index ea9ea4a970d87..5b522875f1592 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -37,6 +37,7 @@ import com.google.cloud.dataflow.sdk.util.common.worker.Receiver; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.base.Throwables; import java.util.ArrayList; import java.util.HashMap; @@ -203,8 +204,8 @@ public Receiver initialize(TupleTag tag) { public void output(Receiver receiver, WindowedValue output) { try { receiver.process(output); - } catch (Exception e) { - throw new RuntimeException(e); + } catch (Throwable t) { + Throwables.propagate(t); } } }, From f8329b20387d455c3b3d9880f43dcdd796101637 Mon Sep 17 00:00:00 2001 From: ccy Date: Fri, 23 Jan 2015 15:59:09 -0800 Subject: [PATCH 0094/1541] Throw return value from Throwables.propagate in NormalParDoFn exception handling. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84661143 --- .../google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index 5b522875f1592..09e3d247aa6b0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -205,7 +205,7 @@ public void output(Receiver receiver, WindowedValue output) { try { receiver.process(output); } catch (Throwable t) { - Throwables.propagate(t); + throw Throwables.propagate(t); } } }, From e88a30230017a7e8c04f93236f9e88a8ddb66801 Mon Sep 17 00:00:00 2001 From: ccy Date: Fri, 23 Jan 2015 16:07:23 -0800 Subject: [PATCH 0095/1541] Don't rewrap UserCodeException for cleaner user error messages. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84661933 --- .../java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index 09320962a3f2d..e8ac2486da6e0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.base.Throwables; import java.util.ArrayList; import java.util.List; @@ -114,6 +115,7 @@ public void startBundle() { fn.startBundle(context); } catch (Throwable t) { // Exception in user code. 
+ Throwables.propagateIfInstanceOf(t, UserCodeException.class); throw new UserCodeException(t); } } @@ -130,6 +132,7 @@ public void processElement(WindowedValue elem) { fn.processElement(processContext); } catch (Throwable t) { // Exception in user code. + Throwables.propagateIfInstanceOf(t, UserCodeException.class); throw new UserCodeException(t); } } @@ -141,6 +144,7 @@ public void finishBundle() { fn.finishBundle(context); } catch (Throwable t) { // Exception in user code. + Throwables.propagateIfInstanceOf(t, UserCodeException.class); throw new UserCodeException(t); } } From 5a4d6e3e2d59646fbd870d163d30d7e388c22bd8 Mon Sep 17 00:00:00 2001 From: robertwb Date: Sat, 24 Jan 2015 01:47:59 -0800 Subject: [PATCH 0096/1541] Rename WindowingFn to WindowFn for consistency with the other *Fns. This is not backwards compatible, but is unlikely to affect much user code at this point. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84685618 --- .../sdk/runners/DataflowPipelineRunner.java | 2 +- .../runners/DataflowPipelineTranslator.java | 12 ++--- .../runners/worker/AssignWindowsParDoFn.java | 10 ++-- .../worker/GroupAlsoByWindowsParDoFn.java | 10 ++-- .../sdk/runners/worker/NormalParDoFn.java | 2 +- ...nTestUtils.java => WindowFnTestUtils.java} | 34 ++++++------- .../dataflow/sdk/transforms/Combine.java | 18 +++---- .../cloud/dataflow/sdk/transforms/Count.java | 2 +- .../cloud/dataflow/sdk/transforms/DoFn.java | 16 +++--- .../dataflow/sdk/transforms/Flatten.java | 20 ++++---- .../dataflow/sdk/transforms/GroupByKey.java | 50 +++++++++---------- .../cloud/dataflow/sdk/transforms/Keys.java | 2 +- .../cloud/dataflow/sdk/transforms/KvSwap.java | 2 +- .../cloud/dataflow/sdk/transforms/ParDo.java | 8 +-- .../dataflow/sdk/transforms/Partition.java | 2 +- .../sdk/transforms/RemoveDuplicates.java | 2 +- .../cloud/dataflow/sdk/transforms/Values.java | 2 +- .../dataflow/sdk/transforms/WithKeys.java | 2 +- .../transforms/windowing/CalendarWindows.java | 28 +++++------ .../transforms/windowing/FixedWindows.java | 6 +-- .../transforms/windowing/GlobalWindow.java | 6 +-- ...dWindowingFn.java => InvalidWindowFn.java} | 28 +++++------ .../MergeOverlappingIntervalWindows.java | 6 +-- ...ndowingFn.java => NonMergingWindowFn.java} | 8 +-- ...owingFn.java => PartitioningWindowFn.java} | 6 +-- .../sdk/transforms/windowing/Sessions.java | 10 ++-- .../transforms/windowing/SlidingWindows.java | 6 +-- .../sdk/transforms/windowing/Window.java | 40 +++++++-------- .../{WindowingFn.java => WindowFn.java} | 18 +++---- .../transforms/windowing/package-info.java | 2 +- .../dataflow/sdk/util/AbstractWindowSet.java | 14 +++--- .../dataflow/sdk/util/AssignWindowsDoFn.java | 12 ++--- .../dataflow/sdk/util/BufferingWindowSet.java | 16 +++--- .../cloud/dataflow/sdk/util/DoFnContext.java | 18 +++---- .../cloud/dataflow/sdk/util/DoFnInfo.java | 12 ++--- .../cloud/dataflow/sdk/util/DoFnRunner.java | 14 +++--- .../sdk/util/GroupAlsoByWindowsDoFn.java | 28 +++++------ .../sdk/util/PartitionBufferingWindowSet.java | 12 ++--- .../util/StreamingGroupAlsoByWindowsDoFn.java | 30 +++++------ .../dataflow/sdk/values/PCollection.java | 24 ++++----- .../dataflow/sdk/values/PCollectionTuple.java | 6 +-- .../runners/worker/ParDoFnFactoryTest.java | 2 +- .../dataflow/sdk/transforms/FlattenTest.java | 12 ++--- .../sdk/transforms/GroupByKeyTest.java | 16 +++--- .../dataflow/sdk/transforms/ParDoTest.java | 2 +- .../windowing/CalendarWindowsTest.java | 18 +++---- .../windowing/FixedWindowsTest.java | 12 ++--- 
.../transforms/windowing/SessionsTest.java | 14 +++--- .../windowing/SlidingWindowsTest.java | 16 +++--- .../transforms/windowing/WindowingTest.java | 8 +-- .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 6 +-- .../StreamingGroupAlsoByWindowsDoFnTest.java | 4 +- 52 files changed, 328 insertions(+), 328 deletions(-) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/{WindowingFnTestUtils.java => WindowFnTestUtils.java} (84%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{InvalidWindowingFn.java => InvalidWindowFn.java} (58%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{NonMergingWindowingFn.java => NonMergingWindowFn.java} (80%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{PartitioningWindowingFn.java => PartitioningWindowFn.java} (86%) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{WindowingFn.java => WindowFn.java} (87%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 1a0d873307375..b6003b2001064 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -159,7 +159,7 @@ public Output apply( if (transform instanceof Combine.GroupedValues) { // TODO: Redundant with translator registration? return (Output) PCollection.createPrimitiveOutputInternal( - ((PCollection) input).getWindowingFn()); + ((PCollection) input).getWindowFn()); } else if (transform instanceof GroupByKey) { // The DataflowPipelineRunner implementation of GroupByKey will sort values by timestamp, // so no need for an explicit sort transform. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 3bcca5e8b3afb..54dd9326c67f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -66,7 +66,7 @@ import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.transforms.View; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.OutputReference; @@ -600,7 +600,7 @@ public void addOutput(String name, PValue value) { // Wrap the PCollection element Coder inside a WindowedValueCoder. coder = WindowedValue.getFullCoder( coder, - ((PCollection) value).getWindowingFn().windowCoder()); + ((PCollection) value).getWindowFn().windowCoder()); } } else { // No output coder to encode. 
@@ -884,7 +884,7 @@ private void translateMultiHelper( TranslationContext context) { context.addStep(transform, "ParallelDo"); translateInputs(transform.getInput(), transform.getSideInputs(), context); - translateFn(transform.getFn(), transform.getInput().getWindowingFn(), context); + translateFn(transform.getFn(), transform.getInput().getWindowFn(), context); translateOutputs(transform.getOutput(), context); } }); @@ -904,7 +904,7 @@ private void translateSingleHelper( TranslationContext context) { context.addStep(transform, "ParallelDo"); translateInputs(transform.getInput(), transform.getSideInputs(), context); - translateFn(transform.getFn(), transform.getInput().getWindowingFn(), context); + translateFn(transform.getFn(), transform.getInput().getWindowFn(), context); context.addOutput("out", transform.getOutput()); } }); @@ -964,12 +964,12 @@ private static void translateSideInputs( private static void translateFn( DoFn fn, - WindowingFn windowingFn, + WindowFn windowFn, TranslationContext context) { context.addInput(PropertyNames.USER_FN, fn.getClass().getName()); context.addInput( PropertyNames.SERIALIZED_FN, - byteArrayToJsonString(serializeToByteArray(new DoFnInfo(fn, windowingFn)))); + byteArrayToJsonString(serializeToByteArray(new DoFnInfo(fn, windowFn)))); if (fn instanceof DoFn.RequiresKeyedState) { context.addInput(PropertyNames.USES_KEYED_STATE, "true"); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java index 2a22793e81c4a..16564e1a1d467 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -21,7 +21,7 @@ import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptions; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; @@ -54,16 +54,16 @@ public static AssignWindowsParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler sampler /* unused */) throws Exception { - final Object windowingFn = + final Object windowFn = SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized window fn"); - if (!(windowingFn instanceof WindowingFn)) { + if (!(windowFn instanceof WindowFn)) { throw new Exception( - "unexpected kind of WindowingFn: " + windowingFn.getClass().getName()); + "unexpected kind of WindowFn: " + windowFn.getClass().getName()); } - final AssignWindowsDoFn assignFn = new AssignWindowsDoFn((WindowingFn) windowingFn); + final AssignWindowsDoFn assignFn = new AssignWindowsDoFn((WindowFn) windowFn); DoFnInfoFactory fnFactory = new DoFnInfoFactory() { @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index a8ba7e840b4f5..7e9e2bd89774e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -25,7 +25,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; @@ -60,13 +60,13 @@ public static GroupAlsoByWindowsParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler sampler /* unused */) throws Exception { - final Object windowingFn = + final Object windowFn = SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized window fn"); - if (!(windowingFn instanceof WindowingFn)) { + if (!(windowFn instanceof WindowFn)) { throw new Exception( - "unexpected kind of WindowingFn: " + windowingFn.getClass().getName()); + "unexpected kind of WindowFn: " + windowFn.getClass().getName()); } byte[] serializedCombineFn = getBytes(cloudUserFn, PropertyNames.COMBINE_FN, null); @@ -97,7 +97,7 @@ public static GroupAlsoByWindowsParDoFn create( @Override public DoFnInfo createDoFnInfo() { return new DoFnInfo(StreamingGroupAlsoByWindowsDoFn.create( - (WindowingFn) windowingFn, + (WindowFn) windowFn, ((KvCoder) elemCoder).getValueCoder()), null); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index 09e3d247aa6b0..001e63f2a1263 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -213,7 +213,7 @@ public void output(Receiver receiver, WindowedValue output) { sideOutputTags, stepContext, addCounterMutator, - fnFactory.createDoFnInfo().getWindowingFn()); + fnFactory.createDoFnInfo().getWindowFn()); fnRunner.startBundle(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java similarity index 84% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java index 687cb64530efa..0e14da6c4cefc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowingFnTestUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java @@ -17,7 +17,7 @@ package com.google.cloud.dataflow.sdk.testing; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import org.joda.time.Instant; @@ -30,13 +30,13 @@ import java.util.Set; /** - * A utility class for testing {@link WindowingFn}s. + * A utility class for testing {@link WindowFn}s. */ -public class WindowingFnTestUtils { +public class WindowFnTestUtils { /** * Creates a Set of elements to be used as expected output in - * {@link #runWindowingFn}. + * {@link #runWindowFn}. */ public static Set set(long... 
timestamps) { Set result = new HashSet<>(); @@ -48,20 +48,20 @@ public static Set set(long... timestamps) { /** - * Runs the {@link WindowingFn} over the provided input, returning a map + * Runs the {@link WindowFn} over the provided input, returning a map * of windows to the timestamps in those windows. */ - public static Map> runWindowingFn( - WindowingFn windowingFn, + public static Map> runWindowFn( + WindowFn windowFn, List timestamps) throws Exception { final TestWindowSet windowSet = new TestWindowSet(); for (final Long timestamp : timestamps) { - for (W window : windowingFn.assignWindows( - new TestAssignContext(new Instant(timestamp), windowingFn))) { + for (W window : windowFn.assignWindows( + new TestAssignContext(new Instant(timestamp), windowFn))) { windowSet.put(window, timestampValue(timestamp)); } - windowingFn.mergeWindows(new TestMergeContext(windowSet, windowingFn)); + windowFn.mergeWindows(new TestMergeContext(windowSet, windowFn)); } Map> actual = new HashMap<>(); for (W window : windowSet.windows()) { @@ -78,11 +78,11 @@ private static String timestampValue(long timestamp) { * Test implementation of AssignContext. */ private static class TestAssignContext - extends WindowingFn.AssignContext { + extends WindowFn.AssignContext { private Instant timestamp; - public TestAssignContext(Instant timestamp, WindowingFn windowingFn) { - windowingFn.super(); + public TestAssignContext(Instant timestamp, WindowFn windowFn) { + windowFn.super(); this.timestamp = timestamp; } @@ -106,12 +106,12 @@ public Collection windows() { * Test implementation of MergeContext. */ private static class TestMergeContext - extends WindowingFn.MergeContext { + extends WindowFn.MergeContext { private TestWindowSet windowSet; public TestMergeContext( - TestWindowSet windowSet, WindowingFn windowingFn) { - windowingFn.super(); + TestWindowSet windowSet, WindowFn windowFn) { + windowFn.super(); this.windowSet = windowSet; } @@ -127,7 +127,7 @@ public void merge(Collection toBeMerged, W mergeResult) { } /** - * A WindowSet useful for testing WindowingFns which simply + * A WindowSet useful for testing WindowFns which simply * collects the placed elements into multisets. */ private static class TestWindowSet { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index f27308b739589..eba6785b30162 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -90,7 +90,7 @@ public static Globally globally( *

Each output element is in the window by which its corresponding input * was grouped, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * *

See {@link PerKey Combine.PerKey} for more information. @@ -111,7 +111,7 @@ public static PerKey perKey( *

Each output element is in the window by which its corresponding input * was grouped, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * *

See {@link PerKey Combine.PerKey} for more information. @@ -132,7 +132,7 @@ public static PerKey perKey( *

Each output element is in the window by which its corresponding input * was grouped, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * *

See {@link PerKey Combine.PerKey} for more information. @@ -154,7 +154,7 @@ public static PerKey perKey( *

Each output element has the same timestamp and is in the same window * as its corresponding input element, and the output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See {@link GroupedValues Combine.GroupedValues} for more information. @@ -180,7 +180,7 @@ public static GroupedValues groupedValues( *

Each output element has the same timestamp and is in the same window * as its corresponding input element, and the output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See {@link GroupedValues Combine.GroupedValues} for more information. @@ -206,7 +206,7 @@ public static GroupedValues groupedValues( *

Each output element has the same timestamp and is in the same window * as its corresponding input element, and the output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See {@link GroupedValues Combine.GroupedValues} for more information. @@ -770,7 +770,7 @@ public PCollection apply(PCollection input) { .apply(Combine.perKey(fn.asKeyedFn())) .apply(Values.create()); - if (input.getWindowingFn().isCompatible(new GlobalWindow())) { + if (input.getWindowFn().isCompatible(new GlobalWindow())) { return insertDefaultValueIfEmpty(output); } else { return output; @@ -892,7 +892,7 @@ public V extractOutput(List accumulator) { *

Each output element is in the window by which its corresponding input * was grouped, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * * @param the type of the keys of the input and output @@ -970,7 +970,7 @@ protected String getKindString() { *

Each output element has the same timestamp and is in the same window * as its corresponding input element, and the output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See also {@link #globally}/{@link Globally Combine.Globally}, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 0550b2fff5f40..b6e4561f2dde4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -39,7 +39,7 @@ *

Each output element is in the window by which its corresponding input * was grouped, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * *

Example of use: diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index a62982ceff2db..8b8eb1cfab92b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -83,9 +83,9 @@ public abstract class Context { * *

If invoked from {@link #startBundle} or {@link #finishValue}, * this will attempt to use the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element - * should be in, throwing an exception if the {@code WindowingFn} attempts + * should be in, throwing an exception if the {@code WindowFn} attempts * to access any information about the input element. The output element * will have a timestamp of negative infinity. */ @@ -102,9 +102,9 @@ public abstract class Context { * *

If invoked from {@link #startBundle} or {@link #finishValue}, * this will attempt to use the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element - * should be in, throwing an exception if the {@code WindowingFn} attempts + * should be in, throwing an exception if the {@code WindowFn} attempts * to access any information about the input element except for the * timestamp. */ @@ -124,9 +124,9 @@ public abstract class Context { * *

If invoked from {@link #startBundle} or {@link #finishValue}, * this will attempt to use the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element - * should be in, throwing an exception if the {@code WindowingFn} attempts + * should be in, throwing an exception if the {@code WindowFn} attempts * to access any information about the input element. The output element * will have a timestamp of negative infinity. * @@ -147,9 +147,9 @@ public abstract class Context { * *

If invoked from {@link #startBundle} or {@link #finishValue}, * this will attempt to use the - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element - * should be in, throwing an exception if the {@code WindowingFn} attempts + * should be in, throwing an exception if the {@code WindowFn} attempts * to access any information about the input element except for the * timestamp. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index 3ef5e3a369a30..dd8efca82a240 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionList; @@ -57,11 +57,11 @@ public class Flatten { * the {@link PCollection}s in its input. * *

If any of the inputs to {@code Flatten} require window merging, - * all inputs must have equal {@link WindowingFn}s. + * all inputs must have equal {@link WindowFn}s. * The output elements of {@code Flatten} are in the same windows and * have the same timestamps as their corresponding input elements. The output * {@code PCollection} will have the same - * {@link WindowingFn} as all of the inputs. + * {@link WindowFn} as all of the inputs. * * @param the type of the elements in the input and output * {@code PCollection}s. @@ -113,21 +113,21 @@ private FlattenPCollectionList() { } @Override public PCollection apply(PCollectionList inputs) { - WindowingFn windowingFn; + WindowFn windowFn; if (!getInput().getAll().isEmpty()) { - windowingFn = getInput().get(0).getWindowingFn(); + windowFn = getInput().get(0).getWindowFn(); for (PCollection input : getInput().getAll()) { - if (!windowingFn.isCompatible(input.getWindowingFn())) { + if (!windowFn.isCompatible(input.getWindowFn())) { throw new IllegalStateException( - "Inputs to Flatten had incompatible window windowingFns: " - + windowingFn + ", " + input.getWindowingFn()); + "Inputs to Flatten had incompatible window windowFns: " + + windowFn + ", " + input.getWindowFn()); } } } else { - windowingFn = new GlobalWindow(); + windowFn = new GlobalWindow(); } - return PCollection.createPrimitiveOutputInternal(windowingFn); + return PCollection.createPrimitiveOutputInternal(windowFn); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index c6d0acf4094d4..225fd18dfdabb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -26,9 +26,9 @@ import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowingFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowingFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; @@ -98,17 +98,17 @@ *

See {@link Combine.PerKey} for a common pattern of * {@code GroupByKey} followed by {@link Combine.GroupedValues}. * - *

When grouping, windows that can be merged according to the {@link WindowingFn} + *

When grouping, windows that can be merged according to the {@link WindowFn} * of the input {@code PCollection} will be merged together, and a group * corresponding to the new, merged window will be emitted. * The timestamp for each group is the upper bound of its window, e.g., the most * recent timestamp that can be assigned into the window, and the group will be * in the window that it corresponds to. The output {@code PCollection} will - * have the same {@link WindowingFn} as the input. + * have the same {@link WindowFn} as the input. * - *

If the {@link WindowingFn} of the input requires merging, it is not + *

If the {@link WindowFn} of the input requires merging, it is not * valid to apply another {@code GroupByKey} without first applying a new - * {@link WindowingFn}. + * {@link WindowFn}. * * @param the type of the keys of the input and output * {@code PCollection}s @@ -159,7 +159,7 @@ public PCollection>> apply( Coder keyCoder = inputKvCoder.getKeyCoder(); Coder inputValueCoder = inputKvCoder.getValueCoder(); Coder> outputValueCoder = FullWindowedValueCoder.of( - inputValueCoder, getInput().getWindowingFn().windowCoder()); + inputValueCoder, getInput().getWindowFn().windowCoder()); Coder>> outputKvCoder = KvCoder.of(keyCoder, outputValueCoder); return input.apply(ParDo.of( @@ -228,10 +228,10 @@ public int compare(WindowedValue e1, WindowedValue e2) { public static class GroupAlsoByWindow extends PTransform>>>, PCollection>>> { - private final WindowingFn windowingFn; + private final WindowFn windowFn; - public GroupAlsoByWindow(WindowingFn windowingFn) { - this.windowingFn = windowingFn; + public GroupAlsoByWindow(WindowFn windowFn) { + this.windowFn = windowFn; } @Override @@ -259,7 +259,7 @@ public PCollection>> apply( return input.apply(ParDo.of( new GroupAlsoByWindowsDoFn( - (WindowingFn) windowingFn, inputIterableElementValueCoder))) + (WindowFn) windowFn, inputIterableElementValueCoder))) .setCoder(outputKvCoder); } } @@ -282,16 +282,16 @@ public GroupByKeyOnly() { } @SuppressWarnings({"rawtypes", "unchecked"}) @Override public PCollection>> apply(PCollection> input) { - WindowingFn windowingFn = getInput().getWindowingFn(); - if (!(windowingFn instanceof NonMergingWindowingFn)) { + WindowFn windowFn = getInput().getWindowFn(); + if (!(windowFn instanceof NonMergingWindowFn)) { // Prevent merging windows again, without explicit user // involvement, e.g., by Window.into() or Window.remerge(). - windowingFn = new InvalidWindowingFn( - "WindowingFn has already been consumed by previous GroupByKey", - windowingFn); + windowFn = new InvalidWindowFn( + "WindowFn has already been consumed by previous GroupByKey", + windowFn); } return PCollection.>>createPrimitiveOutputInternal( - windowingFn); + windowFn); } @Override @@ -451,17 +451,17 @@ public PCollection>> applyHelper( // This operation groups by the combination of key and window, // merging windows as needed, using the windows assigned to the // key/value input elements and the window merge operation of the - // windowing function associated with the input PCollection. - WindowingFn windowingFn = getInput().getWindowingFn(); - if (windowingFn instanceof InvalidWindowingFn) { - String cause = ((InvalidWindowingFn) windowingFn).getCause(); + // window function associated with the input PCollection. + WindowFn windowFn = getInput().getWindowFn(); + if (windowFn instanceof InvalidWindowFn) { + String cause = ((InvalidWindowFn) windowFn).getCause(); throw new IllegalStateException( "GroupByKey must have a valid Window merge function. " + "Invalid because: " + cause); } - if (windowingFn.isCompatible(new GlobalWindow())) { + if (windowFn.isCompatible(new GlobalWindow())) { // The input PCollection is using the degenerate default - // windowing function, which uses a single global window for all + // window function, which uses a single global window for all // elements. We can implement this using a more-primitive // non-window-aware GBK transform. return input.apply(new GroupByKeyOnly()); @@ -491,7 +491,7 @@ public PCollection>> applyHelper( return gbkOutput // Group each key's values by window, merging windows as needed. 
- .apply(new GroupAlsoByWindow(windowingFn)); + .apply(new GroupAlsoByWindow(windowFn)); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java index 0e78f4ad1e840..2b356ad6be549 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java @@ -32,7 +32,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See also {@link Values}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java index 4b12d5db08c28..c898dff911f70 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java @@ -34,7 +34,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * * @param the type of the keys in the input {@code PCollection} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 0af3c3baf8971..f61b197bc58a3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -96,7 +96,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

Naming {@code ParDo}s

@@ -708,7 +708,7 @@ public PCollection apply(PCollection input) { if (sideInputs == null) { sideInputs = Collections.emptyList(); } - return PCollection.createPrimitiveOutputInternal(getInput().getWindowingFn()) + return PCollection.createPrimitiveOutputInternal(getInput().getWindowFn()) .setTypeTokenInternal(fn.getOutputTypeToken()); } @@ -900,7 +900,7 @@ public BoundMulti withSideInputs( public PCollectionTuple apply(PCollection input) { PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal( TupleTagList.of(mainOutputTag).and(sideOutputTags.getAll()), - getInput().getWindowingFn()); + getInput().getWindowFn()); // The fn will likely be an instance of an anonymous subclass // such as DoFn { }, thus will have a high-fidelity @@ -1040,7 +1040,7 @@ private static DoFnRunner evaluateHelper( sideOutputTags, executionContext.getStepContext(name), context.getAddCounterMutator(), - input.getWindowingFn()); + input.getWindowFn()); fnRunner.startBundle(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java index 7e5cc00f62a9f..e7051985c0598 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java @@ -55,7 +55,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and each output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * * @param the type of the elements of the input and output diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java index 9540eda6da7d1..8d8eaf1e75811 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java @@ -37,7 +37,7 @@ *

Each output element is in the same window as its corresponding input * element, and has the timestamp of the end of that window. The output * {@code PCollection} has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * as the input. * *

Does not preserve any order the input PCollection might have had. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java index 4ca84e45c6501..dad265f15211b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java @@ -32,7 +32,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * *

See also {@link Keys}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java index 82ce93cdf3274..bd8415f2684aa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java @@ -40,7 +40,7 @@ *

Each output element has the same timestamp and is in the same windows * as its corresponding input element, and the output {@code PCollection} * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * associated with it as the input. * * @param the type of the keys in the output {@code PCollection} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java index cdbc1a64232b3..78b3d59ecdd71 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java @@ -26,7 +26,7 @@ import org.joda.time.Years; /** - * A collection of {@link WindowingFn}s that windows values into calendar-based + * A collection of {@link WindowFn}s that windows values into calendar-based * windows such as spans of days, months, or years. * *

For example, to group data into quarters that change on the 15th, use @@ -35,7 +35,7 @@ public class CalendarWindows { /** - * Returns a {@link WindowingFn} that windows elements into periods measured by days. + * Returns a {@link WindowFn} that windows elements into periods measured by days. * *

For example, {@code CalendarWindows.days(1)} will window elements into * separate windows for each day. @@ -45,7 +45,7 @@ public static DaysWindows days(int number) { } /** - * Returns a {@link WindowingFn} that windows elements into periods measured by weeks. + * Returns a {@link WindowFn} that windows elements into periods measured by weeks. * *

For example, {@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)} will * window elements into week-long windows starting on Tuesdays. @@ -58,7 +58,7 @@ public static DaysWindows weeks(int number, int startDayOfWeek) { } /** - * Returns a {@link WindowingFn} that windows elements into periods measured by months. + * Returns a {@link WindowFn} that windows elements into periods measured by months. * *

For example, * {@code CalendarWindows.months(8).withStartingMonth(2014, 1).beginningOnDay(10)} @@ -70,7 +70,7 @@ public static MonthsWindows months(int number) { } /** - * Returns a {@link WindowingFn} that windows elements into periods measured by years. + * Returns a {@link WindowFn} that windows elements into periods measured by years. * *

For example, * {@code CalendarWindows.years(1).withTimeZone(DateTimeZone.forId("America/Los_Angeles"))} @@ -82,7 +82,7 @@ public static YearsWindows years(int number) { } /** - * A {@link WindowingFn} that windows elements into periods measured by days. + * A {@link WindowFn} that windows elements into periods measured by days. * *

By default, periods of multiple days are measured starting at the * epoch. This can be overridden with {@link #withStartingDay}. @@ -90,7 +90,7 @@ public static YearsWindows years(int number) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class DaysWindows extends PartitioningWindowingFn { + public static class DaysWindows extends PartitioningWindowFn { public DaysWindows withStartingDay(int year, int month, int day) { return new DaysWindows( @@ -132,7 +132,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof DaysWindows)) { return false; } @@ -144,7 +144,7 @@ public boolean isCompatible(WindowingFn other) { } /** - * A {@link WindowingFn} that windows elements into periods measured by months. + * A {@link WindowFn} that windows elements into periods measured by months. * *

By default, periods of multiple months are measured starting at the * epoch. This can be overridden with {@link #withStartingMonth}. @@ -155,7 +155,7 @@ public boolean isCompatible(WindowingFn other) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class MonthsWindows extends PartitioningWindowingFn { + public static class MonthsWindows extends PartitioningWindowFn { public MonthsWindows beginningOnDay(int dayOfMonth) { return new MonthsWindows( @@ -206,7 +206,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof MonthsWindows)) { return false; } @@ -219,7 +219,7 @@ public boolean isCompatible(WindowingFn other) { } /** - * A {@link WindowingFn} that windows elements into periods measured by years. + * A {@link WindowFn} that windows elements into periods measured by years. * *

By default, periods of multiple years are measured starting at the * epoch. This can be overridden with {@link #withStartingYear}. @@ -230,7 +230,7 @@ public boolean isCompatible(WindowingFn other) { *

The time zone used to determine calendar boundaries is UTC, unless this * is overridden with the {@link #withTimeZone} method. */ - public static class YearsWindows extends PartitioningWindowingFn { + public static class YearsWindows extends PartitioningWindowFn { public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) { return new YearsWindows( @@ -285,7 +285,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof YearsWindows)) { return false; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java index d381a80627b73..138d82399342a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java @@ -22,7 +22,7 @@ import org.joda.time.Instant; /** - * A {@link WindowingFn} that windows values into fixed-size timestamp-based windows. + * A {@link WindowFn} that windows values into fixed-size timestamp-based windows. * *

For example, in order to partition the data into 10 minute windows: *

 {@code
@@ -32,7 +32,7 @@
  * } 
*/ @SuppressWarnings("serial") -public class FixedWindows extends PartitioningWindowingFn { +public class FixedWindows extends PartitioningWindowFn { /** * Size of this window. @@ -86,7 +86,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { return (other instanceof FixedWindows) && (size.equals(((FixedWindows) other).size)) && (offset.equals(((FixedWindows) other).offset)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java index 507b1cc860783..e3298fbb3607c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -27,18 +27,18 @@ import java.util.Collection; /** - * Default {@link WindowingFn} where all data is in the same bucket. + * Default {@link WindowFn} where all data is in the same bucket. */ @SuppressWarnings("serial") public class GlobalWindow - extends NonMergingWindowingFn { + extends NonMergingWindowFn { @Override public Collection assignWindows(AssignContext c) { return Arrays.asList(Window.INSTANCE); } @Override - public boolean isCompatible(WindowingFn o) { + public boolean isCompatible(WindowFn o) { return o instanceof GlobalWindow; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java similarity index 58% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java index 53dc93a213945..3a17f4a4cbb9a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java @@ -21,31 +21,31 @@ import java.util.Collection; /** - * A {@link WindowingFn} that represents an invalid pipeline state. + * A {@link WindowFn} that represents an invalid pipeline state. * * @param window type */ -public class InvalidWindowingFn extends WindowingFn { +public class InvalidWindowFn extends WindowFn { private String cause; - private WindowingFn originalWindowingFn; + private WindowFn originalWindowFn; - public InvalidWindowingFn(String cause, WindowingFn originalWindowingFn) { - this.originalWindowingFn = originalWindowingFn; + public InvalidWindowFn(String cause, WindowFn originalWindowFn) { + this.originalWindowFn = originalWindowFn; this.cause = cause; } /** - * Returns the reason that this {@code WindowingFn} is invalid. + * Returns the reason that this {@code WindowFn} is invalid. */ public String getCause() { return cause; } /** - * Returns the original windowingFn that this InvalidWindowingFn replaced. + * Returns the original windowFn that this InvalidWindowFn replaced. 
*/ - public WindowingFn getOriginalWindowingFn() { - return originalWindowingFn; + public WindowFn getOriginalWindowFn() { + return originalWindowFn; } @Override @@ -60,16 +60,16 @@ public void mergeWindows(MergeContext c) { @Override public Coder windowCoder() { - return originalWindowingFn.windowCoder(); + return originalWindowFn.windowCoder(); } /** - * {@code InvalidWindowingFn} objects with the same {@code originalWindowingFn} are compatible. + * {@code InvalidWindowFn} objects with the same {@code originalWindowFn} are compatible. */ @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { return getClass() == other.getClass() - && getOriginalWindowingFn().isCompatible( - ((InvalidWindowingFn) other).getOriginalWindowingFn()); + && getOriginalWindowFn().isCompatible( + ((InvalidWindowFn) other).getOriginalWindowFn()); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java index 4d4dd8492684d..e90f8f0897e6d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java @@ -22,14 +22,14 @@ import java.util.List; /** - * A {@link WindowingFn} that merges overlapping {@link IntervalWindow}s. + * A {@link WindowFn} that merges overlapping {@link IntervalWindow}s. */ public class MergeOverlappingIntervalWindows { /** * Merge overlapping intervals. */ - public static void mergeWindows(WindowingFn.MergeContext c) throws Exception { + public static void mergeWindows(WindowFn.MergeContext c) throws Exception { // Merge any overlapping windows into a single window. // Sort the list of existing windows so we only have to // traverse the list once rather than considering all @@ -72,7 +72,7 @@ public void add(IntervalWindow window) { union = union == null ? window : union.span(window); parts.add(window); } - public void apply(WindowingFn.MergeContext c) throws Exception { + public void apply(WindowFn.MergeContext c) throws Exception { if (parts.size() > 1) { c.merge(parts, union); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java similarity index 80% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java index ffeea996d60dc..3cce009e9e728 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java @@ -17,14 +17,14 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; /** - * Abstract base class for {@link WindowingFn}s that do not merge windows. + * Abstract base class for {@link WindowFn}s that do not merge windows. 
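In contrast to the non-merging base class introduced here, a merging WindowFn can reuse the MergeOverlappingIntervalWindows helper above. A hypothetical override, not taken from the patch, might look like:

    // Inside a hypothetical WindowFn<Object, IntervalWindow> subclass that merges windows.
    @Override
    public void mergeWindows(MergeContext c) throws Exception {
      // Collapse any overlapping IntervalWindows into a single spanning window.
      MergeOverlappingIntervalWindows.mergeWindows(c);
    }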
* * @param type of elements being windowed * @param {@link BoundedWindow} subclass used to represent the windows used by this - * {@code WindowingFn} + * {@code WindowFn} */ -public abstract class NonMergingWindowingFn - extends WindowingFn { +public abstract class NonMergingWindowFn + extends WindowFn { @Override public final void mergeWindows(MergeContext c) { } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java similarity index 86% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java index 6a65ba134f181..7b9d8d025f760 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java @@ -22,14 +22,14 @@ import java.util.Collection; /** - * A {@link WindowingFn} that places each value into exactly one window + * A {@link WindowFn} that places each value into exactly one window * based on its timestamp and never merges windows. * * @param type of elements being windowed * @param window type */ -public abstract class PartitioningWindowingFn - extends NonMergingWindowingFn { +public abstract class PartitioningWindowFn + extends NonMergingWindowFn { /** * Returns the single window to which elements with this timestamp belong. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java index 26744a549ba2a..ff936df120084 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java @@ -24,7 +24,7 @@ import java.util.Collection; /** - * A WindowingFn windowing values into sessions separated by {@link #gapDuration}-long + * A WindowFn windowing values into sessions separated by {@link #gapDuration}-long * periods with no elements. * *
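Ahead of the Javadoc example below, a usage sketch of the renamed Sessions factory; the events collection is an assumed PCollection<String>, not part of the patch:

    // Elements separated by less than 10 minutes of inactivity land in the same
    // session window; overlapping session windows are merged per key at GroupByKey time.
    PCollection<String> sessioned = events.apply(
        Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(10))));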

For example, in order to window data into session with at least 10 minute @@ -35,7 +35,7 @@ * Window.by(Sessions.withGapDuration(Duration.standardMinutes(10)))); * } */ -public class Sessions extends WindowingFn { +public class Sessions extends WindowFn { /** * Duration of the gaps between sessions. @@ -43,14 +43,14 @@ public class Sessions extends WindowingFn { private final Duration gapDuration; /** - * Creates a {@code Sessions} {@link WindowingFn} with the specified gap duration. + * Creates a {@code Sessions} {@link WindowFn} with the specified gap duration. */ public static Sessions withGapDuration(Duration gapDuration) { return new Sessions(gapDuration); } /** - * Creates a {@code Sessions} {@link WindowingFn} with the specified gap duration. + * Creates a {@code Sessions} {@link WindowFn} with the specified gap duration. */ private Sessions(Duration gapDuration) { this.gapDuration = gapDuration; @@ -75,7 +75,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { return other instanceof Sessions; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java index 45b90dd33d1e4..d29062e31ef2e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -26,7 +26,7 @@ import java.util.List; /** - * A WindowingFn that windows values into possibly overlapping fixed-size + * A WindowFn that windows values into possibly overlapping fixed-size * timestamp-based windows. * *
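Likewise for SlidingWindows, a usage sketch of the renamed API, again assuming an existing PCollection<String> named events:

    // Windows are 10 minutes long and a new one starts every 5 minutes, so each
    // element is assigned to two overlapping windows.
    PCollection<String> sliding = events.apply(
        Window.<String>into(
            SlidingWindows.of(Duration.standardMinutes(10))
                .every(Duration.standardMinutes(5))));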

For example, in order to window data into 10 minute windows that @@ -38,7 +38,7 @@ * } */ @SuppressWarnings("serial") -public class SlidingWindows extends NonMergingWindowingFn { +public class SlidingWindows extends NonMergingWindowFn { /** * Amount of time between generated windows. @@ -119,7 +119,7 @@ public Collection assignWindows(AssignContext c) { } @Override - public boolean isCompatible(WindowingFn other) { + public boolean isCompatible(WindowFn other) { if (other instanceof SlidingWindows) { SlidingWindows that = (SlidingWindows) other; return period.equals(that.period) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 702476362614b..28956b1360989 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -41,7 +41,7 @@ /** * {@code Window} logically divides up or groups the elements of a - * {@link PCollection} into finite windows according to a {@link WindowingFn}. + * {@link PCollection} into finite windows according to a {@link WindowFn}. * The output of {@code Window} contains the same elements as input, but they * have been logically assigned to windows. The next * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}s, including one @@ -80,7 +80,7 @@ * {(KV("foo", 2), 1m), (KV("bar", 1), 1m), (KV("foo", 1), 2m)} * * - *

Several predefined {@link WindowingFn}s are provided: + *

Several predefined {@link WindowFn}s are provided: *

    *
  • {@link FixedWindows} partitions the timestamps into fixed-width intervals. *
  • {@link SlidingWindows} places data into overlapping fixed-width intervals. @@ -88,8 +88,8 @@ * is separated from the next by no more than a specified gap. *
* - * Additionally, custom {@link WindowingFn}s can be created, by creating new - * subclasses of {@link WindowingFn}. + * Additionally, custom {@link WindowFn}s can be created, by creating new + * subclasses of {@link WindowFn}. */ public class Window { /** @@ -100,7 +100,7 @@ public class Window { * *

The resulting {@code PTransform} is incomplete, and its input/output * type is not yet bound. Use {@link Window.Unbound#into} to specify the - * {@link WindowingFn} to use, which will also bind the input/output type of this + * {@link WindowFn} to use, which will also bind the input/output type of this * {@code PTransform}. */ public static Unbound named(String name) { @@ -109,14 +109,14 @@ public static Unbound named(String name) { /** * Creates a {@code Window} {@code PTransform} that uses the given - * {@link WindowingFn} to window the data. + * {@link WindowFn} to window the data. * *
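A sketch of the two-step flow described here, combining named() with into(); CalendarWindows.days(1) serves as the WindowFn and the events collection is an assumed input, not code from the patch:

    // named() returns an incomplete (Unbound) transform; into() binds it to a
    // WindowFn and to the element type, yielding a Bound transform ready to apply.
    PCollection<String> daily = events.apply(
        Window.named("DailyWindows")
            .<String>into(CalendarWindows.days(1)));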

The resulting {@code PTransform}'s types have been bound, with both the * input and output being a {@code PCollection}, inferred from the types of - * the argument {@code WindowingFn}. It is ready to be applied, or further + * the argument {@code WindowFn}. It is ready to be applied, or further * properties can be set on it first. */ - public static Bound into(WindowingFn fn) { + public static Bound into(WindowFn fn) { return new Unbound().into(fn); } @@ -124,7 +124,7 @@ public static Bound into(WindowingFn fn) { * An incomplete {@code Window} transform, with unbound input/output type. * *

Before being applied, {@link Window.Unbound#into} must be - * invoked to specify the {@link WindowingFn} to invoke, which will also + * invoked to specify the {@link WindowFn} to invoke, which will also * bind the input/output type of this {@code PTransform}. */ public static class Unbound { @@ -151,27 +151,27 @@ public Unbound named(String name) { /** * Returns a new {@code Window} {@code PTransform} that's like this - * transform but which will use the given {@link WindowingFn}, and which has + * transform but which will use the given {@link WindowFn}, and which has * its input and output types bound. Does not modify this transform. The * resulting {@code PTransform} is sufficiently specified to be applied, * but more properties can still be specified. */ - public Bound into(WindowingFn fn) { + public Bound into(WindowFn fn) { return new Bound<>(name, fn); } } /** * A {@code PTransform} that windows the elements of a {@code PCollection}, - * into finite windows according to a user-specified {@code WindowingFn}. + * into finite windows according to a user-specified {@code WindowFn}. * * @param The type of elements this {@code Window} is applied to */ @SuppressWarnings("serial") public static class Bound extends PTransform, PCollection> { - WindowingFn fn; + WindowFn fn; - Bound(String name, WindowingFn fn) { + Bound(String name, WindowFn fn) { this.name = name; this.fn = fn; } @@ -226,17 +226,17 @@ public static Remerge remerge() { public static class Remerge extends PTransform, PCollection> { @Override public PCollection apply(PCollection input) { - WindowingFn windowingFn = getInput().getWindowingFn(); - WindowingFn outputWindowingFn = - (windowingFn instanceof InvalidWindowingFn) - ? ((InvalidWindowingFn) windowingFn).getOriginalWindowingFn() - : windowingFn; + WindowFn windowFn = getInput().getWindowFn(); + WindowFn outputWindowFn = + (windowFn instanceof InvalidWindowFn) + ? ((InvalidWindowFn) windowFn).getOriginalWindowFn() + : windowFn; return input.apply(ParDo.named("Identity").of(new DoFn() { @Override public void processElement(ProcessContext c) { c.output(c.element()); } - })).setWindowingFnInternal(outputWindowingFn); + })).setWindowFnInternal(outputWindowFn); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java similarity index 87% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java index 935deb09f2b7c..ed5cdb30dac64 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java @@ -26,19 +26,19 @@ /** * The argument to the {@link Window} transform used to assign elements into * windows and to determine how windows are merged. See {@link Window} for more - * information on how {@code WindowingFn}s are used and for a library of - * predefined {@code WindowingFn}s. + * information on how {@code WindowFn}s are used and for a library of + * predefined {@code WindowFn}s. * *

Users will generally want to use the predefined - * {@code WindowingFn}s, but it is also possible to create new + * {@code WindowFn}s, but it is also possible to create new * subclasses. - * TODO: Describe how to properly create {@code WindowingFn}s. + * TODO: Describe how to properly create {@code WindowFn}s. * * @param type of elements being windowed * @param {@link BoundedWindow} subclass used to represent the - * windows used by this {@code WindowingFn} + * windows used by this {@code WindowFn} */ -public abstract class WindowingFn +public abstract class WindowFn implements Serializable { /** @@ -105,13 +105,13 @@ public abstract void merge(Collection toBeMerged, W mergeResult) /** * Returns whether this performs the same merging as the given - * {@code WindowingFn}. + * {@code WindowFn}. */ - public abstract boolean isCompatible(WindowingFn other); + public abstract boolean isCompatible(WindowFn other); /** * Returns the {@link Coder} used for serializing the windows used - * by this windowingFn. + * by this windowFn. */ public abstract Coder windowCoder(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java index cab217d3e9698..eb6ecb3af3833 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java @@ -20,7 +20,7 @@ * *
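Since the TODO above defers describing custom WindowFns, here is a deliberately trivial, hypothetical subclass sketching the four abstract methods; it is not part of the SDK or this patch, and the signatures follow the WindowFn diff above:

    import com.google.cloud.dataflow.sdk.coders.Coder;
    import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
    import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
    import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
    import org.joda.time.Duration;
    import org.joda.time.Instant;
    import java.util.Arrays;
    import java.util.Collection;

    /** Assigns every element to one shared IntervalWindow (illustration only). */
    public class SingleIntervalWindowFn extends WindowFn<Object, IntervalWindow> {
      private static final IntervalWindow EVERYTHING =
          new IntervalWindow(new Instant(0), new Instant(Long.MAX_VALUE));

      @Override
      public Collection<IntervalWindow> assignWindows(AssignContext c) throws Exception {
        return Arrays.asList(EVERYTHING);
      }

      @Override
      public void mergeWindows(MergeContext c) throws Exception {
        // Only one window ever exists, so there is nothing to merge.
      }

      @Override
      public boolean isCompatible(WindowFn<?, ?> other) {
        return other instanceof SingleIntervalWindowFn;
      }

      @Override
      public Coder<IntervalWindow> windowCoder() {
        // Borrow the IntervalWindow coder from an existing WindowFn rather than
        // assuming a particular coder class.
        return FixedWindows.of(Duration.standardDays(1)).windowCoder();
      }
    }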

{@code Window} logically divides up or groups the elements of a * {@link com.google.cloud.dataflow.sdk.values.PCollection} into finite windows according to a - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn}. + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}. * The output of {@code Window} contains the same elements as input, but they * have been logically assigned to windows. The next * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}s, including one diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java index fa54eee605b9c..9e8306bc25946 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.KV; import java.util.Arrays; @@ -56,14 +56,14 @@ interface ActiveWindowManager { * Wrapper around AbstractWindowSet that provides the MergeContext interface. */ static class WindowMergeContext - extends WindowingFn.MergeContext { + extends WindowFn.MergeContext { private final AbstractWindowSet windowSet; @SuppressWarnings("unchecked") public WindowMergeContext( AbstractWindowSet windowSet, - WindowingFn windowingFn) { - ((WindowingFn) windowingFn).super(); + WindowFn windowFn) { + ((WindowFn) windowFn).super(); this.windowSet = windowSet; } @@ -77,19 +77,19 @@ public WindowMergeContext( } protected final K key; - protected final WindowingFn windowingFn; + protected final WindowFn windowFn; protected final Coder inputCoder; protected final DoFnProcessContext> context; protected final ActiveWindowManager activeWindowManager; protected AbstractWindowSet( K key, - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder, DoFnProcessContext> context, ActiveWindowManager activeWindowManager) { this.key = key; - this.windowingFn = windowingFn; + this.windowFn = windowFn; this.inputCoder = inputCoder; this.context = context; this.activeWindowManager = activeWindowManager; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java index a0b91b53037d9..27066f8091079 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import org.joda.time.Instant; @@ -26,15 +26,15 @@ /** * {@link DoFn} that tags elements of a PCollection with windows, according - * to the provided {@link WindowingFn}. + * to the provided {@link WindowFn}. 
* @param Type of elements being windowed * @param Window type */ @SuppressWarnings("serial") public class AssignWindowsDoFn extends DoFn { - private WindowingFn fn; + private WindowFn fn; - public AssignWindowsDoFn(WindowingFn fn) { + public AssignWindowsDoFn(WindowFn fn) { this.fn = fn; } @@ -43,8 +43,8 @@ public AssignWindowsDoFn(WindowingFn fn) { public void processElement(ProcessContext c) throws Exception { final DoFnProcessContext context = (DoFnProcessContext) c; Collection windows = - ((WindowingFn) fn).assignWindows( - ((WindowingFn) fn).new AssignContext() { + ((WindowFn) fn).assignWindows( + ((WindowFn) fn).new AssignContext() { @Override public T element() { return context.element(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java index 957434c154421..33cfe80a9231e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java @@ -22,7 +22,7 @@ import com.google.cloud.dataflow.sdk.coders.MapCoder; import com.google.cloud.dataflow.sdk.coders.SetCoder; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.CodedTupleTag; import com.google.cloud.dataflow.sdk.values.KV; @@ -49,8 +49,8 @@ class BufferingWindowSet CodedTupleTag.of( "mergeTree", MapCoder.of( - windowingFn.windowCoder(), - SetCoder.of(windowingFn.windowCoder()))); + windowFn.windowCoder(), + SetCoder.of(windowFn.windowCoder()))); /** * A map of live windows to windows that were merged into them. 
@@ -70,11 +70,11 @@ class BufferingWindowSet protected BufferingWindowSet( K key, - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder, DoFnProcessContext>> context, ActiveWindowManager activeWindowManager) throws Exception { - super(key, windowingFn, inputCoder, context, activeWindowManager); + super(key, windowFn, inputCoder, context, activeWindowManager); mergeTree = emptyIfNull( context.context.stepContext.lookup(Arrays.asList(mergeTreeTag)) @@ -86,7 +86,7 @@ protected BufferingWindowSet( @Override public void put(W window, V value) throws Exception { context.context.stepContext.writeToTagList( - bufferTag(window, windowingFn.windowCoder(), inputCoder), + bufferTag(window, windowFn.windowCoder(), inputCoder), value, context.timestamp()); if (!mergeTree.containsKey(window)) { @@ -152,12 +152,12 @@ protected Iterable finalValue(W window) throws Exception { for (W curWindow : curWindows) { Iterable items = context.context.stepContext.readTagList(bufferTag( - curWindow, windowingFn.windowCoder(), inputCoder)); + curWindow, windowFn.windowCoder(), inputCoder)); for (V item : items) { toEmit.add(item); } context.context.stepContext.deleteTagList(bufferTag( - curWindow, windowingFn.windowCoder(), inputCoder)); + curWindow, windowFn.windowCoder(), inputCoder)); } return toEmit; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 115b3f15f5540..a900efbbfef67 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -22,8 +22,8 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn.AssignContext; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn.AssignContext; import com.google.cloud.dataflow.sdk.util.DoFnRunner.OutputManager; import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; import com.google.cloud.dataflow.sdk.util.common.CounterSet; @@ -56,7 +56,7 @@ class DoFnContext extends DoFn.Context { final TupleTag mainOutputTag; final StepContext stepContext; final CounterSet.AddCounterMutator addCounterMutator; - final WindowingFn windowingFn; + final WindowFn windowFn; public DoFnContext(PipelineOptions options, DoFn fn, @@ -66,7 +66,7 @@ public DoFnContext(PipelineOptions options, List> sideOutputTags, StepContext stepContext, CounterSet.AddCounterMutator addCounterMutator, - WindowingFn windowingFn) { + WindowFn windowFn) { fn.super(); this.options = options; this.fn = fn; @@ -80,7 +80,7 @@ public DoFnContext(PipelineOptions options, } this.stepContext = stepContext; this.addCounterMutator = addCounterMutator; - this.windowingFn = windowingFn; + this.windowFn = windowFn; } public R getReceiver(TupleTag tag) { @@ -122,18 +122,18 @@ WindowedValue makeWindowedValue( if (windows == null) { try { - windows = windowingFn.assignWindows(windowingFn.new AssignContext() { + windows = windowFn.assignWindows(windowFn.new AssignContext() { @Override public Object element() { throw new UnsupportedOperationException( - "WindowingFn attemped to access input element when none was available"); + "WindowFn 
attemped to access input element when none was available"); } @Override public Instant timestamp() { if (inputTimestamp == null) { throw new UnsupportedOperationException( - "WindowingFn attemped to access input timestamp when none was available"); + "WindowFn attemped to access input timestamp when none was available"); } return inputTimestamp; } @@ -141,7 +141,7 @@ public Instant timestamp() { @Override public Collection windows() { throw new UnsupportedOperationException( - "WindowingFn attemped to access input windows when none were available"); + "WindowFn attemped to access input windows when none were available"); } }); } catch (Exception e) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java index 1af14d7a130ea..9fdb9890fb98f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java @@ -17,7 +17,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import java.io.Serializable; @@ -30,18 +30,18 @@ public class DoFnInfo implements Serializable { private static final long serialVersionUID = 0; private DoFn doFn; - private WindowingFn windowingFn; + private WindowFn windowFn; - public DoFnInfo(DoFn doFn, WindowingFn windowingFn) { + public DoFnInfo(DoFn doFn, WindowFn windowFn) { this.doFn = doFn; - this.windowingFn = windowingFn; + this.windowFn = windowFn; } public DoFn getDoFn() { return doFn; } - public WindowingFn getWindowingFn() { - return windowingFn; + public WindowFn getWindowFn() { + return windowFn; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index e8ac2486da6e0..02c80be035278 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -61,11 +61,11 @@ private DoFnRunner(PipelineOptions options, List> sideOutputTags, StepContext stepContext, CounterSet.AddCounterMutator addCounterMutator, - WindowingFn windowingFn) { + WindowFn windowFn) { this.fn = fn; this.context = new DoFnContext<>(options, fn, sideInputs, outputManager, mainOutputTag, sideOutputTags, stepContext, - addCounterMutator, windowingFn); + addCounterMutator, windowFn); } public static DoFnRunner create( @@ -77,10 +77,10 @@ public static DoFnRunner create( List> sideOutputTags, StepContext stepContext, CounterSet.AddCounterMutator addCounterMutator, - WindowingFn windowingFn) { + WindowFn windowFn) { return new DoFnRunner<>( options, fn, sideInputs, outputManager, - mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowingFn); + mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowFn); } @SuppressWarnings({"rawtypes", "unchecked"}) @@ -92,7 
+92,7 @@ public static DoFnRunner createWithListOutputs( List> sideOutputTags, StepContext stepContext, CounterSet.AddCounterMutator addCounterMutator, - WindowingFn windowingFn) { + WindowFn windowFn) { return create( options, fn, sideInputs, new OutputManager() { @@ -105,7 +105,7 @@ public void output(List list, WindowedValue output) { list.add(output); } }, - mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowingFn); + mainOutputTag, sideOutputTags, stepContext, addCounterMutator, windowFn); } /** Calls {@link DoFn#startBundle}. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index f4bd28039686f..26297d0eba974 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -19,8 +19,8 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowingFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.common.PeekingReiterator; import com.google.cloud.dataflow.sdk.util.common.Reiterable; import com.google.cloud.dataflow.sdk.util.common.Reiterator; @@ -52,13 +52,13 @@ public class GroupAlsoByWindowsDoFn extends DoFn>>, KV>> { // TODO: Add back RequiresKeyed state once that is supported. 
- protected WindowingFn windowingFn; + protected WindowFn windowFn; protected Coder inputCoder; public GroupAlsoByWindowsDoFn( - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder) { - this.windowingFn = windowingFn; + this.windowFn = windowFn; this.inputCoder = inputCoder; } @@ -67,7 +67,7 @@ public void processElement(ProcessContext processContext) throws Exception { DoFnProcessContext>>, KV>> context = (DoFnProcessContext>>, KV>>) processContext; - if (windowingFn instanceof NonMergingWindowingFn) { + if (windowFn instanceof NonMergingWindowFn) { processElementViaIterators(context); } else { processElementViaWindowSet(context); @@ -81,19 +81,19 @@ private void processElementViaWindowSet( K key = context.element().getKey(); BatchActiveWindowManager activeWindowManager = new BatchActiveWindowManager<>(); AbstractWindowSet, W> windowSet = - new BufferingWindowSet(key, windowingFn, inputCoder, context, activeWindowManager); + new BufferingWindowSet(key, windowFn, inputCoder, context, activeWindowManager); for (WindowedValue e : context.element().getValue()) { for (BoundedWindow window : e.getWindows()) { windowSet.put((W) window, e.getValue()); } - ((WindowingFn) windowingFn) - .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + ((WindowFn) windowFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); - maybeOutputWindows(activeWindowManager, windowSet, windowingFn, e.getTimestamp()); + maybeOutputWindows(activeWindowManager, windowSet, windowFn, e.getTimestamp()); } - maybeOutputWindows(activeWindowManager, windowSet, windowingFn, null); + maybeOutputWindows(activeWindowManager, windowSet, windowFn, null); windowSet.flush(); } @@ -105,15 +105,15 @@ private void processElementViaWindowSet( private void maybeOutputWindows( BatchActiveWindowManager activeWindowManager, AbstractWindowSet windowSet, - WindowingFn windowingFn, + WindowFn windowFn, Instant nextTimestamp) throws Exception { if (activeWindowManager.hasMoreWindows() && (nextTimestamp == null || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { // There is at least one window ready to emit. Merge now in case that window should be merged // into a not yet completed one. 
- ((WindowingFn) windowingFn) - .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + ((WindowFn) windowFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); } while (activeWindowManager.hasMoreWindows() diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java index 96b2ece5cf987..f2b96c10434ec 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.CodedTupleTag; import com.google.cloud.dataflow.sdk.values.KV; @@ -38,17 +38,17 @@ class PartitionBufferingWindowSet extends AbstractWindowSet, W> { PartitionBufferingWindowSet( K key, - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder, DoFnProcessContext>> context, ActiveWindowManager activeWindowManager) { - super(key, windowingFn, inputCoder, context, activeWindowManager); + super(key, windowFn, inputCoder, context, activeWindowManager); } @Override public void put(W window, V value) throws Exception { context.context.stepContext.writeToTagList( - bufferTag(window, windowingFn.windowCoder(), inputCoder), value, context.timestamp()); + bufferTag(window, windowFn.windowCoder(), inputCoder), value, context.timestamp()); // Adds the window even if it is already present, relying on the streaming backend to // de-deduplicate. 
activeWindowManager.addWindow(window); @@ -56,7 +56,7 @@ public void put(W window, V value) throws Exception { @Override public void remove(W window) throws Exception { - CodedTupleTag tag = bufferTag(window, windowingFn.windowCoder(), inputCoder); + CodedTupleTag tag = bufferTag(window, windowFn.windowCoder(), inputCoder); context.context.stepContext.deleteTagList(tag); } @@ -77,7 +77,7 @@ public boolean contains(W window) { @Override protected Iterable finalValue(W window) throws Exception { - CodedTupleTag tag = bufferTag(window, windowingFn.windowCoder(), inputCoder); + CodedTupleTag tag = bufferTag(window, windowFn.windowCoder(), inputCoder); Iterable result = context.context.stepContext.readTagList(tag); if (result == null) { throw new IllegalStateException("finalValue called for non-existent window"); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index 61911a9decd4c..3496bef97953f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -19,8 +19,8 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.PartitioningWindowingFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.PartitioningWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.KV; import java.io.IOException; @@ -37,32 +37,32 @@ public class StreamingGroupAlsoByWindowsDoFn extends DoFn>, KV> implements DoFn.RequiresKeyedState { - protected WindowingFn windowingFn; + protected WindowFn windowFn; protected Coder inputCoder; protected StreamingGroupAlsoByWindowsDoFn( - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder) { - this.windowingFn = windowingFn; + this.windowFn = windowFn; this.inputCoder = inputCoder; } public static StreamingGroupAlsoByWindowsDoFn create( - WindowingFn windowingFn, + WindowFn windowFn, Coder inputCoder) { - return new StreamingGroupAlsoByWindowsDoFn<>(windowingFn, inputCoder); + return new StreamingGroupAlsoByWindowsDoFn<>(windowFn, inputCoder); } private AbstractWindowSet createWindowSet( K key, DoFnProcessContext> context, AbstractWindowSet.ActiveWindowManager activeWindowManager) throws Exception { - if (windowingFn instanceof PartitioningWindowingFn) { + if (windowFn instanceof PartitioningWindowFn) { return new PartitionBufferingWindowSet( - key, windowingFn, inputCoder, context, activeWindowManager); + key, windowFn, inputCoder, context, activeWindowManager); } else { - return new BufferingWindowSet(key, windowingFn, inputCoder, context, activeWindowManager); + return new BufferingWindowSet(key, windowFn, inputCoder, context, activeWindowManager); } } @@ -75,7 +75,7 @@ public void processElement(ProcessContext processContext) throws Exception { K key = element.getKey(); VI value = element.getValue(); AbstractWindowSet windowSet = createWindowSet( - key, context, new StreamingActiveWindowManager<>(context, windowingFn.windowCoder())); + key, context, new StreamingActiveWindowManager<>(context, windowFn.windowCoder())); for (BoundedWindow window : 
context.windows()) { windowSet.put((W) window, value); @@ -86,14 +86,14 @@ public void processElement(ProcessContext processContext) throws Exception { TimerOrElement timer = context.element(); AbstractWindowSet windowSet = createWindowSet( (K) timer.key(), context, new StreamingActiveWindowManager<>( - context, windowingFn.windowCoder())); + context, windowFn.windowCoder())); // Attempt to merge windows before emitting; that may remove the current window under // consideration. - ((WindowingFn) windowingFn) - .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowingFn)); + ((WindowFn) windowFn) + .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); - W window = WindowUtils.windowFromString(timer.tag(), windowingFn.windowCoder()); + W window = WindowUtils.windowFromString(timer.tag(), windowFn.windowCoder()); boolean windowExists; try { windowExists = windowSet.contains(window); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index e9a6cbde3773f..12e920d12cd0b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -19,7 +19,7 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.common.reflect.TypeToken; /** @@ -51,7 +51,7 @@ * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp}. * *

Additionally, a {@code PCollection} has an associated - * {@link WindowingFn} and each element is assigned to a set of windows. + * {@link WindowFn} and each element is assigned to a set of windows. * By default, the windowing function is * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow} * and all elements are assigned into a single default window. @@ -166,10 +166,10 @@ public Output apply(PTransform, } /** - * Returns the {@link WindowingFn} of this {@code PCollection}. + * Returns the {@link WindowFn} of this {@code PCollection}. */ - public WindowingFn getWindowingFn() { - return windowingFn; + public WindowFn getWindowFn() { + return windowFn; } ///////////////////////////////////////////////////////////////////////////// @@ -183,13 +183,13 @@ public Output apply(PTransform, private boolean isOrdered = false; /** - * {@link WindowingFn} that will be used to merge windows in + * {@link WindowFn} that will be used to merge windows in * this {@code PCollection} and subsequent {@code PCollection}s produced * from this one. * *
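A short sketch of the renamed accessor in use, in the spirit of the updated tests further below; the windowed collection is an assumed result of a Window.into(...) application:

    // Every PCollection carries a WindowFn; compatibility can be checked against
    // the expected windowing.
    WindowFn<?, ?> fn = windowed.getWindowFn();
    boolean tenMinuteFixed =
        fn.isCompatible(FixedWindows.of(Duration.standardMinutes(10)));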

By default, no merging is performed. */ - private WindowingFn windowingFn; + private WindowFn windowFn; private PCollection() {} @@ -206,12 +206,12 @@ public PCollection setTypeTokenInternal(TypeToken typeToken) { } /** - * Sets the {@link WindowingFn} of this {@code PCollection}. + * Sets the {@link WindowFn} of this {@code PCollection}. * *

For use by primitive transformations only. */ - public PCollection setWindowingFnInternal(WindowingFn windowingFn) { - this.windowingFn = windowingFn; + public PCollection setWindowFnInternal(WindowFn windowFn) { + this.windowFn = windowFn; return this; } @@ -232,7 +232,7 @@ public PCollection setPipelineInternal(Pipeline pipeline) { *

For use by primitive transformations only. */ public static PCollection createPrimitiveOutputInternal( - WindowingFn windowingFn) { - return new PCollection().setWindowingFnInternal(windowingFn); + WindowFn windowFn) { + return new PCollection().setWindowFnInternal(windowFn); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java index 349ec2cc750cc..009ecd93b71e6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.common.collect.ImmutableMap; import com.google.common.reflect.TypeToken; @@ -183,7 +183,7 @@ public Output apply( *

For use by primitive transformations only. */ public static PCollectionTuple ofPrimitiveOutputsInternal( - TupleTagList outputTags, WindowingFn windowingFn) { + TupleTagList outputTags, WindowFn windowFn) { Map, PCollection> pcollectionMap = new LinkedHashMap<>(); for (TupleTag outputTag : outputTags.tupleTags) { if (pcollectionMap.containsKey(outputTag)) { @@ -200,7 +200,7 @@ public static PCollectionTuple ofPrimitiveOutputsInternal( @SuppressWarnings("unchecked") TypeToken token = (TypeToken) outputTag.getTypeToken(); PCollection outputCollection = PCollection - .createPrimitiveOutputInternal(windowingFn) + .createPrimitiveOutputInternal(windowFn) .setTypeTokenInternal(token); pcollectionMap.put(outputTag, outputCollection); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index 8ae07b516de95..63b8eea648bfc 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -97,7 +97,7 @@ public void testCreateNormalParDoFn() throws Exception { DoFn actualDoFn = normalParDoFn.fnFactory.createDoFnInfo().getDoFn(); Assert.assertThat(actualDoFn, new IsInstanceOf(TestDoFn.class)); Assert.assertThat( - normalParDoFn.fnFactory.createDoFnInfo().getWindowingFn(), + normalParDoFn.fnFactory.createDoFnInfo().getWindowFn(), new IsInstanceOf(GlobalWindow.class)); TestDoFn actualTestDoFn = (TestDoFn) actualDoFn; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java index 70cc4f1eaf88d..e5822433b3224 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java @@ -96,7 +96,7 @@ public void testFlattenPCollectionListEmpty() { } @Test - public void testWindowingFnPropagationFailure() { + public void testWindowFnPropagationFailure() { Pipeline p = TestPipeline.create(); PCollection input1 = @@ -113,12 +113,12 @@ public void testWindowingFnPropagationFailure() { Assert.fail("Exception should have been thrown"); } catch (IllegalStateException e) { Assert.assertTrue(e.getMessage().startsWith( - "Inputs to Flatten had incompatible window windowingFns")); + "Inputs to Flatten had incompatible window windowFns")); } } @Test - public void testWindowingFnPropagation() { + public void testWindowFnPropagation() { Pipeline p = TestPipeline.create(); PCollection input1 = @@ -134,12 +134,12 @@ public void testWindowingFnPropagation() { p.run(); - Assert.assertTrue(output.getWindowingFn().isCompatible( + Assert.assertTrue(output.getWindowFn().isCompatible( FixedWindows.of(Duration.standardMinutes(1)))); } @Test - public void testEqualWindowingFnPropagation() { + public void testEqualWindowFnPropagation() { Pipeline p = TestPipeline.create(); PCollection input1 = @@ -155,7 +155,7 @@ public void testEqualWindowingFnPropagation() { p.run(); - Assert.assertTrue(output.getWindowingFn().isCompatible( + Assert.assertTrue(output.getWindowFn().isCompatible( Sessions.withGapDuration(Duration.standardMinutes(2)))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index 0ea40ae263ce9..02d63b9d3fb4d 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -29,7 +29,7 @@ import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowFn; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.values.KV; @@ -186,7 +186,7 @@ public void testGroupByKeyNonDeterministic() throws Exception { } @Test - public void testIdentityWindowingFnPropagation() { + public void testIdentityWindowFnPropagation() { Pipeline p = TestPipeline.create(); List> ungroupedPairs = Arrays.asList(); @@ -201,13 +201,13 @@ public void testIdentityWindowingFnPropagation() { p.run(); - Assert.assertTrue(output.getWindowingFn().isCompatible( + Assert.assertTrue(output.getWindowFn().isCompatible( FixedWindows.>of(Duration.standardMinutes(1)))); } @Test - public void testWindowingFnInvalidation() { + public void testWindowFnInvalidation() { Pipeline p = TestPipeline.create(); List> ungroupedPairs = Arrays.asList(); @@ -224,15 +224,15 @@ public void testWindowingFnInvalidation() { p.run(); Assert.assertTrue( - output.getWindowingFn().isCompatible( - new InvalidWindowingFn( + output.getWindowFn().isCompatible( + new InvalidWindowFn( "Invalid", Sessions.>withGapDuration( Duration.standardMinutes(1))))); } @Test - public void testInvalidWindowingFn() { + public void testInvalidWindowFn() { Pipeline p = TestPipeline.create(); List> ungroupedPairs = Arrays.asList(); @@ -275,7 +275,7 @@ public void testRemerge() { p.run(); Assert.assertTrue( - middle.getWindowingFn().isCompatible( + middle.getWindowFn().isCompatible( Sessions.withGapDuration(Duration.standardMinutes(1)))); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 39494e9647c2a..22550a37ffa68 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -1204,7 +1204,7 @@ public void processElement(ProcessContext c) { fail("should have failed"); } catch (Exception e) { assertThat(e.toString(), containsString( - "WindowingFn attemped to access input timestamp when none was available")); + "WindowFn attemped to access input timestamp when none was available")); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java index 36028e493a755..7950c46f566cd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindowsTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.runWindowFn; +import static 
com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.set; import static org.junit.Assert.assertEquals; import org.joda.time.DateTime; @@ -35,7 +35,7 @@ import java.util.Set; /** - * Tests for CalendarWindows WindowingFn. + * Tests for CalendarWindows WindowFn. */ @RunWith(JUnit4.class) public class CalendarWindowsTest { @@ -77,7 +77,7 @@ public void testDays() throws Exception { makeTimestamp(2015, 1, 2, 0, 0)), set(timestamps.get(4), timestamps.get(5))); - assertEquals(expected, runWindowingFn(CalendarWindows.days(1), timestamps)); + assertEquals(expected, runWindowFn(CalendarWindows.days(1), timestamps)); } @Test @@ -114,7 +114,7 @@ public void testWeeks() throws Exception { set(timestamps.get(4), timestamps.get(5))); assertEquals(expected, - runWindowingFn(CalendarWindows.weeks(1, DateTimeConstants.WEDNESDAY), timestamps)); + runWindowFn(CalendarWindows.weeks(1, DateTimeConstants.WEDNESDAY), timestamps)); } @Test @@ -151,7 +151,7 @@ public void testMonths() throws Exception { set(timestamps.get(4), timestamps.get(5))); assertEquals(expected, - runWindowingFn(CalendarWindows.months(1), timestamps)); + runWindowFn(CalendarWindows.months(1), timestamps)); } @Test @@ -187,7 +187,7 @@ public void testMultiMonths() throws Exception { makeTimestamp(2016, 7, 5, 0, 0)), set(timestamps.get(4), timestamps.get(5))); - assertEquals(expected, runWindowingFn( + assertEquals(expected, runWindowFn( CalendarWindows.months(7).withStartingMonth(2014, 3).beginningOnDay(5), timestamps)); } @@ -224,7 +224,7 @@ public void testYears() throws Exception { makeTimestamp(2060, 5, 5, 0, 0)), set(timestamps.get(4), timestamps.get(5))); - assertEquals(expected, runWindowingFn( + assertEquals(expected, runWindowFn( CalendarWindows.years(10).withStartingYear(2000).beginningOnDay(5, 5), timestamps)); } @@ -253,7 +253,7 @@ public void testTimeZone() throws Exception { new DateTime(2014, 1, 3, 0, 0, timeZone).toInstant()), set(timestamps.get(2), timestamps.get(3))); - assertEquals(expected, runWindowingFn( + assertEquals(expected, runWindowFn( CalendarWindows.days(1).withTimeZone(timeZone), timestamps)); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java index 0a68e72348f73..c59a668989b40 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindowsTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.runWindowFn; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.set; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -37,7 +37,7 @@ import java.util.Set; /** - * Tests for FixedWindows WindowingFn. + * Tests for FixedWindows WindowFn. 
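A compact test-style sketch of the renamed WindowFnTestUtils helpers exercised by the surrounding tests; it relies on the same static imports, the expected map type follows those tests, and the timestamps are illustrative:

    // Sessions with a 10 ms gap: 0, 5 and 9 chain into one merged window; 100 stands alone.
    Map<IntervalWindow, Set<String>> expected = new HashMap<>();
    expected.put(new IntervalWindow(new Instant(0), new Instant(19)), set(0, 5, 9));
    expected.put(new IntervalWindow(new Instant(100), new Instant(110)), set(100));
    assertEquals(
        expected,
        runWindowFn(
            Sessions.withGapDuration(new Duration(10)),
            Arrays.asList(0L, 5L, 9L, 100L)));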
*/ @RunWith(JUnit4.class) public class FixedWindowsTest { @@ -50,7 +50,7 @@ public void testSimpleFixedWindow() throws Exception { expected.put(new IntervalWindow(new Instant(100), new Instant(110)), set(100)); assertEquals( expected, - runWindowingFn( + runWindowFn( FixedWindows.of(new Duration(10)), Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L, 100L))); } @@ -63,7 +63,7 @@ public void testFixedOffsetWindow() throws Exception { expected.put(new IntervalWindow(new Instant(95), new Instant(105)), set(100)); assertEquals( expected, - runWindowingFn( + runWindowFn( FixedWindows.of(new Duration(10)).withOffset(new Duration(5)), Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L, 100L))); } @@ -75,7 +75,7 @@ public void testTimeUnit() throws Exception { expected.put(new IntervalWindow(new Instant(5000), new Instant(15000)), set(5000, 5001, 10000)); assertEquals( expected, - runWindowingFn( + runWindowFn( FixedWindows.of(Duration.standardSeconds(10)).withOffset(Duration.standardSeconds(5)), Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java index ccb1ddecc4968..c500fcf340001 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.runWindowFn; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.set; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -33,7 +33,7 @@ import java.util.Set; /** - * Tests for Sessions WindowingFn. + * Tests for Sessions WindowFn. 
*/ @RunWith(JUnit4.class) public class SessionsTest { @@ -46,7 +46,7 @@ public void testSimple() throws Exception { expected.put(new IntervalWindow(new Instant(101), new Instant(111)), set(101)); assertEquals( expected, - runWindowingFn( + runWindowFn( Sessions.withGapDuration(new Duration(10)), Arrays.asList(0L, 10L, 101L))); } @@ -58,7 +58,7 @@ public void testConsecutive() throws Exception { expected.put(new IntervalWindow(new Instant(100), new Instant(111)), set(100, 101)); assertEquals( expected, - runWindowingFn( + runWindowFn( Sessions.withGapDuration(new Duration(10)), Arrays.asList(1L, 2L, 5L, 9L, 100L, 101L))); } @@ -70,7 +70,7 @@ public void testMerging() throws Exception { expected.put(new IntervalWindow(new Instant(95), new Instant(111)), set(95, 100, 101)); assertEquals( expected, - runWindowingFn( + runWindowFn( Sessions.withGapDuration(new Duration(10)), Arrays.asList(1L, 15L, 30L, 100L, 101L, 95L, 22L, 10L))); } @@ -83,7 +83,7 @@ public void testTimeUnit() throws Exception { expected.put(new IntervalWindow(new Instant(10000), new Instant(11000)), set(10000)); assertEquals( expected, - runWindowingFn( + runWindowFn( Sessions.withGapDuration(Duration.standardSeconds(1)), Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java index f187cb429940e..8af9782fc382c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java @@ -16,8 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.runWindowingFn; -import static com.google.cloud.dataflow.sdk.testing.WindowingFnTestUtils.set; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.runWindowFn; +import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.set; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -34,7 +34,7 @@ import java.util.Set; /** - * Tests for the SlidingWindows WindowingFn. + * Tests for the SlidingWindows WindowFn. */ @RunWith(JUnit4.class) public class SlidingWindowsTest { @@ -48,7 +48,7 @@ public void testSimple() throws Exception { expected.put(new IntervalWindow(new Instant(10), new Instant(20)), set(10, 11)); assertEquals( expected, - runWindowingFn( + runWindowFn( SlidingWindows.of(new Duration(10)).every(new Duration(5)), Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); } @@ -62,7 +62,7 @@ public void testSlightlyOverlapping() throws Exception { expected.put(new IntervalWindow(new Instant(10), new Instant(17)), set(10, 11)); assertEquals( expected, - runWindowingFn( + runWindowFn( SlidingWindows.of(new Duration(7)).every(new Duration(5)), Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); } @@ -75,7 +75,7 @@ public void testElidings() throws Exception { expected.put(new IntervalWindow(new Instant(100), new Instant(103)), set(100)); assertEquals( expected, - runWindowingFn( + runWindowFn( // Only look at the first 3 millisecs of every 10-millisec interval. 
SlidingWindows.of(new Duration(3)).every(new Duration(10)), Arrays.asList(1L, 2L, 3L, 5L, 9L, 10L, 11L, 100L))); @@ -90,7 +90,7 @@ public void testOffset() throws Exception { expected.put(new IntervalWindow(new Instant(7), new Instant(17)), set(9, 10, 11)); assertEquals( expected, - runWindowingFn( + runWindowFn( SlidingWindows.of(new Duration(10)).every(new Duration(5)).withOffset(new Duration(2)), Arrays.asList(1L, 2L, 5L, 9L, 10L, 11L))); } @@ -105,7 +105,7 @@ public void testTimeUnit() throws Exception { expected.put(new IntervalWindow(new Instant(10000), new Instant(20000)), set(10000)); assertEquals( expected, - runWindowingFn( + runWindowFn( SlidingWindows.of(Duration.standardSeconds(10)).every(Duration.standardSeconds(5)), Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java index 9e05a7eb26c4b..34342f7cbc2cf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowingTest.java @@ -56,14 +56,14 @@ public class WindowingTest implements Serializable { public TemporaryFolder tmpFolder = new TemporaryFolder(); private static class WindowedCount extends PTransform, PCollection> { - private WindowingFn windowingFn; - public WindowedCount(WindowingFn windowingFn) { - this.windowingFn = (WindowingFn) windowingFn; + private WindowFn windowFn; + public WindowedCount(WindowFn windowFn) { + this.windowFn = (WindowFn) windowFn; } @Override public PCollection apply(PCollection in) { return in - .apply(Window.named("Window").into(windowingFn)) + .apply(Window.named("Window").into(windowFn)) .apply(Count.perElement()) .apply(ParDo .named("FormatCounts") diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index 20f24c2ebcd30..b2c3d32e6c171 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -27,7 +27,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -207,11 +207,11 @@ public class GroupAlsoByWindowsDoFnTest { private DoFnRunner>>, KV>, List> makeRunner( - WindowingFn windowingFn) { + WindowFn windowFn) { GroupAlsoByWindowsDoFn fn = new GroupAlsoByWindowsDoFn( - windowingFn, StringUtf8Coder.of()); + windowFn, StringUtf8Coder.of()); DoFnRunner>>, KV>, List> runner = diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index c199b9190cfa6..3de4a74207546 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -29,7 +29,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowingFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -259,7 +259,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { private DoFnRunner>, KV>, List> makeRunner( - WindowingFn windowingStrategy) { + WindowFn windowingStrategy) { StreamingGroupAlsoByWindowsDoFn, IntervalWindow> fn = StreamingGroupAlsoByWindowsDoFn.create(windowingStrategy, StringUtf8Coder.of()); From c000e5b4c49b2e6c777bf07bb1c3c119e5d30c6c Mon Sep 17 00:00:00 2001 From: ccy Date: Mon, 26 Jan 2015 15:25:09 -0800 Subject: [PATCH 0097/1541] Report GCS path of error-producing write in Dataflow SDK GoogleCloudStorageWriteChannel. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84821763 --- .../gcsio/GoogleCloudStorageWriteChannel.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java index 11113d0367ea5..a05650d64b69f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageWriteChannel.java @@ -20,7 +20,6 @@ import com.google.api.client.http.InputStreamContent; import com.google.api.client.util.Preconditions; import com.google.api.services.storage.Storage; -import com.google.api.services.storage.model.StorageObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -313,10 +312,6 @@ private void init( ExecutorService threadPool, Storage gcs, String bucketName, String objectName, String contentType) throws IOException { - - // Create object with the given name. - StorageObject object = (new StorageObject()).setName(objectName); - // Create a pipe such that its one end is connected to the input stream used by // the uploader and the other end is the write channel used by the caller. pipeSource = new PipedInputStream(pipeBufferSize); @@ -330,7 +325,8 @@ private void init( objectContentStream.setLength(-1); objectContentStream.setCloseInputStream(false); Storage.Objects.Insert insertObject = - gcs.objects().insert(bucketName, object, objectContentStream); + gcs.objects().insert(bucketName, null, objectContentStream); + insertObject.setName(objectName); insertObject.setDisableGZipContent(true); insertObject.getMediaHttpUploader().setProgressListener( new LoggingMediaHttpUploaderProgressListener(objectName, MIN_LOGGING_INTERVAL_MS)); @@ -373,7 +369,17 @@ private void throwIfUploadFailed() if (uploadOperation.exception() instanceof Error) { throw (Error) uploadOperation.exception(); } - throw new IOException(uploadOperation.exception()); + throw new IOException(String.format("Failed to write to GCS path %s.", getPrintableGCSPath()), + uploadOperation.exception()); } } + + /** + * Gets the printable GCS path of the current channel. 
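With the wrapped exception above, a failed upload now names its destination. A minimal sketch of what a caller of the write channel might observe (the writeChannel variable and the gs:// path are hypothetical, and this assumes the pending failure is rethrown when the channel is closed):

    try {
      writeChannel.close();
    } catch (IOException e) {
      // e.getMessage() -> "Failed to write to GCS path gs://some-bucket/some/object."
      // e.getCause()   -> the original exception thrown by the upload thread
      throw e;
    }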
+ */ + private String getPrintableGCSPath() { + // The bucket and object name are fields stored in the uploadOperation. + return String.format("gs://%s/%s", uploadOperation.insertObject.getBucket(), + uploadOperation.insertObject.getName()); + } } From f4922de1f3c5ba7fd19a062f4108267a4c84e816 Mon Sep 17 00:00:00 2001 From: ananvay Date: Mon, 26 Jan 2015 16:11:02 -0800 Subject: [PATCH 0098/1541] Adding a filter transform. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84825824 --- .../cloud/dataflow/sdk/transforms/Filter.java | 193 ++++++++++++++++++ .../dataflow/sdk/transforms/FilterTest.java | 116 +++++++++++ 2 files changed, 309 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java new file mode 100644 index 0000000000000..c870abe43f9c2 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.values.PCollection; + +/** + * {@code PTransform}s for filtering from a {@code PCollection} the + * elements satisfying a predicate, or satisfying an inequality with + * a given value based on the elements' natural ordering. + * + * @param the type of the values in the input {@code PCollection}, + * and the type of the elements in the output {@code PCollection} + */ +@SuppressWarnings("serial") +public class Filter extends PTransform, + PCollection> { + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection} with + * elements that satisfy the given predicate. The predicate must be + * a {@code SerializableFunction}. + * + *

Example of use: + *

 {@code
+   * PCollection<String> wordList = ...;
+   * PCollection<String> longWords =
+   *     wordList.apply(Filter.by(new MatchIfWordLengthGT(6)));
+   * } 
+ * + *

See also {@link #lessThan}, {@link #lessThanEq}, + * {@link #greaterThan}, {@link #greaterThanEq}, which return elements + * satisfying various inequalities with the specified value based on + * the elements' natural ordering. + */ + public static > + ParDo.Bound by(final C filterPred) { + return ParDo.named("Filter").of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (filterPred.apply(c.element()) == true) { + c.output(c.element()); + } + } + }); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection} with + * elements that are less than a given value, based on the + * elements' natural ordering. Elements must be {@code Comparable}. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> listOfNumbers = ...;
+   * PCollection<Integer> smallNumbers =
+   *     listOfNumbers.apply(Filter.lessThan(10));
+   * } 
+ * + *

See also {@link #lessThanEq}, {@link #greaterThanEq}, + * {@link #greaterThan} which return elements satisfying various + * inequalities with the specified value based on the elements' + * natural ordering. + * + *

See also {@link #by}, which returns elements + * that satisfy the given predicate. + */ + public static > + ParDo.Bound lessThan(final T value) { + return ParDo.named("Filter.lessThan").of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (c.element().compareTo(value) < 0) { + c.output(c.element()); + } + } + }); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection} with + * elements that are greater than a given value, based on the + * elements' natural ordering. Elements must be {@code Comparable}. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> listOfNumbers = ...;
+   * PCollection<Integer> largeNumbers =
+   *     listOfNumbers.apply(Filter.greaterThan(1000));
+   * } 
+ * + *

See also {@link #greaterThanEq}, {@link #lessThan}, + * {@link #lessThanEq} which return elements satisfying various + * inequalities with the specified value based on the elements' + * natural ordering. + * + *

See also {@link #by}, which returns elements + * that satisfy the given predicate. + */ + public static > + ParDo.Bound greaterThan(final T value) { + return ParDo.named("Filter.greaterThan").of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (c.element().compareTo(value) > 0) { + c.output(c.element()); + } + } + }); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection} with + * elements that are less than or equal to a given value, based on the + * elements' natural ordering. Elements must be {@code Comparable}. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> listOfNumbers = ...;
+   * PCollection<Integer> smallOrEqualNumbers =
+   *     listOfNumbers.apply(Filter.lessThanEq(10));
+   * } 
+ * + *

See also {@link #lessThan}, {@link #greaterThanEq}, + * {@link #greaterThan} which return elements satisfying various + * inequalities with the specified value based on the elements' + * natural ordering. + * + *

See also {@link #by}, which returns elements + * that satisfy the given predicate. + */ + public static > + ParDo.Bound lessThanEq(final T value) { + return ParDo.named("Filter.lessThanEq").of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (c.element().compareTo(value) <= 0) { + c.output(c.element()); + } + } + }); + } + + /** + * Returns a {@code PTransform} that takes an input + * {@code PCollection} and returns a {@code PCollection} with + * elements that are greater than or equal to a given value, based on + * the elements' natural ordering. Elements must be {@code Comparable}. + * + *

Example of use: + *

 {@code
+   * PCollection<Integer> listOfNumbers = ...;
+   * PCollection<Integer> largeOrEqualNumbers =
+   *     listOfNumbers.apply(Filter.greaterThanEq(1000));
+   * } 
+ * + *

See also {@link #greaterThan}, {@link #lessThan}, + * {@link #lessThanEq} which return elements satisfying various + * inequalities with the specified value based on the elements' + * natural ordering. + * + *

See also {@link #by}, which returns elements + * that satisfy the given predicate. + */ + public static > + ParDo.Bound greaterThanEq(final T value) { + return ParDo.named("Filter.greaterThanEq").of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + if (c.element().compareTo(value) >= 0) { + c.output(c.element()); + } + } + }); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java new file mode 100644 index 0000000000000..d89092299bb70 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms; + +import static com.google.cloud.dataflow.sdk.TestUtils.createInts; + +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.Arrays; + +/** + * Tests for {@link Filter}. 
+ */ +@RunWith(JUnit4.class) +@SuppressWarnings("serial") +public class FilterTest implements Serializable { + + static class TrivialFn implements SerializableFunction { + private final Boolean returnVal; + + TrivialFn(Boolean returnVal) { + this.returnVal = returnVal; + } + + public Boolean apply(Integer elem) { + return this.returnVal; + } + } + + static class EvenFn implements SerializableFunction { + public Boolean apply(Integer elem) { + return elem % 2 == 0; + } + } + + @Test + public void testIdentityFilterBy() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(591, 11789, 1257, 24578, 24799, 307)); + + PCollection output = input.apply(Filter.by(new TrivialFn(true))); + + DataflowAssert.that(output).containsInAnyOrder(591, 11789, 1257, 24578, 24799, 307); + p.run(); + } + + @Test + public void testNoFilter() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(1, 2, 4, 5)); + + PCollection output = input.apply(Filter.by(new TrivialFn(false))); + + DataflowAssert.that(output).containsInAnyOrder(); + p.run(); + } + + @Test + public void testFilterBy() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(1, 2, 3, 4, 5, 6, 7)); + + PCollection output = input.apply(Filter.by(new EvenFn())); + + DataflowAssert.that(output).containsInAnyOrder(2, 4, 6); + p.run(); + } + + @Test + public void testFilterLessThan() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(1, 2, 3, 4, 5, 6, 7)); + + PCollection output = input.apply(Filter.lessThan(4)); + + DataflowAssert.that(output).containsInAnyOrder(1, 2, 3); + p.run(); + } + + @Test + public void testFilterGreaterThan() { + TestPipeline p = TestPipeline.create(); + + PCollection input = createInts(p, Arrays.asList(1, 2, 3, 4, 5, 6, 7)); + + PCollection output = input.apply(Filter.greaterThan(4)); + + DataflowAssert.that(output).containsInAnyOrder(5, 6, 7); + p.run(); + } +} From 8d5718af85d98ff766764934a090b392441c8fbf Mon Sep 17 00:00:00 2001 From: vanya Date: Mon, 26 Jan 2015 16:18:28 -0800 Subject: [PATCH 0099/1541] Fix a typo: UserDame->UserName [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84826672 --- .../cloud/dataflow/sdk/options/DataflowPipelineOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index 907d020cad23d..b71949ec9947b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -69,7 +69,7 @@ public interface DataflowPipelineOptions extends * not be able to be created. */ @Description("Dataflow job name, to uniquely identify active jobs. " - + "Defaults to using the ApplicationName-UserDame-Date.") + + "Defaults to using the ApplicationName-UserName-Date.") @Default.InstanceFactory(JobNameFactory.class) String getJobName(); void setJobName(String value); From caecd62913e9404618e2259ee513b467abd08800 Mon Sep 17 00:00:00 2001 From: ananvay Date: Tue, 27 Jan 2015 19:27:39 -0800 Subject: [PATCH 0100/1541] Updated test for Filter Transform. 
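As a usage sketch for the transform these tests exercise (the input collection is a placeholder in the style of the javadoc examples; EvenFn is the predicate class defined in FilterTest above):

    PCollection<Integer> numbers = ...;   // e.g. built with Create.of(...)

    // Keep elements for which the SerializableFunction<Integer, Boolean> returns true.
    PCollection<Integer> evens = numbers.apply(Filter.by(new EvenFn()));

    // Keep elements by comparison against a fixed value, using natural ordering.
    PCollection<Integer> large = numbers.apply(Filter.greaterThan(100));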
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84930230 --- .../com/google/cloud/dataflow/sdk/transforms/FilterTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java index d89092299bb70..7d91204edcfa3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterTest.java @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -55,6 +56,7 @@ public Boolean apply(Integer elem) { } @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testIdentityFilterBy() { TestPipeline p = TestPipeline.create(); @@ -79,6 +81,7 @@ public void testNoFilter() { } @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testFilterBy() { TestPipeline p = TestPipeline.create(); @@ -91,6 +94,7 @@ public void testFilterBy() { } @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testFilterLessThan() { TestPipeline p = TestPipeline.create(); From 0fdcc1b06daa427b1af0db1f7f6a500a8056b7e7 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 28 Jan 2015 12:05:49 -0800 Subject: [PATCH 0101/1541] Make LogSaver thread-safe so that ExpectedLogs can be used to capture logs which are output from multiple threads. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84990025 --- .../dataflow/sdk/testing/ExpectedLogs.java | 6 +++- .../sdk/testing/ExpectedLogsTest.java | 33 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java index 3f4e33d63268a..0e2d4722b4aef 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java @@ -27,11 +27,14 @@ import org.junit.rules.TestRule; import java.util.Collection; +import java.util.concurrent.ConcurrentLinkedDeque; import java.util.logging.Handler; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; +import javax.annotation.concurrent.ThreadSafe; + /** * This {@link TestRule} enables the ability to capture JUL logging events during test execution and * assert expectations that they contain certain messages (with or without {@link Throwable}) at @@ -219,8 +222,9 @@ private ExpectedLogs(Class klass) { /** * A JUL logging {@link Handler} that records all logging events which are passed to it. 
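For context on the change that follows: a JUL Handler's publish(LogRecord) may be invoked from any thread, so the recording collection has to tolerate concurrent adds while the test thread later iterates it, which a plain ArrayList does not guarantee. A rough sketch of the access pattern the concurrent deque supports (thread count and messages are illustrative):

    final Collection<LogRecord> records = new ConcurrentLinkedDeque<>();
    ExecutorService pool = Executors.newFixedThreadPool(4);
    for (int i = 0; i < 4; i++) {
      final int id = i;
      pool.submit(new Runnable() {
        @Override
        public void run() {
          // Safe without external synchronization.
          records.add(new LogRecord(Level.FINEST, "message from thread " + id));
        }
      });
    }
    pool.shutdown();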
*/ + @ThreadSafe private static class LogSaver extends Handler { - Collection logRecords = Lists.newArrayList(); + Collection logRecords = new ConcurrentLinkedDeque<>(); public Collection getLogs() { return logRecords; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java index bcd96cefffe45..2766fa56db5e1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java @@ -25,6 +25,10 @@ import java.io.IOException; import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Executors; /** Tests for {@link FastNanoClockAndSleeper}. */ @RunWith(JUnit4.class) @@ -89,6 +93,35 @@ public void testLogCaptureOccursAtLowestLogLevel() throws Throwable { expectedLogs.after(); } + @Test + public void testThreadSafetyOfLogSaver() throws Throwable { + expectedLogs.before(); + + CompletionService completionService = + new ExecutorCompletionService<>(Executors.newCachedThreadPool()); + final long scheduledLogTime = System.currentTimeMillis() + 500L; + for (int i = 0; i < 100; i++) { + final String expected = generateRandomString(); + expectedLogs.expectTrace(expected); + completionService.submit(new Callable() { + @Override + public Void call() throws Exception { + // Have all threads started and waiting to log at about the same moment. + Thread.sleep(Math.max(1, scheduledLogTime - System.currentTimeMillis())); + LOG.trace(expected); + return null; + } + }); + } + + // Wait for all the threads to complete. + for (int i = 0; i < 100; i++) { + completionService.take(); + } + + expectedLogs.after(); + } + // Generates a random fake error message. private static String generateRandomString() { Random random = new Random(); From 2ee5d35a42e7a5a3c017ca2fcdb801f738ed67af Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 28 Jan 2015 13:35:22 -0800 Subject: [PATCH 0102/1541] Update dependencies to latest versions of Google API services. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=84997980 --- examples/pom.xml | 4 ++-- sdk/pom.xml | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 6604a90e531ef..81de0a61629b3 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -150,7 +150,7 @@ com.google.apis google-api-services-storage - v1-rev11-1.19.0 + v1-rev25-1.19.1 @@ -164,7 +164,7 @@ com.google.apis google-api-services-bigquery - v2-rev167-1.19.0 + v2-rev187-1.19.1 diff --git a/sdk/pom.xml b/sdk/pom.xml index c761197a78b3d..1b3e7c26603a0 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -189,7 +189,7 @@ com.google.apis google-api-services-dataflow - v1beta3-rev1-1.19.0 + v1beta3-rev5-1.19.1 @@ -203,7 +203,7 @@ com.google.apis google-api-services-bigquery - v2-rev167-1.19.0 + v2-rev187-1.19.1 @@ -217,7 +217,7 @@ com.google.apis google-api-services-compute - v1-rev34-1.19.0 + v1-rev46-1.19.1 @@ -231,7 +231,7 @@ com.google.apis google-api-services-pubsub - v1beta1-rev9-1.19.0 + v1beta1-rev18-1.19.1 @@ -245,7 +245,7 @@ com.google.apis google-api-services-storage - v1-rev11-1.19.0 + v1-rev25-1.19.1 @@ -293,7 +293,7 @@ com.google.apis google-api-services-datastore-protobuf - v1beta2-rev1-2.1.0 + v1beta2-rev1-2.1.2 From 84c8ea2be7c4936aa1d222807db7728d87f17992 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 28 Jan 2015 15:07:24 -0800 Subject: [PATCH 0103/1541] Drop the differentiation between user and worker credentials since the getUserCredentials is able to get credentials when executed from the worker for all the use cases the worker got them from. The use cases are to get them from GCE metadata or from a service account keyfile [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85006425 --- .../cloud/dataflow/sdk/io/DatastoreIO.java | 55 ++++--------------- .../dataflow/sdk/options/GcpOptions.java | 4 +- .../runners/worker/DataflowWorkerHarness.java | 15 +---- .../cloud/dataflow/sdk/util/Credentials.java | 40 ++------------ 4 files changed, 21 insertions(+), 93 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index dfd5041720dd2..6f3e3b8228a6e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -45,7 +45,6 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.util.Credentials; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.RetryHttpRequestInitializer; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -57,7 +56,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -284,7 +282,7 @@ public List splitIntoShards(long desiredShardSizeBytes, PipelineOptions List splitQueries; if (mockSplitter == null) { splitQueries = DatastoreHelper.getQuerySplitter().getSplits( - query, (int) numSplits, getUserDatastore(host, datasetId, options)); + query, (int) numSplits, getDatastore(options)); } else { splitQueries = mockSplitter.getSplits(query, (int) numSplits, null); } @@ -299,11 +297,7 @@ public List 
splitIntoShards(long desiredShardSizeBytes, PipelineOptions public Reader createBasicReader( PipelineOptions pipelineOptions, Coder coder, ExecutionContext executionContext) throws IOException { - try { - return new DatastoreReader(query, getDatastore(pipelineOptions)); - } catch (GeneralSecurityException e) { - throw new IOException(e); - } + return new DatastoreReader(query, getDatastore(pipelineOptions)); } @Override @@ -313,13 +307,16 @@ public void validate() { Preconditions.checkNotNull(datasetId, "datasetId"); } - private Datastore getDatastore(PipelineOptions pipelineOptions) - throws IOException, GeneralSecurityException { - Datastore datastore = getUserDatastore(host, datasetId, pipelineOptions); - if (datastore == null) { - datastore = getWorkerDatastore(host, datasetId, pipelineOptions); + private Datastore getDatastore(PipelineOptions pipelineOptions) { + DatastoreOptions.Builder builder = + new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( + new RetryHttpRequestInitializer(null)); + + Credential credential = pipelineOptions.as(GcpOptions.class).getGcpCredential(); + if (credential != null) { + builder.credential(credential); } - return datastore; + return DatastoreFactory.get().create(builder.build()); } /** For testing only. */ @@ -339,36 +336,6 @@ public Source withMockEstimateSizeBytes(Supplier estimateSizeBytes) { } } - public static Datastore getWorkerDatastore( - String host, String datasetId, PipelineOptions options) { - DatastoreOptions.Builder builder = - new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( - new RetryHttpRequestInitializer(null)); - - try { - Credential credential = - Credentials.getWorkerCredential(options.as(DataflowPipelineOptions.class)); - builder.credential(credential); - } catch (IOException e) { - LOG.warn("IOException: can't get credential for worker.", e); - throw new RuntimeException("Failed on getting credential for worker."); - } - return DatastoreFactory.get().create(builder.build()); - } - - public static Datastore getUserDatastore(String host, String datasetId, PipelineOptions options) - throws IOException, GeneralSecurityException { - DatastoreOptions.Builder builder = - new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( - new RetryHttpRequestInitializer(null)); - - Credential credential = Credentials.getUserCredential(options.as(GcpOptions.class)); - if (credential != null) { - builder.credential(credential); - } - return DatastoreFactory.get().create(builder.build()); - } - ///////////////////// Write Class ///////////////////////////////// /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 52354d92a0c35..114bc0d7c5594 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -135,13 +135,13 @@ public String create(PipelineOptions options) { /** * Attempts to load the user credentials. See - * {@link Credentials#getUserCredential(GcpOptions)} for more details. + * {@link Credentials#getCredential(GcpOptions)} for more details. 
*/ public static class GcpUserCredentialsFactory implements DefaultValueFactory { @Override public Credential create(PipelineOptions options) { try { - return Credentials.getUserCredential(options.as(GcpOptions.class)); + return Credentials.getCredential(options.as(GcpOptions.class)); } catch (IOException | GeneralSecurityException e) { throw new RuntimeException("Unable to obtain credential", e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index ed197b3b15147..f2601a6b5fa02 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -33,7 +33,6 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingInitializer; -import com.google.cloud.dataflow.sdk.util.Credentials; import com.google.cloud.dataflow.sdk.util.GcsIOChannelFactory; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.PropertyNames; @@ -148,18 +147,8 @@ static DataflowWorker create(DataflowWorkerHarnessOptions options) { MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORKER_ID, options.getWorkerId()); options.setAppName(APPLICATION_NAME); - if (options.getGcpCredential() == null) { - try { - // Load the worker credential, otherwise the default is to load user - // credentials. - options.setGcpCredential(Credentials.getWorkerCredential(options)); - Preconditions.checkState(options.getGcpCredential() != null, - "Failed to obtain worker credential"); - } catch (Throwable e) { - LOG.warn("Unable to obtain any valid credentials. Worker inoperable.", e); - return null; - } - } + Preconditions.checkState(options.getGcpCredential() != null, + "Failed to obtain GCP credential in worker."); // Configure standard IO factories. IOChannelUtils.setIOFactory("gs", new GcsIOChannelFactory(options)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index 6d82eed7a7236..e37275cdb936f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -59,13 +59,7 @@ public class Credentials { * services we access directly (GCS) as opposed to through the backend * (BigQuery, GCE), we need to explicitly request that scope. */ - private static final List WORKER_SCOPES = Arrays.asList( - "https://www.googleapis.com/auth/cloud-platform", - "https://www.googleapis.com/auth/devstorage.full_control", - "https://www.googleapis.com/auth/userinfo.email", - "https://www.googleapis.com/auth/datastore"); - - private static final List USER_SCOPES = Arrays.asList( + private static final List SCOPES = Arrays.asList( "https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/devstorage.full_control", "https://www.googleapis.com/auth/userinfo.email", @@ -79,29 +73,7 @@ public String getRedirectUri() { } /** - * Initializes OAuth2 credential for a worker, using the - * - * application default credentials, or from a local key file when running outside of GCE. 
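With the worker/user split removed there is a single path: both pipeline code and the worker read the credential off GcpOptions, whose default factory delegates to Credentials.getCredential. A minimal sketch of that call path (argument parsing shown for illustration only):

    GcpOptions options = PipelineOptionsFactory.fromArgs(args).as(GcpOptions.class);
    // Lazily resolved by GcpUserCredentialsFactory -> Credentials.getCredential(options):
    // service-account key file, client secrets file, application default credentials, or gcloud.
    Credential credential = options.getGcpCredential();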
- */ - public static Credential getWorkerCredential(GcpOptions options) - throws IOException { - String keyFile = options.getServiceAccountKeyfile(); - String accountName = options.getServiceAccountName(); - - if (keyFile != null && accountName != null) { - try { - return getCredentialFromFile(keyFile, accountName, WORKER_SCOPES); - } catch (GeneralSecurityException e) { - LOG.warn("Unable to obtain credentials from file {}", keyFile); - // Fall through.. - } - } - - return GoogleCredential.getApplicationDefault().createScoped(WORKER_SCOPES); - } - - /** - * Initializes OAuth2 credential for an interactive user program. + * Initializes OAuth2 credentials. * * This can use 4 different mechanisms for obtaining a credential: *

    @@ -132,25 +104,25 @@ public static Credential getWorkerCredential(GcpOptions options) * application default credentials falling back to gcloud. The other options can be * used by providing the corresponding properties. */ - public static Credential getUserCredential(GcpOptions options) + public static Credential getCredential(GcpOptions options) throws IOException, GeneralSecurityException { String keyFile = options.getServiceAccountKeyfile(); String accountName = options.getServiceAccountName(); if (keyFile != null && accountName != null) { try { - return getCredentialFromFile(keyFile, accountName, USER_SCOPES); + return getCredentialFromFile(keyFile, accountName, SCOPES); } catch (GeneralSecurityException e) { throw new IOException("Unable to obtain credentials from file", e); } } if (options.getSecretsFile() != null) { - return getCredentialFromClientSecrets(options, USER_SCOPES); + return getCredentialFromClientSecrets(options, SCOPES); } try { - return GoogleCredential.getApplicationDefault().createScoped(USER_SCOPES); + return GoogleCredential.getApplicationDefault().createScoped(SCOPES); } catch (IOException e) { LOG.debug("Failed to get application default credentials, falling back to gcloud."); } From 33e72de6f4eafaa3183585ecf804ccc0267d23b2 Mon Sep 17 00:00:00 2001 From: robertwb Date: Wed, 28 Jan 2015 15:49:53 -0800 Subject: [PATCH 0104/1541] Rename GlobalWindow (the fn) to GlobalWindows for consistency. Window instances are singular, WindowFns are plural. This change is backwards incompatible, but unlikely to affect much user code. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85010354 --- .../google/cloud/dataflow/sdk/io/AvroIO.java | 4 +-- .../cloud/dataflow/sdk/io/BigQueryIO.java | 4 +-- .../cloud/dataflow/sdk/io/PubsubIO.java | 4 +-- .../cloud/dataflow/sdk/io/ReadSource.java | 4 +-- .../google/cloud/dataflow/sdk/io/TextIO.java | 4 +-- .../dataflow/sdk/transforms/Combine.java | 8 +++--- .../cloud/dataflow/sdk/transforms/Create.java | 4 +-- .../dataflow/sdk/transforms/DoFnTester.java | 4 +-- .../dataflow/sdk/transforms/Flatten.java | 4 +-- .../dataflow/sdk/transforms/GroupByKey.java | 4 +-- .../{GlobalWindow.java => GlobalWindows.java} | 28 +++++++++---------- .../dataflow/sdk/util/WindowedValue.java | 4 +-- .../dataflow/sdk/values/PCollection.java | 2 +- .../runners/DataflowPipelineRunnerTest.java | 4 +-- .../DataflowPipelineTranslatorTest.java | 6 ++-- .../sdk/runners/TransformTreeTest.java | 4 +-- .../worker/MapTaskExecutorFactoryTest.java | 4 +-- .../sdk/runners/worker/NormalParDoFnTest.java | 10 +++---- .../runners/worker/ParDoFnFactoryTest.java | 6 ++-- .../sdk/runners/worker/ShuffleSinkTest.java | 9 +++--- .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 4 +-- .../StreamingGroupAlsoByWindowsDoFnTest.java | 4 +-- 22 files changed, 65 insertions(+), 64 deletions(-) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{GlobalWindow.java => GlobalWindows.java} (67%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java index ed8be3a761567..2eaf60343e5f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java @@ -26,7 +26,7 @@ import com.google.cloud.dataflow.sdk.runners.worker.AvroReader; import com.google.cloud.dataflow.sdk.runners.worker.AvroSink; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import 
com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; @@ -263,7 +263,7 @@ public PCollection apply(PInput input) { // Force the output's Coder to be what the read is using, and // unchangeable later, to ensure that we read the input in the // format specified by the Read transform. - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()) .setCoder(getDefaultOutputCoder()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index d7ed8c2116a1f..11ad6f71e82f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -31,7 +31,7 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.ParDo; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter; import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.Transport; @@ -285,7 +285,7 @@ public PCollection apply(PInput input) { throw new IllegalStateException( "must set the table reference of a BigQueryIO.Read transform"); } - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()) // Force the output's Coder to be what the read is using, and // unchangeable later, to ensure that we read the input in the // format specified by the Read transform. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index f5b57f82fb5dc..4a379920249e7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PDone; import com.google.cloud.dataflow.sdk.values.PInput; @@ -231,7 +231,7 @@ public PCollection apply(PInput input) { + "PubsubIO.Read transform"); } return PCollection.createPrimitiveOutputInternal( - new GlobalWindow()); + new GlobalWindows()); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java index fc44e34293382..a4a5943fdbbee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ReadSource.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.runners.dataflow.BasicSerializableSourceFormat; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PInput; import com.google.common.base.Preconditions; @@ -97,7 +97,7 @@ protected Coder getDefaultOutputCoder() { public final PCollection apply(PInput input) { Preconditions.checkNotNull(source, "source must be set"); source.validate(); - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()) + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()) .setCoder(getDefaultOutputCoder()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index 14fa60cf42e5c..bef838b2e5dc0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -24,7 +24,7 @@ import com.google.cloud.dataflow.sdk.runners.worker.TextReader; import com.google.cloud.dataflow.sdk.runners.worker.TextSink; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; @@ -236,7 +236,7 @@ public PCollection apply(PInput input) { // Force the output's Coder to be what the read is using, and // unchangeable later, to ensure that we read the input in the // format specified by the Read transform. 
- return PCollection.createPrimitiveOutputInternal(new GlobalWindow()).setCoder(coder); + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()).setCoder(coder); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index eba6785b30162..429d10aa9eb5c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -21,7 +21,7 @@ import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionView; @@ -50,7 +50,7 @@ public class Combine { * *

    If the input {@code PCollection} is empty, the ouput will contain a the * default value of the combining function if the input is windowed into - * the {@link GlobalWindow}; otherwise, the output will be empty. Note: this + * the {@link GlobalWindows}; otherwise, the output will be empty. Note: this * behavior is subject to change. * *

    See {@link Globally Combine.Globally} for more information. @@ -69,7 +69,7 @@ public static Globally globally( * * If the input {@code PCollection} is empty, the ouput will contain a the * default value of the combining function if the input is windowed into - * the {@link GlobalWindow}; otherwise, the output will be empty. Note: this + * the {@link GlobalWindows}; otherwise, the output will be empty. Note: this * behavior is subject to change. * *

    See {@link Globally Combine.Globally} for more information. @@ -770,7 +770,7 @@ public PCollection apply(PCollection input) { .apply(Combine.perKey(fn.asKeyedFn())) .apply(Values.create()); - if (input.getWindowFn().isCompatible(new GlobalWindow())) { + if (input.getWindowFn().isCompatible(new GlobalWindows())) { return insertDefaultValueIfEmpty(output); } else { return output; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java index d28e6a28193f4..97b8f16776841 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -20,7 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PBegin; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -184,7 +184,7 @@ public static CreateTimestamped timestamped( @Override public PCollection apply(PInput input) { - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java index 55d0f8b18b964..d00652ce3f6d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.DoFnRunner; import com.google.cloud.dataflow.sdk.util.PTuple; @@ -355,6 +355,6 @@ void initializeState() { sideOutputTags, (new BatchModeExecutionContext()).createStepContext("stepName"), counterSet.getAddCounterMutator(), - new GlobalWindow()); + new GlobalWindows()); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index dd8efca82a240..adf588cbdf90b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -19,7 +19,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionList; @@ -124,7 +124,7 @@ public PCollection apply(PCollectionList inputs) { } } } else { - windowFn = new GlobalWindow(); + 
windowFn = new GlobalWindows(); } return PCollection.createPrimitiveOutputInternal(windowFn); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 225fd18dfdabb..0391c0caf10eb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -25,7 +25,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowFn; import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; @@ -459,7 +459,7 @@ public PCollection>> applyHelper( "GroupByKey must have a valid Window merge function. " + "Invalid because: " + cause); } - if (windowFn.isCompatible(new GlobalWindow())) { + if (windowFn.isCompatible(new GlobalWindows())) { // The input PCollection is using the degenerate default // window function, which uses a single global window for all // elements. We can implement this using a more-primitive diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java similarity index 67% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java index e3298fbb3607c..d4858b85ba5c7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java @@ -30,48 +30,48 @@ * Default {@link WindowFn} where all data is in the same bucket. */ @SuppressWarnings("serial") -public class GlobalWindow - extends NonMergingWindowFn { +public class GlobalWindows + extends NonMergingWindowFn { @Override - public Collection assignWindows(AssignContext c) { - return Arrays.asList(Window.INSTANCE); + public Collection assignWindows(AssignContext c) { + return Arrays.asList(GlobalWindow.INSTANCE); } @Override public boolean isCompatible(WindowFn o) { - return o instanceof GlobalWindow; + return o instanceof GlobalWindows; } @Override - public Coder windowCoder() { - return Window.Coder.INSTANCE; + public Coder windowCoder() { + return GlobalWindow.Coder.INSTANCE; } /** * The default window into which all data is placed. */ - public static class Window extends BoundedWindow { - public static final Window INSTANCE = new Window(); + public static class GlobalWindow extends BoundedWindow { + public static final GlobalWindow INSTANCE = new GlobalWindow(); @Override public Instant maxTimestamp() { return new Instant(Long.MAX_VALUE); } - private Window() {} + private GlobalWindow() {} /** * {@link Coder} for encoding and decoding {@code Window}s. 
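To keep the new naming straight: GlobalWindows (plural) is the WindowFn, and the nested GlobalWindow (singular) is the one window it assigns. A short sketch of how each is referenced after the rename (the input collection is a placeholder):

    // The WindowFn: places every element into the single global window.
    PCollection<String> rewindowed =
        input.apply(Window.<String>into(new GlobalWindows()));

    // The window itself, e.g. when constructing WindowedValues directly.
    BoundedWindow window = GlobalWindows.GlobalWindow.INSTANCE;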
*/ - public static class Coder extends AtomicCoder { + public static class Coder extends AtomicCoder { public static final Coder INSTANCE = new Coder(); @Override - public void encode(Window window, OutputStream outStream, Context context) {} + public void encode(GlobalWindow window, OutputStream outStream, Context context) {} @Override - public Window decode(InputStream inStream, Context context) { - return Window.INSTANCE; + public GlobalWindow decode(InputStream inStream, Context context) { + return GlobalWindow.INSTANCE; } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 29f6078baa277..0562b58b35c50 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -26,7 +26,7 @@ import com.google.cloud.dataflow.sdk.coders.InstantCoder; import com.google.cloud.dataflow.sdk.coders.StandardCoder; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.fasterxml.jackson.annotation.JsonCreator; @@ -72,7 +72,7 @@ public static WindowedValue of( public static WindowedValue valueInGlobalWindow(V value) { return new WindowedValue<>(value, new Instant(Long.MIN_VALUE), - Arrays.asList(GlobalWindow.Window.INSTANCE)); + Arrays.asList(GlobalWindows.GlobalWindow.INSTANCE)); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index 12e920d12cd0b..d7cbe1f734b6d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -53,7 +53,7 @@ *

    Additionally, a {@code PCollection} has an associated * {@link WindowFn} and each element is assigned to a set of windows. * By default, the windowing function is - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow} + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows} * and all elements are assigned into a single default window. * This default can be overridden with the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index 86a307e1594c5..b3f80891244b3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -40,7 +40,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; import com.google.cloud.dataflow.sdk.util.GcsUtil; import com.google.cloud.dataflow.sdk.util.PackageUtil; @@ -440,7 +440,7 @@ public static class TestTransform @Override public PCollection apply(PCollection input) { - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()); } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index bfa3ed7a21eb8..21a4a02a770a2 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -46,7 +46,7 @@ import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.transforms.Sum; import com.google.cloud.dataflow.sdk.transforms.View; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.OutputReference; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.TestCredential; @@ -303,7 +303,7 @@ public EmbeddedTransform(Step step) { @Override public PCollection apply(PCollection input) { - return PCollection.createPrimitiveOutputInternal(new GlobalWindow()); + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()); } @Override @@ -385,7 +385,7 @@ public PCollectionTuple apply(PCollection input) { // Fails here when attempting to construct a tuple with an unbound object. 
return PCollectionTuple.of(sumTag, sum) .and(doneTag, PCollection.createPrimitiveOutputInternal( - new GlobalWindow())); + new GlobalWindows())); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java index cceefe90f622e..7305fa20f6b54 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/TransformTreeTest.java @@ -27,7 +27,7 @@ import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.First; import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.PBegin; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionList; @@ -75,7 +75,7 @@ public PCollectionList apply(PBegin b) { // from within a composite transform. return PCollectionList.of( Arrays.asList(result, PCollection.createPrimitiveOutputInternal( - new GlobalWindow()))); + new GlobalWindows()))); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index 6667ef0c78c06..69c5859f22ea0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -42,7 +42,7 @@ import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSink; import com.google.cloud.dataflow.sdk.runners.worker.SinkFactoryTest.TestSinkFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; @@ -294,7 +294,7 @@ static ParallelInstruction createParDoInstruction( String serializedFn = StringUtils.byteArrayToJsonString( - SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindow()))); + SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindows()))); CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); addString(cloudUserFn, PropertyNames.SERIALIZED_FN, serializedFn); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java index a6c4238f4c19b..57a17ec2db4e2 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java @@ -29,7 +29,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.DoFnInfo; 
import com.google.cloud.dataflow.sdk.util.PTuple; @@ -153,7 +153,7 @@ public void testNormalParDoFn() throws Exception { List sideOutputTags = Arrays.asList("tag1", "tag2", "tag3"); TestDoFn fn = new TestDoFn(sideOutputTags); - DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindows()); TestReceiver receiver = new TestReceiver(); TestReceiver receiver1 = new TestReceiver(); TestReceiver receiver2 = new TestReceiver(); @@ -218,7 +218,7 @@ public void testNormalParDoFn() throws Exception { @Test public void testUnexpectedNumberOfReceivers() throws Exception { TestDoFn fn = new TestDoFn(Collections.emptyList()); - DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindows()); TestReceiver receiver = new TestReceiver(); PTuple sideInputValues = PTuple.empty(); @@ -257,7 +257,7 @@ private List stackTraceFrameStrings(Throwable t) { @Test public void testErrorPropagation() throws Exception { TestErrorDoFn fn = new TestErrorDoFn(); - DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindows()); TestReceiver receiver = new TestReceiver(); PTuple sideInputValues = PTuple.empty(); @@ -325,7 +325,7 @@ public void testErrorPropagation() throws Exception { @Test public void testUndeclaredSideOutputs() throws Exception { TestDoFn fn = new TestDoFn(Arrays.asList("declared", "undecl1", "undecl2", "undecl3")); - DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindow()); + DoFnInfo fnInfo = new DoFnInfo(fn, new GlobalWindows()); CounterSet counters = new CounterSet(); NormalParDoFn normalParDoFn = new NormalParDoFn( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java index 63b8eea648bfc..ab69d68965ce9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java @@ -21,7 +21,7 @@ import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; @@ -71,7 +71,7 @@ public void testCreateNormalParDoFn() throws Exception { String serializedFn = StringUtils.byteArrayToJsonString( - SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindow()))); + SerializableUtils.serializeToByteArray(new DoFnInfo(fn, new GlobalWindows()))); CloudObject cloudUserFn = CloudObject.forClassName("DoFn"); addString(cloudUserFn, "serialized_fn", serializedFn); @@ -98,7 +98,7 @@ public void testCreateNormalParDoFn() throws Exception { Assert.assertThat(actualDoFn, new IsInstanceOf(TestDoFn.class)); Assert.assertThat( normalParDoFn.fnFactory.createDoFnInfo().getWindowFn(), - new IsInstanceOf(GlobalWindow.class)); + new IsInstanceOf(GlobalWindows.class)); TestDoFn actualTestDoFn = (TestDoFn) actualDoFn; Assert.assertEquals(stringState, actualTestDoFn.stringState); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java index b2aa533ce53f2..b58fc38c8f46a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java @@ -23,7 +23,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; @@ -85,7 +85,7 @@ public class ShuffleSinkTest { private void runTestWriteUngroupingShuffleSink(List expected) throws Exception { Coder> windowedValueCoder = - WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindow().windowCoder()); + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), new GlobalWindows().windowCoder()); ShuffleSink shuffleSink = new ShuffleSink<>( PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.UNGROUPED, @@ -107,7 +107,8 @@ private void runTestWriteUngroupingShuffleSink(List expected) // Ignore the key. byte[] valueBytes = record.getValue(); WindowedValue value = CoderUtils.decodeFromByteArray(windowedValueCoder, valueBytes); - Assert.assertEquals(Lists.newArrayList(GlobalWindow.Window.INSTANCE), value.getWindows()); + Assert.assertEquals(Lists.newArrayList(GlobalWindows.GlobalWindow.INSTANCE), + value.getWindows()); actual.add(value.getValue()); } @@ -170,7 +171,7 @@ void runTestWriteGroupingSortingShuffleSink( KvCoder.of(BigEndianIntegerCoder.of(), KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())), - new GlobalWindow().windowCoder())); + new GlobalWindows().windowCoder())); TestShuffleWriter shuffleWriter = new TestShuffleWriter(); List actualSizes = new ArrayList<>(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index b2c3d32e6c171..8ee581b6b78b3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -23,7 +23,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -223,7 +223,7 @@ KV>, List> makeRunner( new ArrayList>(), execContext.createStepContext("merge"), counters.getAddCounterMutator(), - new GlobalWindow()); + new GlobalWindows()); return runner; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index 3de4a74207546..8b31c51620333 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -25,7 +25,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -274,7 +274,7 @@ KV>, List> makeRunner( new ArrayList>(), execContext.createStepContext("merge"), counters.getAddCounterMutator(), - new GlobalWindow()); + new GlobalWindows()); return runner; } From 4c16878ffb0c281e8efae5aebfa70bc84cec5c54 Mon Sep 17 00:00:00 2001 From: sgmc Date: Wed, 28 Jan 2015 20:41:15 -0800 Subject: [PATCH 0105/1541] Fix issue with retrying GCS staging, where we would log the "going to retry" message after actually retrying. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85030057 --- .../sdk/util/AttemptBoundedExponentialBackOff.java | 4 ++++ .../cloud/dataflow/sdk/util/PackageUtil.java | 8 ++++---- .../util/AttemptBoundedExponentialBackOffTest.java | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java index 78e8e0538b824..8ac0e1ea8f3a0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java @@ -79,4 +79,8 @@ public long nextBackOffMillis() { currentAttempt += 1; return Math.round(currentIntervalMillis + randomOffset); } + + public boolean atMaxAttempts() { + return currentAttempt >= maximumNumberOfAttempts; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java index 5afa04a8af1b3..7226f3ae90772 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java @@ -19,7 +19,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; -import com.google.api.client.util.BackOff; import com.google.api.client.util.BackOffUtils; import com.google.api.client.util.Sleeper; import com.google.api.services.dataflow.model.DataflowPackage; @@ -149,7 +148,7 @@ static List stageClasspathElementsToGcs( } // Upload file, retrying on failure. 
- BackOff backoff = new AttemptBoundedExponentialBackOff( + AttemptBoundedExponentialBackOff backoff = new AttemptBoundedExponentialBackOff( MAX_ATTEMPTS, INITIAL_BACKOFF_INTERVAL_MS); while (true) { @@ -161,9 +160,10 @@ static List stageClasspathElementsToGcs( numUploaded++; break; } catch (IOException e) { - if (BackOffUtils.next(retrySleeper, backoff)) { - LOG.warn("Upload attempt failed, will retry staging of classpath: {}", + if (!backoff.atMaxAttempts()) { + LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}", classpathElement, e); + BackOffUtils.next(retrySleeper, backoff); } else { // Rethrow last error, to be included as a cause in the catch below. LOG.error("Upload failed, will NOT retry staging of classpath: {}", diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java index e4f332db12e88..97f5225426faf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOffTest.java @@ -21,6 +21,9 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.lessThan; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import com.google.api.client.util.BackOff; @@ -68,4 +71,15 @@ public void testThatResettingAllowsReuse() throws Exception { assertThat(backOff.nextBackOffMillis(), allOf(greaterThan(374L), lessThan(1126L))); assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); } + + @Test + public void testAtMaxAttempts() throws Exception { + AttemptBoundedExponentialBackOff backOff = new AttemptBoundedExponentialBackOff(3, 500); + assertFalse(backOff.atMaxAttempts()); + backOff.nextBackOffMillis(); + assertFalse(backOff.atMaxAttempts()); + backOff.nextBackOffMillis(); + assertTrue(backOff.atMaxAttempts()); + assertEquals(BackOff.STOP, backOff.nextBackOffMillis()); + } } From 02729933fb74c3ac62a6ae01151c8138c32a518f Mon Sep 17 00:00:00 2001 From: wan Date: Wed, 28 Jan 2015 21:55:33 -0800 Subject: [PATCH 0106/1541] Renames InvalidWindowFn to InvalidWindows to conform with the convention that concrete window-fn classes are plural nouns. This is backward-incompatible but unlikely to affect users. 
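For code that does inspect the window function, the change is a type rename only; a
minimal sketch, assuming a windowFn reference obtained from a PCollection via
getWindowFn() (the variable is illustrative and not part of this patch):

    // Before this change the check was: windowFn instanceof InvalidWindowFn
    if (windowFn instanceof InvalidWindows) {
      // Why the windowing became invalid, e.g. a preceding GroupByKey.
      String cause = ((InvalidWindows) windowFn).getCause();
      // The wrapped WindowFn is still reachable, e.g. for Window.remerge().
      WindowFn originalFn = ((InvalidWindows) windowFn).getOriginalWindowFn();
    }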
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85033832 --- .../cloud/dataflow/sdk/transforms/GroupByKey.java | 8 ++++---- .../{InvalidWindowFn.java => InvalidWindows.java} | 10 +++++----- .../dataflow/sdk/transforms/windowing/Window.java | 4 ++-- .../cloud/dataflow/sdk/transforms/GroupByKeyTest.java | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/{InvalidWindowFn.java => InvalidWindows.java} (81%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 0391c0caf10eb..9b56585fb9ea9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -26,7 +26,7 @@ import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; @@ -286,7 +286,7 @@ public PCollection>> apply(PCollection> input) { if (!(windowFn instanceof NonMergingWindowFn)) { // Prevent merging windows again, without explicit user // involvement, e.g., by Window.into() or Window.remerge(). - windowFn = new InvalidWindowFn( + windowFn = new InvalidWindows( "WindowFn has already been consumed by previous GroupByKey", windowFn); } @@ -453,8 +453,8 @@ public PCollection>> applyHelper( // key/value input elements and the window merge operation of the // window function associated with the input PCollection. WindowFn windowFn = getInput().getWindowFn(); - if (windowFn instanceof InvalidWindowFn) { - String cause = ((InvalidWindowFn) windowFn).getCause(); + if (windowFn instanceof InvalidWindows) { + String cause = ((InvalidWindows) windowFn).getCause(); throw new IllegalStateException( "GroupByKey must have a valid Window merge function. 
" + "Invalid because: " + cause); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java similarity index 81% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java index 3a17f4a4cbb9a..57985596dd765 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java @@ -25,11 +25,11 @@ * * @param window type */ -public class InvalidWindowFn extends WindowFn { +public class InvalidWindows extends WindowFn { private String cause; private WindowFn originalWindowFn; - public InvalidWindowFn(String cause, WindowFn originalWindowFn) { + public InvalidWindows(String cause, WindowFn originalWindowFn) { this.originalWindowFn = originalWindowFn; this.cause = cause; } @@ -42,7 +42,7 @@ public String getCause() { } /** - * Returns the original windowFn that this InvalidWindowFn replaced. + * Returns the original windowFn that this InvalidWindows replaced. */ public WindowFn getOriginalWindowFn() { return originalWindowFn; @@ -64,12 +64,12 @@ public Coder windowCoder() { } /** - * {@code InvalidWindowFn} objects with the same {@code originalWindowFn} are compatible. + * {@code InvalidWindows} objects with the same {@code originalWindowFn} are compatible. */ @Override public boolean isCompatible(WindowFn other) { return getClass() == other.getClass() && getOriginalWindowFn().isCompatible( - ((InvalidWindowFn) other).getOriginalWindowFn()); + ((InvalidWindows) other).getOriginalWindowFn()); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 28956b1360989..38e69813ecd54 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -228,8 +228,8 @@ public static class Remerge extends PTransform, PCollection public PCollection apply(PCollection input) { WindowFn windowFn = getInput().getWindowFn(); WindowFn outputWindowFn = - (windowFn instanceof InvalidWindowFn) - ? ((InvalidWindowFn) windowFn).getOriginalWindowFn() + (windowFn instanceof InvalidWindows) + ? 
((InvalidWindows) windowFn).getOriginalWindowFn() : windowFn; return input.apply(ParDo.named("Identity").of(new DoFn() { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index 02d63b9d3fb4d..5d97a7a657b93 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -29,7 +29,7 @@ import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindowFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.values.KV; @@ -225,14 +225,14 @@ public void testWindowFnInvalidation() { Assert.assertTrue( output.getWindowFn().isCompatible( - new InvalidWindowFn( + new InvalidWindows( "Invalid", Sessions.>withGapDuration( Duration.standardMinutes(1))))); } @Test - public void testInvalidWindowFn() { + public void testInvalidWindows() { Pipeline p = TestPipeline.create(); List> ungroupedPairs = Arrays.asList(); From 815fb456404b5a1592aa6981ab9020c44e18a32f Mon Sep 17 00:00:00 2001 From: mattlang Date: Thu, 29 Jan 2015 08:03:17 -0800 Subject: [PATCH 0107/1541] Adds support for compressed (gzip and bzip2) text sources. By default, the compression mode for a text source is determined by examining file extensions (i.e., file names ending in .gz will be processed as gzip files and ending in .bz2 will be processed as bzip2 files). This behavior can be overridden by specifying a compression type: TextIO.Read.from(myFileName).withCompressionType(TextIO.CompressionType.GZIP) GZIP, BZIP2, UNCOMPRESSED, and AUTO compression types are supported. AUTO is the default. GZIP, BZIP2, and UNCOMPRESSED will treat all files as gzipped, bzipped, or uncompressed regardless of their extension or contents. If multiple files are to be read from (by using a glob), the extension of each file will be examined in AUTO mode. 
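A minimal usage sketch, assuming an existing Pipeline p and placeholder GCS paths;
withCompressionType and the CompressionType enum are added by this patch, while
TextIO.Read.from already exists in the SDK:

    import com.google.cloud.dataflow.sdk.io.TextIO;
    import com.google.cloud.dataflow.sdk.values.PCollection;

    // AUTO (the default): compression is chosen per file from its extension,
    // so *.gz files are read as gzip and *.bz2 files as bzip2.
    PCollection<String> autoLines = p.apply(
        TextIO.Read.from("gs://my-bucket/logs/*"));

    // Explicit type: every matched file is treated as gzip, whatever its name.
    PCollection<String> gzipLines = p.apply(
        TextIO.Read.from("gs://my-bucket/archive/part-*")
            .withCompressionType(TextIO.CompressionType.GZIP));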
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85066281 --- sdk/pom.xml | 6 + .../google/cloud/dataflow/sdk/io/TextIO.java | 115 +++++++++++- .../runners/dataflow/TextIOTranslator.java | 1 + .../sdk/runners/worker/FileBasedReader.java | 62 ++++++- .../sdk/runners/worker/TextReader.java | 33 ++-- .../sdk/runners/worker/TextReaderFactory.java | 6 +- .../dataflow/sdk/util/PropertyNames.java | 1 + .../cloud/dataflow/sdk/io/TextIOTest.java | 37 ++++ .../runners/worker/FileBasedReaderTest.java | 45 +++++ .../runners/worker/TextReaderFactoryTest.java | 11 +- .../sdk/runners/worker/TextReaderTest.java | 169 ++++++++++++++++-- .../dataflow/sdk/util/IOFactoryTest.java | 4 +- 12 files changed, 442 insertions(+), 48 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReaderTest.java diff --git a/sdk/pom.xml b/sdk/pom.xml index 1b3e7c26603a0..1b7992b00d382 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -347,6 +347,12 @@ 1.7.7 + + org.apache.commons + commons-compress + 1.9 + + joda-time joda-time diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index bef838b2e5dc0..487dcc371af31 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -21,6 +21,7 @@ import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.worker.FileBasedReader; import com.google.cloud.dataflow.sdk.runners.worker.TextReader; import com.google.cloud.dataflow.sdk.runners.worker.TextSink; import com.google.cloud.dataflow.sdk.transforms.PTransform; @@ -32,9 +33,13 @@ import com.google.cloud.dataflow.sdk.values.PDone; import com.google.cloud.dataflow.sdk.values.PInput; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; + import java.io.IOException; +import java.io.InputStream; import java.util.List; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; import javax.annotation.Nullable; @@ -148,7 +153,20 @@ public static Bound withoutValidation() { return new Bound<>(DEFAULT_TEXT_CODER).withoutValidation(); } - // TODO: strippingNewlines, gzipped, etc. + /** + * Returns a TextIO.Read PTransform that reads from a file with the + * specified compression type. + * + *
<p>
    If no compression type is specified, the default is AUTO. In this + * mode, the compression type of the file is determined by its extension + * (e.g., *.gz is gzipped, *.bz2 is bzipped, all other extensions are + * uncompressed). + */ + public static Bound withCompressionType(TextIO.CompressionType compressionType) { + return new Bound<>(DEFAULT_TEXT_CODER).withCompressionType(compressionType); + } + + // TODO: strippingNewlines, etc. /** * A root PTransform that reads from a text file (or multiple text files @@ -173,15 +191,20 @@ public static class Bound extends PTransform> { /** An option to indicate if input validation is desired. Default is true. */ final boolean validate; + /** Option to indicate the input source's compression type. Default is AUTO. */ + final TextIO.CompressionType compressionType; + Bound(Coder coder) { - this(null, null, coder, true); + this(null, null, coder, true, TextIO.CompressionType.AUTO); } - Bound(String name, String filepattern, Coder coder, boolean validate) { + Bound(String name, String filepattern, Coder coder, boolean validate, + TextIO.CompressionType compressionType) { super(name); this.coder = coder; this.filepattern = filepattern; this.validate = validate; + this.compressionType = compressionType; } /** @@ -189,7 +212,7 @@ public static class Bound extends PTransform> { * with the given step name. Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, filepattern, coder, validate); + return new Bound<>(name, filepattern, coder, validate, compressionType); } /** @@ -199,7 +222,7 @@ public Bound named(String name) { * filepatterns.) Does not modify this object. */ public Bound from(String filepattern) { - return new Bound<>(name, filepattern, coder, validate); + return new Bound<>(name, filepattern, coder, validate, compressionType); } /** @@ -212,7 +235,7 @@ public Bound from(String filepattern) { * elements of the resulting PCollection */ public Bound withCoder(Coder coder) { - return new Bound<>(name, filepattern, coder, validate); + return new Bound<>(name, filepattern, coder, validate, compressionType); } /** @@ -225,7 +248,24 @@ public Bound withCoder(Coder coder) { * available at execution time. */ public Bound withoutValidation() { - return new Bound<>(name, filepattern, coder, false); + return new Bound<>(name, filepattern, coder, false, compressionType); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but + * reads from input sources using the specified compression type. + * Does not modify this object. + * + *
<p>
    If AUTO compression type is specified, a compression type is + * selected on a per-file basis, based on the file's extension (e.g., + * .gz will be processed as a gzipped file, .bz2 will be processed + * as a bzipped file, other extensions will be treated as uncompressed + * input). + * + *
<p>
    If no compression type is specified, the default is AUTO. + */ + public Bound withCompressionType(TextIO.CompressionType compressionType) { + return new Bound<>(name, filepattern, coder, validate, compressionType); } @Override @@ -257,6 +297,10 @@ public boolean needsValidation() { return validate; } + public TextIO.CompressionType getCompressionType() { + return compressionType; + } + static { DirectPipelineRunner.registerDefaultTransformEvaluator( Bound.class, new DirectPipelineRunner.TransformEvaluator() { @@ -367,7 +411,7 @@ public static Bound withoutValidation() { return new Bound<>(DEFAULT_TEXT_CODER).withoutValidation(); } - // TODO: appendingNewlines, gzipped, header, footer, etc. + // TODO: appendingNewlines, header, footer, etc. /** * A PTransform that writes a bounded PCollection to a text file (or @@ -585,6 +629,58 @@ public void evaluate( } } + /** + * Possible text file compression types. + */ + public static enum CompressionType implements FileBasedReader.DecompressingStreamFactory { + /** + * Automatically determine the compression type based on filename extension. + */ + AUTO(""), + /** + * Uncompressed (i.e., may be split). + */ + UNCOMPRESSED(""), + /** + * GZipped. + */ + GZIP(".gz") { + @Override + public InputStream createInputStream(InputStream inputStream) throws IOException { + return new GZIPInputStream(inputStream); + } + }, + /** + * BZipped. + */ + BZIP2(".bz2") { + @Override + public InputStream createInputStream(InputStream inputStream) throws IOException { + return new BZip2CompressorInputStream(inputStream); + } + }; + + private String filenameSuffix; + + private CompressionType(String suffix) { + this.filenameSuffix = suffix; + } + + /** + * Determine if a given filename matches a compression type based on its extension. + * @param filename the filename to match + * @return true iff the filename ends with the compression type's known extension. + */ + public boolean matches(String filename) { + return filename.toLowerCase().endsWith(filenameSuffix.toLowerCase()); + } + + @Override + public InputStream createInputStream(InputStream inputStream) throws IOException { + return inputStream; + } + } + // Pattern which matches old-style shard output patterns, which are now // disallowed. 
private static final Pattern SHARD_OUTPUT_PATTERN = Pattern.compile("@([0-9]+|\\*)"); @@ -601,7 +697,8 @@ private static void validateOutputComponent(String partialFilePattern) { private static void evaluateReadHelper( Read.Bound transform, DirectPipelineRunner.EvaluationContext context) { TextReader reader = - new TextReader<>(transform.filepattern, true, null, null, transform.coder); + new TextReader<>(transform.filepattern, true, null, null, transform.coder, + transform.getCompressionType()); List elems = ReaderUtils.readElemsFromReader(reader); context.setPCollection(transform.getOutput(), elems); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index 302cea67a55c5..9cf059bf53126 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -61,6 +61,7 @@ private void translateReadHelper( context.addInput(PropertyNames.FILEPATTERN, gcsPath); context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); context.addInput(PropertyNames.VALIDATE_SOURCE, transform.needsValidation()); + context.addInput(PropertyNames.COMPRESSION_TYPE, transform.getCompressionType().toString()); // TODO: Orderedness? } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index de116277fd262..c0c16d88f26bf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -23,6 +23,7 @@ import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.IOChannelFactory; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; @@ -35,6 +36,7 @@ import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import java.io.PushbackInputStream; import java.nio.channels.Channels; import java.util.Collection; @@ -122,15 +124,20 @@ protected abstract class FileBasedIterator extends AbstractReaderIterator { protected ByteArrayOutputStream nextElement; protected boolean nextElementComputed = false; protected long offset; + protected FileBasedReader.DecompressingStreamFactory compressionStreamFactory; FileBasedIterator(CopyableSeekableByteChannel seeker, long startOffset, long offset, - @Nullable Long endOffset, ProgressTracker tracker) throws IOException { + @Nullable Long endOffset, ProgressTracker tracker, + FileBasedReader.DecompressingStreamFactory compressionStreamFactory) throws IOException { this.seeker = checkNotNull(seeker); this.seeker.position(startOffset); + this.compressionStreamFactory = compressionStreamFactory; + InputStream inputStream = + compressionStreamFactory.createInputStream(Channels.newInputStream(seeker)); BufferedInputStream bufferedStream = useDefaultBufferSize - ? new BufferedInputStream(Channels.newInputStream(seeker)) - : new BufferedInputStream(Channels.newInputStream(seeker), BUF_SIZE); + ? 
new BufferedInputStream(inputStream) + : new BufferedInputStream(inputStream, BUF_SIZE); this.stream = new PushbackInputStream(bufferedStream, BUF_SIZE); this.startOffset = startOffset; this.offset = offset; @@ -246,4 +253,53 @@ private void computeNextElement() throws IOException { nextElementComputed = true; } } + + /** + * Factory interface for creating a decompressing {@link InputStream}. + */ + public interface DecompressingStreamFactory { + + /** + * Create a decompressing {@link InputStream} from an existing {@link InputStream}. + * + * @param inputStream the existing stream + * @return a stream that decompresses the contents of the existing stream + * @throws IOException + */ + public InputStream createInputStream(InputStream inputStream) throws IOException; + } + + /** + * Factory for creating decompressing input streams based on a filename and + * a {@link TextIO.CompressionType}. If the compression mode is AUTO, the filename + * is checked against known extensions to determine a compression type to use. + */ + protected static class FilenameBasedStreamFactory + implements FileBasedReader.DecompressingStreamFactory { + private String filename; + private TextIO.CompressionType compressionType; + + public FilenameBasedStreamFactory(String filename, TextIO.CompressionType compressionType) { + this.filename = filename; + this.compressionType = compressionType; + } + + protected TextIO.CompressionType getCompressionTypeForAuto() { + for (TextIO.CompressionType type : TextIO.CompressionType.values()) { + if (type.matches(filename) && type != TextIO.CompressionType.AUTO + && type != TextIO.CompressionType.UNCOMPRESSED) { + return type; + } + } + return TextIO.CompressionType.UNCOMPRESSED; + } + + @Override + public InputStream createInputStream(InputStream inputStream) throws IOException { + if (compressionType == TextIO.CompressionType.AUTO) { + return getCompressionTypeForAuto().createInputStream(inputStream); + } + return compressionType.createInputStream(inputStream); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java index f46eca2427356..afd75fbfd77f9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.util.IOChannelFactory; import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTracker; import com.google.cloud.dataflow.sdk.util.common.worker.ProgressTrackerGroup; @@ -38,16 +39,20 @@ */ public class TextReader extends FileBasedReader { final boolean stripTrailingNewlines; + final TextIO.CompressionType compressionType; public TextReader(String filename, boolean stripTrailingNewlines, @Nullable Long startPosition, - @Nullable Long endPosition, Coder coder) { - this(filename, stripTrailingNewlines, startPosition, endPosition, coder, true); + @Nullable Long endPosition, Coder coder, TextIO.CompressionType compressionType) { + this(filename, stripTrailingNewlines, startPosition, endPosition, coder, true, + compressionType); } protected TextReader(String filename, boolean stripTrailingNewlines, @Nullable Long startPosition, - @Nullable Long endPosition, Coder coder, boolean useDefaultBufferSize) { + @Nullable Long 
endPosition, Coder coder, boolean useDefaultBufferSize, + TextIO.CompressionType compressionType) { super(filename, startPosition, endPosition, coder, useDefaultBufferSize); this.stripTrailingNewlines = stripTrailingNewlines; + this.compressionType = compressionType; } @Override @@ -99,7 +104,8 @@ private TextFileIterator newReaderIteratorForRangeWithStrictStart(IOChannelFacto SeekableByteChannel seeker = (SeekableByteChannel) reader; return new TextFileIterator( - new CopyableSeekableByteChannel(seeker), stripTrailingNewlines, startOffset, endOffset); + new CopyableSeekableByteChannel(seeker), stripTrailingNewlines, startOffset, endOffset, + new FileBasedReader.FilenameBasedStreamFactory(input, compressionType)); } class TextFileMultiIterator extends LazyMultiReaderIterator { @@ -124,7 +130,8 @@ class TextFileIterator extends FileBasedIterator { private ScanState state; TextFileIterator(CopyableSeekableByteChannel seeker, boolean stripTrailingNewlines, - long startOffset, @Nullable Long endOffset) throws IOException { + long startOffset, @Nullable Long endOffset, + FileBasedReader.DecompressingStreamFactory compressionStreamFactory) throws IOException { this(seeker, stripTrailingNewlines, startOffset, startOffset, endOffset, new ProgressTrackerGroup() { @Override @@ -132,13 +139,15 @@ protected void report(Integer lineLength) { notifyElementRead(lineLength.longValue()); } }.start(), - new ScanState(BUF_SIZE, !stripTrailingNewlines)); + new ScanState(BUF_SIZE, !stripTrailingNewlines), + compressionStreamFactory); } private TextFileIterator(CopyableSeekableByteChannel seeker, boolean stripTrailingNewlines, long startOffset, long offset, @Nullable Long endOffset, ProgressTracker tracker, - ScanState state) throws IOException { - super(seeker, startOffset, offset, endOffset, tracker); + ScanState state, FileBasedReader.DecompressingStreamFactory compressionStreamFactory) + throws IOException { + super(seeker, startOffset, offset, endOffset, tracker, compressionStreamFactory); this.stripTrailingNewlines = stripTrailingNewlines; this.state = state; @@ -149,7 +158,7 @@ private TextFileIterator(TextFileIterator it) throws IOException { // that it may hold bytes that have been read and now reside // in the read buffer (that is copied during cloning). this(it.seeker.copy(), it.stripTrailingNewlines, it.startOffset + it.state.totalBytesRead, - it.offset, it.endOffset, it.tracker.copy(), it.state.copy()); + it.offset, it.endOffset, it.tracker.copy(), it.state.copy(), it.compressionStreamFactory); } @Override @@ -262,7 +271,7 @@ public boolean readBytes(PushbackInputStream stream) throws IOException { * Consumes characters until a separator character is found or the * end of buffer is reached. * - * Updates the state to indicate the position of the separator + *
<p>
    Updates the state to indicate the position of the separator * character. If pos==len, no separator was found. * * @return the number of characters consumed. @@ -303,7 +312,7 @@ public int bytesBuffered() { /** * Copies data from the input buffer to the output buffer. * - * If keepNewlines==true, line-termination characters are included in the copy. + *
<p>
    If keepNewlines==true, line-termination characters are included in the copy. */ private void copyToOutputBuffer(ByteArrayOutputStream out) { int charsCopied = pos - start; @@ -317,7 +326,7 @@ private void copyToOutputBuffer(ByteArrayOutputStream out) { * Scans the input buffer to determine if a matched carriage return * has an accompanying linefeed and process the input buffer accordingly. * - * If keepNewlines==true and a linefeed character is detected, + *
<p>
    If keepNewlines==true and a linefeed character is detected, * it is included in the copy. * * @return the number of characters consumed diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java index 4ec8973b87a12..132206ccbda7e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactory.java @@ -22,6 +22,7 @@ import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.ExecutionContext; @@ -58,6 +59,9 @@ static TextReader create(CloudObject spec, Coder coder, boolean useDef return new TextReader<>(filenameOrPattern, getBoolean(spec, PropertyNames.STRIP_TRAILING_NEWLINES, true), getLong(spec, PropertyNames.START_OFFSET, null), - getLong(spec, PropertyNames.END_OFFSET, null), coder, useDefaultBufferSize); + getLong(spec, PropertyNames.END_OFFSET, null), coder, + useDefaultBufferSize, + Enum.valueOf(TextIO.CompressionType.class, + getString(spec, PropertyNames.COMPRESSION_TYPE, "AUTO"))); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index 26b260bd50f2d..0afe5ae411901 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -30,6 +30,7 @@ public class PropertyNames { public static final String CO_GBK_RESULT_SCHEMA = "co_gbk_result_schema"; public static final String COMBINE_FN = "combine_fn"; public static final String COMPONENT_ENCODINGS = "component_encodings"; + public static final String COMPRESSION_TYPE = "compression_type"; public static final String CUSTOM_SOURCE_FORMAT = "custom_source"; public static final String SOURCE_STEP_INPUT = "custom_source_step_input"; public static final String SOURCE_SPEC = "spec"; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index 1cc3bf64030dd..ead2e779eb026 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -31,6 +31,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.DirectPipeline; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; @@ -64,6 +65,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.zip.GZIPOutputStream; /** * Tests for TextIO Read and Write transforms. 
@@ -397,4 +399,39 @@ public void testWriteWithoutValidationFlag() throws Exception { assertTrue(write.needsValidation()); assertFalse(write.withoutValidation().needsValidation()); } + + @Test + public void testCompressionTypeIsSet() throws Exception { + TextIO.Read.Bound read = TextIO.Read.from("gs://bucket/test"); + assertEquals(CompressionType.AUTO, read.getCompressionType()); + read = TextIO.Read.from("gs://bucket/test").withCompressionType(CompressionType.GZIP); + assertEquals(CompressionType.GZIP, read.getCompressionType()); + } + + @Test + public void testCompressedRead() throws Exception { + String[] lines = {"Irritable eagle", "Optimistic jay", "Fanciful hawk"}; + File tmpFile = tmpFolder.newFile("test"); + String filename = tmpFile.getPath(); + + List expected = new ArrayList<>(); + try (PrintStream writer = + new PrintStream(new GZIPOutputStream(new FileOutputStream(tmpFile)))) { + for (String line : lines) { + writer.println(line); + expected.add(line); + } + } + + DirectPipeline p = DirectPipeline.createForTest(); + + TextIO.Read.Bound read = + TextIO.Read.from(filename).withCompressionType(CompressionType.GZIP); + PCollection output = p.apply(read); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(output), containsInAnyOrder(expected.toArray())); + tmpFile.delete(); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReaderTest.java new file mode 100644 index 0000000000000..3765af8a55901 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReaderTest.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; +import com.google.cloud.dataflow.sdk.runners.worker.FileBasedReader.FilenameBasedStreamFactory; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for FileBasedReader. 
+ */ +@RunWith(JUnit4.class) +public class FileBasedReaderTest { + + private void testGetStreamForAutoHelper(CompressionType expected, String filename) { + FilenameBasedStreamFactory factory = new FilenameBasedStreamFactory(filename, + CompressionType.AUTO); + CompressionType actual = factory.getCompressionTypeForAuto(); + Assert.assertEquals(expected, actual); + } + + @Test + public void testGetStreamForAuto() { + testGetStreamForAutoHelper(CompressionType.UNCOMPRESSED, "test"); + testGetStreamForAutoHelper(CompressionType.UNCOMPRESSED, "test.txt"); + testGetStreamForAutoHelper(CompressionType.GZIP, "test.gz"); + testGetStreamForAutoHelper(CompressionType.BZIP2, "test.bz2"); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java index 160eace961a72..a4a857c7ba8d5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderFactoryTest.java @@ -25,6 +25,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; @@ -44,7 +45,8 @@ @RunWith(JUnit4.class) public class TextReaderFactoryTest { void runTestCreateTextReader(String filename, @Nullable Boolean stripTrailingNewlines, - @Nullable Long start, @Nullable Long end, CloudObject encoding, Coder coder) + @Nullable Long start, @Nullable Long end, CloudObject encoding, Coder coder, + CompressionType compressionType) throws Exception { CloudObject spec = CloudObject.forClassName("TextSource"); addString(spec, "filename", filename); @@ -57,6 +59,7 @@ void runTestCreateTextReader(String filename, @Nullable Boolean stripTrailingNew if (end != null) { addLong(spec, "end_offset", end); } + addString(spec, "compression_type", compressionType.toString()); Source cloudSource = new Source(); cloudSource.setSpec(spec); @@ -73,17 +76,19 @@ void runTestCreateTextReader(String filename, @Nullable Boolean stripTrailingNew Assert.assertEquals(start, textReader.startPosition); Assert.assertEquals(end, textReader.endPosition); Assert.assertEquals(coder, textReader.coder); + Assert.assertEquals(compressionType, textReader.compressionType); } @Test public void testCreatePlainTextReader() throws Exception { runTestCreateTextReader("/path/to/file.txt", null, null, null, - makeCloudEncoding("StringUtf8Coder"), StringUtf8Coder.of()); + makeCloudEncoding("StringUtf8Coder"), StringUtf8Coder.of(), CompressionType.UNCOMPRESSED); } @Test public void testCreateRichTextReader() throws Exception { runTestCreateTextReader("gs://bucket/path/to/file2.txt", false, 200L, 500L, - makeCloudEncoding("TextualIntegerCoder"), TextualIntegerCoder.of()); + makeCloudEncoding("TextualIntegerCoder"), TextualIntegerCoder.of(), + CompressionType.UNCOMPRESSED); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index 5d2934c4535fc..95981a7eecccf 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -19,16 +19,20 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.greaterThan; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -39,11 +43,13 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; +import java.util.zip.GZIPOutputStream; /** * Tests for TextReader. @@ -78,7 +84,8 @@ private File initTestFile() throws IOException { @Test public void testReadEmptyFile() throws Exception { TextReader textReader = - new TextReader<>(tmpFolder.newFile().getPath(), true, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFolder.newFile().getPath(), true, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator iterator = textReader.iterator()) { Assert.assertFalse(iterator.hasNext()); } @@ -130,7 +137,8 @@ public void testStartPosition() throws Exception { { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -146,7 +154,8 @@ public void testStartPosition() throws Exception { { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -161,7 +170,8 @@ public void testStartPosition() throws Exception { { TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -175,7 +185,8 @@ public void testStartPosition() throws Exception { { TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), true, 1L, 
20L, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -204,7 +215,8 @@ public void testUtf8Handling() throws Exception { // the first line if counting chars. So correct behavior is to return // just one line, since offsets are in chars, not codepoints. TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -219,7 +231,8 @@ public void testUtf8Handling() throws Exception { // Starting location is mid-way into a codepoint. // Ensures we don't fail when skipping over an incomplete codepoint. TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -246,7 +259,8 @@ private void testNewlineHandling(String separator, boolean stripNewlines) throws writer.close(); TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -283,7 +297,8 @@ private void testStringPayload(String[] lines, String separator, boolean stripNe writer.close(); TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -313,7 +328,8 @@ public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() throws Excep Long fileSize = tmpFile.length(); TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, fileSize, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), stripNewlines, null, fileSize, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); List actual = new ArrayList<>(); Reader.ReaderIterator iterator = textReader.iterator(); @@ -339,7 +355,8 @@ public void testNonStringCoders() throws Exception { writer.close(); TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, null, null, TextualIntegerCoder.of()); + new TextReader<>(tmpFile.getPath(), true, null, null, TextualIntegerCoder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -358,7 +375,8 @@ public void testNonStringCoders() throws Exception { public void testGetApproximatePosition() throws Exception { File tmpFile = initTestFile(); TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator iterator = textReader.iterator()) { ApproximateProgress progress = sourceProgressToCloudProgress(iterator.getProgress()); @@ -388,7 +406,8 @@ 
public void testUpdateStopPosition() throws Exception { // Illegal proposed stop position, no update. { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -404,7 +423,8 @@ public void testUpdateStopPosition() throws Exception { // Successful update. { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -431,7 +451,8 @@ public void testUpdateStopPosition() throws Exception { // Proposed stop position is before the current position, no update. { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -457,7 +478,8 @@ public void testUpdateStopPosition() throws Exception { // Proposed stop position is after the current stop (end) position, no update. { TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, end, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, null, end, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -502,7 +524,8 @@ private void stopPositionTestInternal( // Read from source without split attempts. TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { @@ -514,7 +537,8 @@ private void stopPositionTestInternal( // Read the first half of the split. textReader = - new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader.TextFileIterator iterator = @@ -527,7 +551,8 @@ private void stopPositionTestInternal( // Read the second half of the split. 
textReader = - new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, StringUtf8Coder.of()); + new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader.TextFileIterator iterator = @@ -546,6 +571,112 @@ private ApproximateProgress createApproximateProgress( return new ApproximateProgress().setPosition(position); } + private OutputStream getOutputStreamForCompressionType(OutputStream stream, + CompressionType compressionType) throws IOException { + switch (compressionType) { + case GZIP: + return new GZIPOutputStream(stream); + case BZIP2: + return new BZip2CompressorOutputStream(stream); + case UNCOMPRESSED: + case AUTO: + return stream; + default: + Assert.fail("Unrecognized stream type"); + } + return stream; + } + + private File createFileWithCompressionType(String[] lines, String filename, + CompressionType compressionType) throws IOException { + File tmpFile = tmpFolder.newFile(filename); + PrintStream writer = new PrintStream( + getOutputStreamForCompressionType(new FileOutputStream(tmpFile), compressionType)); + for (String line : lines) { + writer.println(line); + } + writer.close(); + return tmpFile; + } + + private void testCompressionTypeHelper(String[] lines, String filename, + CompressionType outputCompressionType, CompressionType inputCompressionType) + throws IOException { + File tmpFile = createFileWithCompressionType(lines, filename, outputCompressionType); + + List expected = new ArrayList<>(); + for (String line : lines) { + expected.add(line); + } + + TextReader textReader = + new TextReader<>(tmpFile.getPath(), true, null, null, StringUtf8Coder.of(), + inputCompressionType); + + List actual = new ArrayList<>(); + try (Reader.ReaderIterator iterator = textReader.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); + } + } + Assert.assertEquals(expected, actual); + tmpFile.delete(); + } + + @Test + public void testCompressionTypeOneFile() throws IOException { + String[] contents = {"Miserable pigeon", "Vulnerable sparrow", "Brazen crow"}; + // test AUTO compression type with different extensions + testCompressionTypeHelper(contents, "test.gz", CompressionType.GZIP, CompressionType.AUTO); + testCompressionTypeHelper(contents, "test.bz2", CompressionType.BZIP2, CompressionType.AUTO); + testCompressionTypeHelper(contents, "test.txt", CompressionType.UNCOMPRESSED, + CompressionType.AUTO); + testCompressionTypeHelper(contents, "test", CompressionType.UNCOMPRESSED, + CompressionType.AUTO); + // test GZIP, BZIP2, and UNCOMPRESSED + testCompressionTypeHelper(contents, "test.txt", CompressionType.GZIP, CompressionType.GZIP); + testCompressionTypeHelper(contents, "test.txt", CompressionType.BZIP2, CompressionType.BZIP2); + testCompressionTypeHelper(contents, "test.gz", CompressionType.UNCOMPRESSED, + CompressionType.UNCOMPRESSED); + } + + @Test + public void testCompressionTypeFileGlob() throws IOException { + String[][] contents = { + {"Miserable pigeon", "Vulnerable sparrow", "Brazen crow"}, + {"Timid osprey", "Lazy vulture"}, + {"Erratic finch", "Impressible parakeet"}, + }; + File[] files = { + createFileWithCompressionType(contents[0], "test.gz", CompressionType.GZIP), + createFileWithCompressionType(contents[1], "test.bz2", CompressionType.BZIP2), + createFileWithCompressionType(contents[2], "test.txt", CompressionType.UNCOMPRESSED), + }; + + List expected = new ArrayList<>(); + for (String[] fileContents : contents) { 
+ for (String line : fileContents) { + expected.add(line); + } + } + + String path = tmpFolder.getRoot().getPath() + System.getProperty("file.separator") + "*"; + + TextReader textReader = + new TextReader<>(path, true, null, null, StringUtf8Coder.of(), CompressionType.AUTO); + + List actual = new ArrayList<>(); + try (Reader.ReaderIterator iterator = textReader.iterator()) { + while (iterator.hasNext()) { + actual.add(iterator.next()); + } + } + Assert.assertThat(actual, containsInAnyOrder(expected.toArray())); + for (File file : files) { + file.delete(); + } + } + // TODO: sharded filenames // TODO: reading from GCS } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java index b3cebad3e21e5..898d3ad1891af 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/IOFactoryTest.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.runners.worker.TextReader; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; @@ -77,7 +78,8 @@ public void testMultiFileRead() throws Exception { TextReader reader = new TextReader<>( - tmpFolder.getRoot() + "/file*", true/* strip newlines */, null, null, StringUtf8Coder.of()); + tmpFolder.getRoot() + "/file*", true/* strip newlines */, null, null, StringUtf8Coder.of(), + TextIO.CompressionType.UNCOMPRESSED); Set records = new TreeSet<>(); try (Reader.ReaderIterator iterator = reader.iterator()) { From a741a31cc6455db62097121da4b5bb250690418f Mon Sep 17 00:00:00 2001 From: dhuo Date: Thu, 29 Jan 2015 14:30:37 -0800 Subject: [PATCH 0108/1541] Explicitly close the underlying readChannel before retries in GoogleCloudStorageReadChannel, catching and ignoring (but logging) SSLException since these indicate close() cannot complete normally, but the underlying resources have already been cleaned up so it's safe for us to proceed with the retry and simply abandon the existing readChannel. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85102060 --- .../gcsio/GoogleCloudStorageReadChannel.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java index a3d9b65347b2a..61b931561c370 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java @@ -38,6 +38,7 @@ import java.nio.channels.ReadableByteChannel; import java.nio.channels.SeekableByteChannel; import java.util.regex.Pattern; +import javax.net.ssl.SSLException; /** * Provides seekable read access to GCS. 
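As a rough illustration of the close-before-retry pattern this patch describes in the hunk that follows (a minimal sketch only; the class, field, and logger names here are illustrative and are not the SDK's own API):

    import java.io.IOException;
    import java.nio.channels.ReadableByteChannel;
    import java.util.logging.Logger;
    import javax.net.ssl.SSLException;

    class CloseBeforeRetrySketch {
      private static final Logger LOG =
          Logger.getLogger(CloseBeforeRetrySketch.class.getName());
      private ReadableByteChannel readChannel;  // stands in for the underlying GCS read channel

      // Close the (likely broken) channel before re-opening it for a retry. An SSLException
      // from close() only means the TLS shutdown handshake could not complete; the socket's
      // local resources are already cleaned up, so log it and abandon the channel.
      void closeBeforeRetry() throws IOException {
        if (readChannel == null) {
          return;
        }
        try {
          readChannel.close();
          readChannel = null;
        } catch (SSLException e) {
          LOG.warning("Ignoring SSLException on close() before retry: " + e);
          readChannel = null;
        }
        // Other IOExceptions are allowed to propagate with readChannel left non-null,
        // so a caller able to handle them may still attempt a clean close() later.
      }
    }
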
@@ -317,6 +318,27 @@ public int read(ByteBuffer buffer) long newPosition = currentPosition; currentPosition = -1; position(newPosition); + + // Before performing lazy seek, explicitly close the underlying channel if necessary, + // catching and ignoring SSLException since the retry indicates an error occurred, so + // there's a high probability that SSL connections would be broken in a way that + // causes close() itself to throw an exception, even though underlying sockets have + // already been cleaned up; close() on an SSLSocketImpl requires a shutdown handshake + // in order to shutdown cleanly, and if the connection has been broken already, then + // this is not possible, and the SSLSocketImpl was already responsible for performing + // local cleanup at the time the exception was raised. + if (lazySeekPending && readChannel != null) { + try { + readChannel.close(); + readChannel = null; + } catch (SSLException ssle) { + LOG.warn("Got SSLException on readChannel.close() before retry; ignoring it.", ssle); + readChannel = null; + } + // For "other" exceptions, we'll let it propagate out without setting readChannel to + // null, in case the caller is able to handle it and then properly try to close() + // again. + } performLazySeek(); } } From e3c4ded230309572b0a2fbba36ff7882c7fbf23e Mon Sep 17 00:00:00 2001 From: peihe Date: Thu, 29 Jan 2015 15:36:54 -0800 Subject: [PATCH 0109/1541] Windowing: Worker code for ReifyTsAndWindowsParDoFn and GroupAlsoByWindowsParDoFn. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85107960 --- .../worker/GroupAlsoByWindowsParDoFn.java | 22 ++++- .../sdk/runners/worker/ParDoFnFactory.java | 2 + .../ReifyTimestampAndWindowsParDoFn.java | 81 +++++++++++++++++++ .../dataflow/sdk/transforms/GroupByKey.java | 17 +--- .../util/ReifyTimestampAndWindowsDoFn.java | 41 ++++++++++ 5 files changed, 149 insertions(+), 14 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index 7e9e2bd89774e..9b924994b7958 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -24,11 +24,13 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.StreamingOptions; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.DoFnInfo; import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.SerializableUtils; @@ -49,6 +51,7 @@ * NormalParDoFn, except that it gets deserialized differently. 
*/ class GroupAlsoByWindowsParDoFn extends NormalParDoFn { + public static GroupAlsoByWindowsParDoFn create( PipelineOptions options, CloudObject cloudUserFn, @@ -93,7 +96,14 @@ public static GroupAlsoByWindowsParDoFn create( "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName()); } - DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + boolean isStreamingPipeline = false; + if (options instanceof StreamingOptions) { + isStreamingPipeline = ((StreamingOptions) options).isStreaming(); + } + + DoFnInfoFactory fnFactory; + if (isStreamingPipeline) { + fnFactory = new DoFnInfoFactory() { @Override public DoFnInfo createDoFnInfo() { return new DoFnInfo(StreamingGroupAlsoByWindowsDoFn.create( @@ -102,6 +112,16 @@ public DoFnInfo createDoFnInfo() { null); } }; + } else { + fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() { + return new DoFnInfo( + new GroupAlsoByWindowsDoFn((WindowFn) windowFn, elemCoder), + null); + } + }; + } return new GroupAlsoByWindowsParDoFn( options, fnFactory, stepName, executionContext, addCounterMutator); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java index 23d4040685bfd..e2ae0841fd8e6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java @@ -68,6 +68,8 @@ private ParDoFnFactory() {} GroupAlsoByWindowsParDoFn.class.getName()); predefinedParDoFnFactories.put("AssignWindowsDoFn", AssignWindowsParDoFn.class.getName()); + predefinedParDoFnFactories.put("ReifyTimestampAndWindowsDoFn", + ReifyTimestampAndWindowsParDoFn.class.getName()); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java new file mode 100644 index 0000000000000..4cab266e05068 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.services.dataflow.model.MultiOutputInfo; +import com.google.api.services.dataflow.model.SideInputInfo; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.PTuple; +import com.google.cloud.dataflow.sdk.util.ReifyTimestampAndWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator; +import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; + +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * A wrapper around a ReifyTimestampAndWindowsDoFn. This class is the same as NormalParDoFn, except + * that it gets deserialized differently. + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class ReifyTimestampAndWindowsParDoFn extends NormalParDoFn { + + public static ReifyTimestampAndWindowsParDoFn create( + PipelineOptions options, + CloudObject cloudUserFn, + String stepName, + @Nullable List sideInputInfos, + @Nullable List multiOutputInfos, + Integer numOutputs, + ExecutionContext executionContext, + CounterSet.AddCounterMutator addCounterMutator, + StateSampler sampler /* unused */) + throws Exception { + + final DoFn doFn = new ReifyTimestampAndWindowsDoFn(); + + DoFnInfoFactory fnFactory = new DoFnInfoFactory() { + @Override + public DoFnInfo createDoFnInfo() { + return new DoFnInfo(doFn, null); + } + }; + return new ReifyTimestampAndWindowsParDoFn( + options, fnFactory, stepName, executionContext, addCounterMutator); + } + private ReifyTimestampAndWindowsParDoFn( + PipelineOptions options, + DoFnInfoFactory fnFactory, + String stepName, + ExecutionContext executionContext, + AddCounterMutator addCounterMutator) { + super(options, + fnFactory, + PTuple.empty(), + Arrays.asList("output"), + stepName, + executionContext, + addCounterMutator); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 9b56585fb9ea9..4d11723c99638 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -16,7 +16,6 @@ package com.google.cloud.dataflow.sdk.transforms; -import static com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -24,12 +23,14 @@ import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; import 
com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.ReifyTimestampAndWindowsDoFn; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; @@ -162,17 +163,7 @@ public PCollection>> apply( inputValueCoder, getInput().getWindowFn().windowCoder()); Coder>> outputKvCoder = KvCoder.of(keyCoder, outputValueCoder); - return input.apply(ParDo.of( - new DoFn, KV>>() { - @Override - public void processElement(ProcessContext c) { - KV kv = c.element(); - K key = kv.getKey(); - V value = kv.getValue(); - c.output(KV.of( - key, - WindowedValue.of(value, c.timestamp(), c.windows()))); - }})) + return input.apply(ParDo.of(new ReifyTimestampAndWindowsDoFn())) .setCoder(outputKvCoder); } } @@ -484,7 +475,7 @@ public PCollection>> applyHelper( .apply(new GroupByKeyOnly>()); if (!runnerSortsByTimestamp) { - // Sort each key's values by timestamp. GroupAlsoByWindow requires + // Sort each key's values by timestamp. GroupAlsoByWindow requires // its input to be sorted by timestamp. gbkOutput = gbkOutput.apply(new SortValuesByTimestamp()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java new file mode 100644 index 0000000000000..d3d0947f44215 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java @@ -0,0 +1,41 @@ +/******************************************************************************* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.values.KV; + +/** + * DoFn that makes timestamps and window assignments explicit in the value part of each key/value + * pair. + * + * @param the type of the keys of the input and output {@code PCollection}s + * @param the type of the values of the input {@code PCollection} + */ +public class ReifyTimestampAndWindowsDoFn + extends DoFn, KV>> { + @Override + public void processElement(ProcessContext c) + throws Exception { + KV kv = c.element(); + K key = kv.getKey(); + V value = kv.getValue(); + c.output(KV.of( + key, + WindowedValue.of(value, c.timestamp(), c.windows()))); + } +} From fbdd33e2675ef8b79f5534ebd15cf58b00a826e2 Mon Sep 17 00:00:00 2001 From: andersjohnson Date: Thu, 29 Jan 2015 16:21:28 -0800 Subject: [PATCH 0110/1541] Modify .travis.yml file also to test on osx, and add test_wordcount.sh, which it uses. 
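For local runs, the script's only (optional) argument is the path from the script's directory to the directory containing the top-level pom.xml, and its exit code reports overall success or failure; a typical invocation (paths here are hypothetical, assuming a repository checkout root) would be:

    ./test_wordcount.sh                         # use the pom.xml next to the script
    ./test_wordcount.sh path/to/dir-with-pom    # or point at the directory holding the parent pom.xml

The .travis.yml hunk below simply adds the first form to the CI script section.
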
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85112006 --- .travis.yml | 10 ++-- test_wordcount.sh | 119 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 test_wordcount.sh diff --git a/.travis.yml b/.travis.yml index 8cdc4ca197fe3..b9b15456657df 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,9 @@ language: java +os: + - linux + - osx + notifications: email: recipients: @@ -7,13 +11,9 @@ notifications: on_success: change on_failure: always -jdk: - - oraclejdk8 - - oraclejdk7 - - openjdk7 - install: - mvn install clean -U -DskipTests=true script: - mvn verify -U + - ./test_wordcount.sh diff --git a/test_wordcount.sh b/test_wordcount.sh new file mode 100644 index 0000000000000..81e2e2a6baa8c --- /dev/null +++ b/test_wordcount.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +# This script runs WordCount example locally in a few different ways. +# Specifically, all combinations of: +# a) using mvn exec, or java -cp with a bundled jar file; +# b) input filename with no directory component, with a relative directory, or +# with an absolute directory; AND +# c) input filename containing wildcards or not. +# +# The one optional parameter is a path from the directory containing the script +# to the directory containing the top-level (parent) pom.xml. If no parameter +# is provided, the script assumes that directory is equal to the directory +# containing the script itself. +# +# The exit-code of the script indicates success or a failure. + +set -e +set -o pipefail + +cd $(dirname $0) + +TOPDIR="." +if [[ $# -gt 0 ]] +then + TOPDIR="$1" +fi + +PASS=1 +JAR_FILE=$TOPDIR/examples/target/google-cloud-dataflow-java-examples-all-bundled-manual_build.jar + +function check_result_hash { + local name=$1 + local outfile_prefix=$2 + local expected=$3 + + local actual=$(md5sum $outfile_prefix-* | awk '{print $1}') + if [[ "$actual" != "$expected" ]] + then + echo "FAIL $name: Output hash mismatch. Got $actual, expected $expected." + PASS="" + else + echo "pass $name" + # Output files are left behind in /tmp + fi +} + +function get_outfile_prefix { + local name=$1 + mktemp --tmpdir=/tmp -u "$name.out.XXXXXXXXXX" +} + +function run_via_mvn { + local name=$1 + local input=$2 + local expected_hash=$3 + + local outfile_prefix="$(get_outfile_prefix "$name")" + local cmd='mvn exec:java -f '"$TOPDIR"'/pom.xml -pl examples \ + -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ + -Dexec.args="--runner=DirectPipelineRunner --input='"$input"' --output='"$outfile_prefix"'"' + echo "$name: Running $cmd" >&2 + sh -c "$cmd" + check_result_hash "$name" "$outfile_prefix" "$expected_hash" +} + +function run_bundled { + local name=$1 + local input=$2 + local expected_hash=$3 + + local outfile_prefix="$(get_outfile_prefix "$name")" + local cmd='java -cp '"$JAR_FILE"' \ + com.google.cloud.dataflow.examples.WordCount \ + --runner=DirectPipelineRunner \ + --input='"$input"' \ + --output='"$outfile_prefix" + echo "$name: Running $cmd" >&2 + sh -c "$cmd" + check_result_hash "$name" "$outfile_prefix" "$expected_hash" +} + +function run_all_ways { + local name=$1 + local input=$2 + local expected_hash=$3 + + run_via_mvn ${name}a $input $expected_hash + check_for_jar_file + run_bundled ${name}b $input $expected_hash +} + +function check_for_jar_file { + if [[ ! -f $JAR_FILE ]] + then + echo "Jar file $JAR_FILE not created" >&2 + exit 2 + fi +} + +# NOTE: We could still test via mvn exec if this fails for some reason. 
Perhaps +# we ought to do that. +echo "Generating bundled JAR file" >&2 +# NOTE: If this fails, run "mvn clean install" and try again. +mvn bundle:bundle -f $TOPDIR/pom.xml -pl examples +check_for_jar_file + +run_all_ways wordcount1 "LICENSE" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount2 "./LICENSE" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount3 "$PWD/LICENSE" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount4 "L*N?E*" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount5 "./LICE*N?E" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount6 "$PWD/*LIC?NSE" f4af56cd6f6f127536d586a6adcefba1 + +if [[ ! "$PASS" ]] +then + echo "One or more tests FAILED." + exit 1 +fi +echo "All tests PASS" From 8b9f2b8bec35f02003a2da3d24929f3156cc0732 Mon Sep 17 00:00:00 2001 From: relax Date: Fri, 30 Jan 2015 14:19:55 -0800 Subject: [PATCH 0111/1541] Move path validation, credential validation, and file staging into separate classes that can be overridden. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85191128 --- .../options/DataflowPipelineDebugOptions.java | 70 ++++++++++++++++ .../DataflowPipelineWorkerPoolOptions.java | 2 +- .../dataflow/sdk/options/GcpOptions.java | 22 +++++- .../sdk/options/PipelineOptionsFactory.java | 16 ++++ .../sdk/runners/DataflowPipelineRunner.java | 46 +++-------- .../runners/DataflowPipelineTranslator.java | 15 ---- .../runners/dataflow/AvroIOTranslator.java | 15 ++-- .../runners/dataflow/TextIOTranslator.java | 18 ++--- .../runners/worker/DataflowWorkerHarness.java | 4 - .../dataflow/sdk/util/CredentialFactory.java | 29 +++++++ .../sdk/util/DataflowPathValidator.java | 79 +++++++++++++++++++ .../sdk/util/GcpCredentialFactory.java | 45 +++++++++++ .../cloud/dataflow/sdk/util/GcsStager.java | 48 +++++++++++ .../sdk/util/NoopCredentialFactory.java | 38 +++++++++ .../dataflow/sdk/util/NoopPathValidator.java | 52 ++++++++++++ .../cloud/dataflow/sdk/util/NoopStager.java | 37 +++++++++ .../dataflow/sdk/util/PathValidator.java | 54 +++++++++++++ .../cloud/dataflow/sdk/util/Stager.java | 29 +++++++ .../runners/DataflowPipelineRunnerTest.java | 5 +- 19 files changed, 544 insertions(+), 80 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopStager.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java index 76de6e6dd8bf6..ce536e69a93d9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java @@ -16,6 +16,12 @@ package com.google.cloud.dataflow.sdk.options; 
+import com.google.cloud.dataflow.sdk.util.DataflowPathValidator; +import com.google.cloud.dataflow.sdk.util.GcsStager; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; +import com.google.cloud.dataflow.sdk.util.PathValidator; +import com.google.cloud.dataflow.sdk.util.Stager; + import java.util.List; /** @@ -64,4 +70,68 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { @Description("File for writing dataflow job descriptions") String getDataflowJobFile(); void setDataflowJobFile(String value); + + /** + * The name of the validator class used to validate path names. + */ + @Description("The validator class used to validate path names.") + @Default.Class(DataflowPathValidator.class) + Class getPathValidatorClass(); + void setPathValidatorClass(Class validatorClass); + + /** + * The validator class used to validate path names. + */ + @Description("The validator class used to validate path names.") + @Default.InstanceFactory(PathValidatorFactory.class) + PathValidator getPathValidator(); + void setPathValidator(PathValidator validator); + + /** + * The class used to stage files. + */ + @Description("The class used to stage files.") + @Default.Class(GcsStager.class) + Class getStagerClass(); + void setStagerClass(Class stagerClass); + + /** + * The stager instance used to stage files. + */ + @Description("The class use to stage packages.") + @Default.InstanceFactory(StagerFactory.class) + Stager getStager(); + void setStager(Stager stager); + + /** + * Creates a {@link PathValidator} object using the class specified in + * {@link #getPathValidatorClass()}. + */ + public static class PathValidatorFactory implements DefaultValueFactory { + @Override + public PathValidator create(PipelineOptions options) { + DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class); + return InstanceBuilder.ofType(PathValidator.class) + .fromClass(debugOptions.getPathValidatorClass()) + .fromFactoryMethod("fromOptions") + .withArg(PipelineOptions.class, options) + .build(); + } + } + + /** + * Creates a {@link Stager} object using the class specified in + * {@link #getStagerClass()}. + */ + public static class StagerFactory implements DefaultValueFactory { + @Override + public Stager create(PipelineOptions options) { + DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class); + return InstanceBuilder.ofType(Stager.class) + .fromClass(debugOptions.getStagerClass()) + .fromFactoryMethod("fromOptions") + .withArg(PipelineOptions.class, options) + .build(); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index 25653495cc0ea..ddcdc695d23e1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -21,7 +21,7 @@ /** * Options which are used to configure the Dataflow pipeline worker pool. */ -public interface DataflowPipelineWorkerPoolOptions { +public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions { /** * Disk source image to use by VMs for jobs. 
* @see Compute Engine Images diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 114bc0d7c5594..0d824405b9075 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -17,7 +17,9 @@ package com.google.cloud.dataflow.sdk.options; import com.google.api.client.auth.oauth2.Credential; -import com.google.cloud.dataflow.sdk.util.Credentials; +import com.google.cloud.dataflow.sdk.util.CredentialFactory; +import com.google.cloud.dataflow.sdk.util.GcpCredentialFactory; +import com.google.cloud.dataflow.sdk.util.InstanceBuilder; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -126,6 +128,12 @@ public String create(PipelineOptions options) { String getCredentialId(); void setCredentialId(String value); + @Description("The factory class used to create oauth credentials") + @Default.Class(GcpCredentialFactory.class) + Class getCredentialFactoryClass(); + void setCredentialFactoryClass( + Class credentialFactoryClass); + /** Alternative Google Cloud Platform Credential. */ @JsonIgnore @Description("Google Cloud Platform user credentials.") @@ -134,14 +142,20 @@ public String create(PipelineOptions options) { void setGcpCredential(Credential value); /** - * Attempts to load the user credentials. See - * {@link Credentials#getCredential(GcpOptions)} for more details. + * Attempts to load the GCP credentials. See + * {@link CredentialFactory#getCredential()} for more details. */ public static class GcpUserCredentialsFactory implements DefaultValueFactory { @Override public Credential create(PipelineOptions options) { + GcpOptions gcpOptions = options.as(GcpOptions.class); try { - return Credentials.getCredential(options.as(GcpOptions.class)); + CredentialFactory factory = InstanceBuilder.ofType(CredentialFactory.class) + .fromClass(gcpOptions.getCredentialFactoryClass()) + .fromFactoryMethod("fromOptions") + .withArg(PipelineOptions.class, options) + .build(); + return factory.getCredential(); } catch (IOException | GeneralSecurityException e) { throw new RuntimeException("Unable to obtain credential", e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index b20771a7c45f0..24f8e15069108 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -522,6 +522,22 @@ public static DataflowWorkerHarnessOptions createFromSystemProperties() { if (System.getProperties().containsKey("job_id")) { options.setJobId(System.getProperty("job_id")); } + if (System.getProperties().containsKey("path_validator_class")) { + try { + options.setPathValidatorClass((Class) Class.forName( + System.getProperty("path_validator_class"))); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Unable to find validator class", e); + } + } + if (System.getProperties().containsKey("credential_factory_class")) { + try { + options.setCredentialFactoryClass((Class) Class.forName( + System.getProperty("credential_factory_class"))); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Unable to find credential factory class", e); + } + } return options; } diff --git 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index b6003b2001064..4da69ac460e5d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -29,13 +29,11 @@ import com.google.cloud.dataflow.sdk.transforms.GroupByKey; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; -import com.google.cloud.dataflow.sdk.util.GcsUtil; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.MonitoringUtil; -import com.google.cloud.dataflow.sdk.util.PackageUtil; +import com.google.cloud.dataflow.sdk.util.PathValidator; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.Transport; -import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PInput; import com.google.cloud.dataflow.sdk.values.POutput; @@ -71,12 +69,6 @@ public class DataflowPipelineRunner extends PipelineRunner /** Provided configuration options. */ private final DataflowPipelineOptions options; - /** The directory on GCS where files should be uploaded. */ - private final GcsPath gcsStaging; - - /** The directory on GCS where temporary files are stored. */ - private final GcsPath gcsTemp; - /** Client for the Dataflow service. This is used to actually submit jobs. */ private final Dataflow dataflowClient; @@ -111,15 +103,8 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { "Missing required values: " + Joiner.on(',').join(missing)); } - Preconditions.checkArgument(!(Strings.isNullOrEmpty(dataflowOptions.getTempLocation()) - && Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())), - "Missing required value: at least one of tempLocation or stagingLocation must be set."); - if (Strings.isNullOrEmpty(dataflowOptions.getTempLocation())) { - dataflowOptions.setTempLocation(dataflowOptions.getStagingLocation()); - } else if (Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())) { - dataflowOptions.setStagingLocation( - GcsPath.fromUri(dataflowOptions.getTempLocation()).resolve("staging").toString()); - } + PathValidator validator = dataflowOptions.getPathValidator(); + validator.validateAndUpdateOptions(); if (dataflowOptions.getFilesToStage() == null) { dataflowOptions.setFilesToStage(detectClassPathResourcesToStage( @@ -144,8 +129,6 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { private DataflowPipelineRunner(DataflowPipelineOptions options) { this.options = options; this.dataflowClient = options.getDataflowClient(); - this.gcsTemp = GcsPath.fromUri(options.getTempLocation()); - this.gcsStaging = GcsPath.fromUri(options.getStagingLocation()); this.translator = DataflowPipelineTranslator.fromOptions(options); // (Re-)register standard IO factories. Clobbers any prior credentials. 
@@ -176,10 +159,7 @@ public DataflowPipelineJob run(Pipeline pipeline) { LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications " + "related to Google Compute Engine usage and other Google Cloud Services."); - GcsUtil gcsUtil = options.getGcsUtil(); - List packages = - PackageUtil.stageClasspathElementsToGcs(gcsUtil, options.getFilesToStage(), gcsStaging); - + List packages = options.getStager().stageFiles(); Job newJob = translator.translate(pipeline, packages); String version = DataflowReleaseInfo.getReleaseInfo().getVersion(); @@ -188,7 +168,11 @@ public DataflowPipelineJob run(Pipeline pipeline) { newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo()); // The Dataflow Service may write to the temporary directory directly, so // must be verified. - newJob.getEnvironment().setTempStoragePrefix(verifyGcsPath(gcsTemp).toResourceName()); + if (!Strings.isNullOrEmpty(options.getTempLocation())) { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + newJob.getEnvironment().setTempStoragePrefix( + dataflowOptions.getPathValidator().verifyGcsPath(options.getTempLocation())); + } newJob.getEnvironment().setDataset(options.getTempDatasetId()); newJob.getEnvironment().setClusterManagerApiService( options.getClusterManagerApi().getApiServiceName()); @@ -269,18 +253,6 @@ public void setHooks(DataflowPipelineRunnerHooks hooks) { @Override public String toString() { return "DataflowPipelineRunner#" + hashCode(); } - /** - * Verifies that a path can be used by the Dataflow Service API. - * @return the supplied path - */ - public static GcsPath verifyGcsPath(GcsPath path) { - Preconditions.checkArgument(path.isAbsolute(), - "Must provide absolute paths for Dataflow"); - Preconditions.checkArgument(!path.getObject().contains("//"), - "Dataflow Service does not allow objects with consecutive slashes"); - return path; - } - /** * Attempts to detect all the resources the class loader has access to. This does not recurse * to class loader parents stopping it from pulling in resources from the system class loader. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 54dd9326c67f6..b072f64fd576b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -73,7 +73,6 @@ import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; -import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionTuple; import com.google.cloud.dataflow.sdk.values.PCollectionView; @@ -243,15 +242,6 @@ public interface TranslationContext { */ public void addInput(String name, PInput value); - /** - * Adds an input with the given name and value to the current - * Dataflow step. - * - *

    This applies any verification of paths required by the Dataflow - * service. - */ - public void addInput(String name, GcsPath path); - /** * Adds an input which is a dictionary of strings to objects. */ @@ -586,11 +576,6 @@ public void addInput(String name, PInput value) { } } - @Override - public void addInput(String name, GcsPath path) { - addInput(name, DataflowPipelineRunner.verifyGcsPath(path).toResourceName()); - } - @Override public void addOutput(String name, PValue value) { Coder coder; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java index f191b25ba86ee..e23afad8357b9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java @@ -22,9 +22,9 @@ import com.google.cloud.dataflow.sdk.io.ShardNameTemplate; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.util.PathValidator; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.WindowedValue; -import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; /** * Avro transform support code for the Dataflow backend. @@ -51,11 +51,11 @@ private void translateReadHelper( throw new IllegalArgumentException("AvroIO not supported in streaming mode."); } - // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. - GcsPath gcsPath = GcsPath.fromUri(transform.getFilepattern()); + PathValidator validator = context.getPipelineOptions().getPathValidator(); + String filepattern = validator.validateInputFilePatternSupported(transform.getFilepattern()); context.addStep(transform, "ParallelRead"); context.addInput(PropertyNames.FORMAT, "avro"); - context.addInput(PropertyNames.FILEPATTERN, gcsPath); + context.addInput(PropertyNames.FILEPATTERN, filepattern); context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); // TODO: Orderedness? } @@ -77,8 +77,9 @@ public void translate( private void translateWriteHelper( AvroIO.Write.Bound transform, TranslationContext context) { - // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. 
- GcsPath gcsPath = GcsPath.fromUri(transform.getFilenamePrefix()); + PathValidator validator = context.getPipelineOptions().getPathValidator(); + String filenamePrefix = validator.validateOutputFilePrefixSupported( + transform.getFilenamePrefix()); context.addStep(transform, "ParallelWrite"); context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); @@ -98,7 +99,7 @@ private void translateWriteHelper( } context.addInput(PropertyNames.FORMAT, "avro"); - context.addInput(PropertyNames.FILENAME_PREFIX, gcsPath); + context.addInput(PropertyNames.FILENAME_PREFIX, filenamePrefix); context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, transform.getShardTemplate()); context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java index 9cf059bf53126..aa80248f3fcba 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java @@ -21,9 +21,9 @@ import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext; +import com.google.cloud.dataflow.sdk.util.PathValidator; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.WindowedValue; -import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; /** * TextIO transform support code for the Dataflow backend. @@ -49,16 +49,14 @@ private void translateReadHelper( throw new IllegalArgumentException("TextIO not supported in streaming mode."); } - // Validate the provided GCS path. - GcsPath gcsPath = GcsPath.fromUri(transform.getFilepattern()); - Preconditions.checkArgument( - context.getPipelineOptions().getGcsUtil().isGcsPatternSupported(gcsPath.getObject())); + PathValidator validator = context.getPipelineOptions().getPathValidator(); + String filepattern = validator.validateInputFilePatternSupported(transform.getFilepattern()); context.addStep(transform, "ParallelRead"); // TODO: How do we want to specify format and // format-specific properties? context.addInput(PropertyNames.FORMAT, "text"); - context.addInput(PropertyNames.FILEPATTERN, gcsPath); + context.addInput(PropertyNames.FILEPATTERN, filepattern); context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); context.addInput(PropertyNames.VALIDATE_SOURCE, transform.needsValidation()); context.addInput(PropertyNames.COMPRESSION_TYPE, transform.getCompressionType().toString()); @@ -86,8 +84,10 @@ private void translateWriteHelper( throw new IllegalArgumentException("TextIO not supported in streaming mode."); } - // Only GCS paths are permitted for filepatterns in the DataflowPipelineRunner. - GcsPath gcsPath = GcsPath.fromUri(transform.getFilenamePrefix()); + PathValidator validator = context.getPipelineOptions().getPathValidator(); + String filenamePrefix = validator.validateOutputFilePrefixSupported( + transform.getFilenamePrefix()); + context.addStep(transform, "ParallelWrite"); context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); @@ -109,7 +109,7 @@ private void translateWriteHelper( // TODO: How do we want to specify format and // format-specific properties? 
context.addInput(PropertyNames.FORMAT, "text"); - context.addInput(PropertyNames.FILENAME_PREFIX, gcsPath); + context.addInput(PropertyNames.FILENAME_PREFIX, filenamePrefix); context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, transform.getShardNameTemplate()); context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index f2601a6b5fa02..d069b63d67a6a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -20,7 +20,6 @@ import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; import com.google.api.client.util.Lists; -import com.google.api.client.util.Preconditions; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.dataflow.model.LeaseWorkItemRequest; import com.google.api.services.dataflow.model.LeaseWorkItemResponse; @@ -147,9 +146,6 @@ static DataflowWorker create(DataflowWorkerHarnessOptions options) { MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORKER_ID, options.getWorkerId()); options.setAppName(APPLICATION_NAME); - Preconditions.checkState(options.getGcpCredential() != null, - "Failed to obtain GCP credential in worker."); - // Configure standard IO factories. IOChannelUtils.setIOFactory("gs", new GcsIOChannelFactory(options)); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java new file mode 100644 index 0000000000000..0bc3012a5f202 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.Credential; + +import java.io.IOException; +import java.security.GeneralSecurityException; + +/** + * Construct an oauth credential to be used by the SDK and the SDK workers. + */ +public interface CredentialFactory { + public Credential getCredential() throws IOException, GeneralSecurityException; +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java new file mode 100644 index 0000000000000..ab79906eb77d5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.common.base.Strings; + +/** + * GCP implementation of {@link PathValidator}. Only GCS paths are allowed. + */ +public class DataflowPathValidator implements PathValidator { + + private DataflowPipelineOptions dataflowOptions; + + private DataflowPathValidator(DataflowPipelineOptions options) { + this.dataflowOptions = options; + } + + public static DataflowPathValidator fromOptions(PipelineOptions options) { + return new DataflowPathValidator(options.as(DataflowPipelineOptions.class)); + } + + @Override + public void validateAndUpdateOptions() { + Preconditions.checkArgument(!(Strings.isNullOrEmpty(dataflowOptions.getTempLocation()) + && Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())), + "Missing required value: at least one of tempLocation or stagingLocation must be set."); + if (Strings.isNullOrEmpty(dataflowOptions.getTempLocation())) { + dataflowOptions.setTempLocation(dataflowOptions.getStagingLocation()); + } else if (Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())) { + dataflowOptions.setStagingLocation( + GcsPath.fromUri(dataflowOptions.getTempLocation()).resolve("staging").toString()); + } + } + + @Override + public String validateInputFilePatternSupported(String filepattern) { + GcsPath gcsPath = GcsPath.fromUri(filepattern); + Preconditions.checkArgument( + dataflowOptions.getGcsUtil().isGcsPatternSupported(gcsPath.getObject())); + return verifyGcsPath(filepattern); + } + + @Override + public String validateOutputFilePrefixSupported(String filePrefix) { + return verifyGcsPath(filePrefix); + } + + /** + * Verifies that a path can be used by the Dataflow Service API. + * @return the supplied path + */ + @Override + public String verifyGcsPath(String path) { + GcsPath gcsPath = GcsPath.fromUri(path); + Preconditions.checkArgument(gcsPath.isAbsolute(), + "Must provide absolute paths for Dataflow"); + Preconditions.checkArgument(!gcsPath.getObject().contains("//"), + "Dataflow Service does not allow objects with consecutive slashes"); + return gcsPath.toResourceName(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java new file mode 100644 index 0000000000000..2999a56e4368b --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.cloud.dataflow.sdk.options.GcpOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +import java.io.IOException; +import java.security.GeneralSecurityException; + +/** + * Construct an oauth credential to be used by the SDK and the SDK workers. + * Returns a GCP credential. + */ +public class GcpCredentialFactory implements CredentialFactory { + private GcpOptions options; + + private GcpCredentialFactory(GcpOptions options) { + this.options = options; + } + + public static GcpCredentialFactory fromOptions(PipelineOptions options) { + return new GcpCredentialFactory(options.as(GcpOptions.class)); + } + + @Override + public Credential getCredential() throws IOException, GeneralSecurityException { + return Credentials.getCredential(options); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java new file mode 100644 index 0000000000000..718071d2afab1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.common.base.Preconditions; + +import java.util.List; + +/** + * Utility class for staging files to GCS. 
+ */ +public class GcsStager implements Stager { + private DataflowPipelineOptions options; + + private GcsStager(DataflowPipelineOptions options) { + this.options = options; + } + + public static GcsStager fromOptions(PipelineOptions options) { + return new GcsStager(options.as(DataflowPipelineOptions.class)); + } + + @Override + public List stageFiles() { + Preconditions.checkNotNull(options.getStagingLocation()); + return PackageUtil.stageClasspathElementsToGcs( + options.getGcsUtil(), options.getFilesToStage(), + GcsPath.fromUri(options.getStagingLocation())); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java new file mode 100644 index 0000000000000..5292f8e5c2d53 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +import java.io.IOException; +import java.security.GeneralSecurityException; + +/** + * Construct an oauth credential to be used by the SDK and the SDK workers. + * Always returns a null Credential object. + */ +public class NoopCredentialFactory implements CredentialFactory { + public static NoopCredentialFactory fromOptions(PipelineOptions options) { + return new NoopCredentialFactory(); + } + + @Override + public Credential getCredential() throws IOException, GeneralSecurityException { + return null; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java new file mode 100644 index 0000000000000..9920126b92c63 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +/** + * Noop implementation of {@link PathValidator}. All paths are allowed and returned unchanged. 
+ */ +public class NoopPathValidator implements PathValidator { + + private NoopPathValidator() { + } + + public static PathValidator fromOptions(PipelineOptions options) { + return new NoopPathValidator(); + } + + @Override + public void validateAndUpdateOptions() { + return; + } + + @Override + public String validateInputFilePatternSupported(String filepattern) { + return filepattern; + } + + @Override + public String validateOutputFilePrefixSupported(String filePrefix) { + return filePrefix; + } + + @Override + public String verifyGcsPath(String path) { + return path; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopStager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopStager.java new file mode 100644 index 0000000000000..b54018c49c4d9 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopStager.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.dataflow.model.DataflowPackage; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +import java.util.ArrayList; +import java.util.List; + +/** + * Do-nothing stager class. stageFiles() does nothing and returns an empty list of packages. + */ +class NoopStager implements Stager { + public static NoopStager fromOptions(PipelineOptions options) { + return new NoopStager(); + } + + @Override + public List stageFiles() { + return new ArrayList(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java new file mode 100644 index 0000000000000..c1a65ab7b57dd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +/** + * Interface for controlling validation of paths. + */ +public interface PathValidator { + /** + * Validates paths in the current {@link PipelineOptions} object. May modify the + * options object. + */ + public void validateAndUpdateOptions(); + + /** + * Validate that a file pattern is conforming. + * + * @param filepattern The file pattern to verify. + * @return The post-validation filepattern. 
+ */ + public String validateInputFilePatternSupported(String filepattern); + + /** + * Validate that an output file prefix is conforming. + * + * @param filePrefix the file prefix to verify. + * @return The post-validation filePrefix. + */ + public String validateOutputFilePrefixSupported(String filePrefix); + + /** + * Validate that a GCS path is conforming. + * + * @param path The GCS path to verify. + * @return The post-validation path. + */ + public String verifyGcsPath(String path); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java new file mode 100644 index 0000000000000..f4a1a6035a121 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.services.dataflow.model.DataflowPackage; + +import java.util.List; + +/** + * Interface for staging files needed for running a Dataflow pipeline. + */ +public interface Stager { + /* Stage files and return a list of packages. */ + public List stageFiles(); +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index b3f80891244b3..cd5902d3b12ce 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -133,8 +133,7 @@ private DataflowPipelineOptions buildPipelineOptions( ArgumentCaptor jobCaptor) throws IOException { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setProject("someProject"); - options.setTempLocation(DataflowPipelineRunner.verifyGcsPath( - GcsPath.fromComponents("somebucket", "some/path")).toString()); + options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString()); // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath. options.setFilesToStage(new LinkedList()); options.setDataflowClient(buildMockDataflow(jobCaptor)); @@ -483,7 +482,7 @@ public void testTransformTranslator() throws IOException { DataflowPipelineTranslator translator = DataflowPipelineRunner .fromOptions(options).getTranslator(); - translator.registerTransformTranslator( + DataflowPipelineTranslator.registerTransformTranslator( TestTransform.class, new DataflowPipelineTranslator.TransformTranslator() { @SuppressWarnings("unchecked") From 39a821051ffc296d02dd88c9f4cd571016224f58 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 30 Jan 2015 14:46:22 -0800 Subject: [PATCH 0112/1541] FIX: 1. not depends on UnsupportedOperationException. 2. removeWindow from activeWindowManager in PartitionBufferingWindowSet. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85193656 --- .../cloud/dataflow/sdk/util/BufferingWindowSet.java | 12 +++++++++--- .../sdk/util/PartitionBufferingWindowSet.java | 4 ++-- .../sdk/util/StreamingGroupAlsoByWindowsDoFn.java | 11 ++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java index 33cfe80a9231e..2317072b91b43 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferingWindowSet.java @@ -97,6 +97,13 @@ public void put(W window, V value) throws Exception { @Override public void remove(W window) throws Exception { + Set subWindows = mergeTree.get(window); + for (W w : subWindows) { + context.context.stepContext.deleteTagList( + bufferTag(w, windowFn.windowCoder(), inputCoder)); + } + context.context.stepContext.deleteTagList( + bufferTag(window, windowFn.windowCoder(), inputCoder)); mergeTree.remove(window); activeWindowManager.removeWindow(window); } @@ -113,7 +120,8 @@ public void merge(Collection otherWindows, W newWindow) throws Exception { } subWindows.addAll(mergeTree.get(other)); subWindows.add(other); - remove(other); + mergeTree.remove(other); + activeWindowManager.removeWindow(other); } mergeTree.put(newWindow, subWindows); activeWindowManager.addWindow(newWindow); @@ -156,8 +164,6 @@ protected Iterable finalValue(W window) throws Exception { for (V item : items) { toEmit.add(item); } - context.context.stepContext.deleteTagList(bufferTag( - curWindow, windowFn.windowCoder(), inputCoder)); } return toEmit; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java index f2b96c10434ec..3c20b5f1f3cbc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java @@ -56,8 +56,8 @@ public void put(W window, V value) throws Exception { @Override public void remove(W window) throws Exception { - CodedTupleTag tag = bufferTag(window, windowFn.windowCoder(), inputCoder); - context.context.stepContext.deleteTagList(tag); + context.context.stepContext.deleteTagList( + bufferTag(window, windowFn.windowCoder(), inputCoder)); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index 3496bef97953f..9868b33a2db1c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.PartitioningWindowFn; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.base.Preconditions; import java.io.IOException; @@ -94,13 +95,9 @@ public void processElement(ProcessContext processContext) throws Exception { .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); W window = WindowUtils.windowFromString(timer.tag(), windowFn.windowCoder()); - boolean windowExists; - try { 
- windowExists = windowSet.contains(window); - } catch (UnsupportedOperationException e) { - windowExists = true; - } - if (windowExists) { + + if ((windowFn instanceof PartitioningWindowFn) || windowSet.contains(window)) { + Preconditions.checkState(!timer.timestamp().isBefore(window.maxTimestamp())); windowSet.markCompleted(window); windowSet.flush(); } From 62b0dd48115255debb926b750435d8e68feda715 Mon Sep 17 00:00:00 2001 From: relax Date: Mon, 2 Feb 2015 12:09:23 -0800 Subject: [PATCH 0113/1541] Allows users of PubsubIO to specify which pubsub labels are used to propagate record timestamps and record ids. Strictly SDK changes, cannot yet be used. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85350708 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 181 +++++++++++++++++- .../runners/dataflow/PubsubIOTranslator.java | 13 ++ .../dataflow/sdk/util/PropertyNames.java | 3 + 3 files changed, 188 insertions(+), 9 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 4a379920249e7..060844699b550 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -153,6 +153,10 @@ public static Bound named(String name) { *

 * <li>Must end with a letter or a number.</li>
 * <li>Cannot begin with 'goog' prefix.</li>
  4. * + * + * Dataflow will start reading data published on this topic from the time the pipeline is + * started. Any data published on the topic before the pipeline is started will not be read + * by Dataflow. */ public static Bound topic(String topic) { return new Bound().topic(topic); @@ -180,6 +184,46 @@ public static Bound subscription(String subscription) { return new Bound().subscription(subscription); } + /** + * Creates and returns a PubsubIO.Read PTransform where record timestamps are expected + * to be provided using the PubSub labeling API. The {@code } parameter + * specifies the label name. The label value sent to PubsSub is a numerical value representing + * the number of milliseconds since the Unix epoch. For example, if using the joda time classes, + * org.joda.time.Instant.getMillis() returns the correct value for this label. + * + * When this feature is used, + * + * If {@code } is not provided, the system will generate record timestamps the + * first time it sees each record. All windowing will be done relative to these timestamps. + * Windows are closed based on an estimate of when this source has finished producing data for + * a timestamp range, which means that late data can arrive after a window has been closed. The + * {#dropLateData} field allows you to control what to do with late data. + */ + public static Bound timestampLabel(String timestampLabel) { + return new Bound().timestampLabel(timestampLabel); + } + + /** + * If true, then late-arriving data from this source will be dropped. + */ + public static Bound dropLateData(boolean dropLateData) { + return new Bound().dropLateData(dropLateData); + } + + /** + * Creates and returns a PubSubIO.Read PTransform where unique record identifiers are + * expected to be provided using the PubSub labeling API. The {@code } parameter + * specifies the label name. The label value sent to PubSub can be any string value that + * uniquely identifies this record. + * + * if idLabel is not provided, Dataflow cannot guarantee that no duplicate data will be + * delivered on the PubSub stream. In this case, deduplication of the stream will be + * stricly best effort. + */ + public static Bound idLabel(String idLabel) { + return new Bound().idLabel(idLabel); + } + /** * A PTransform that reads from a PubSub source and returns * a unbounded PCollection containing the items from the stream. @@ -191,10 +235,18 @@ public static class Bound String topic; /** The Pubsub subscription to read from. */ String subscription; + /** The Pubsub label to read timestamps from. */ + String timestampLabel; + Boolean dropLateData; + /** The Pubsub label to read ids from. */ + String idLabel; + + Bound() { + this.dropLateData = true; + } - Bound() {} - - Bound(String name, String subscription, String topic) { + Bound(String name, String subscription, String topic, String timestampLabel, + boolean dropLateData, String idLabel) { super(name); if (subscription != null) { Validator.validateSubscriptionName(subscription); @@ -204,18 +256,57 @@ public static class Bound } this.subscription = subscription; this.topic = topic; + this.timestampLabel = timestampLabel; + this.dropLateData = dropLateData; + this.idLabel = idLabel; } + /** + * Returns a new TextIO.Read PTransform that's like this one but with the given + * step name. Does not modify the object. 
+ */ public Bound named(String name) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); } + /** + * Returns a new TextIO.Read PTransform that's like this one but reading from the + * given subscription. Does not modify the object. + */ public Bound subscription(String subscription) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); } + /** + * Returns a new TextIO.Read PTransform that's like this one but reading from the + * give topic. Does not modify the object. + */ public Bound topic(String topic) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but reading timestamps + * from the given PubSub label. Does not modify the object. + */ + public Bound timestampLabel(String timestampLabel) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but with the specified + * setting for dropLateData. Does not modify the object. + */ + public Bound dropLateData(boolean dropLateData) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + } + + /** + * Returns a new TextIO.Read PTransform that's like this one but reading unique ids + * from the given PubSub label. Does not modify the object. + */ + public Bound idLabel(String idLabel) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); } @Override @@ -250,6 +341,18 @@ public String getSubscription() { return subscription; } + public String getTimestampLabel() { + return timestampLabel; + } + + public boolean getDropLateData() { + return dropLateData; + } + + public String getIdLabel() { + return idLabel; + } + static { // TODO: Figure out how to make this work under // DirectPipelineRunner. @@ -278,6 +381,30 @@ public static Bound topic(String topic) { return new Bound().topic(topic); } + /** + * If specified, Dataflow will add a Pubsub label to each output record specifying the logical + * timestamp of the record. {@code } determines the label name. The label value + * is a numerical value representing the number of milliseconds since the Unix epoch. For + * example, if using the joda time classes, the org.joda.time.Instant(long) constructor can be + * used to parse this value. If the output from this sink is being read by another Dataflow + * source, then PubsubIO.Read.timestampLabel can be used to ensure that the other source reads + * these timestamps from the appropriate label. + */ + public static Bound timestampLabel(String timestampLabel) { + return new Bound().timestampLabel(timestampLabel); + } + + /** + * If specified, Dataflow will add a Pubsub label to each output record containing a unique + * identifier for that record. {@code } determines the label name. The label value + * is an opaque string value. This is useful if the the output from this sink is being read + * by another Dataflow source, in which case PubsubIO.Read.idLabel can be used to ensure that + * the other source reads these ids from the appropriate label. + */ + public static Bound idLabel(String idLabel) { + return new Bound().idLabel(idLabel); + } + /** * A PTransfrom that writes a unbounded {@code PCollection} * to a PubSub stream. 
@@ -287,23 +414,51 @@ public static class Bound extends PTransform, PDone> { /** The Pubsub topic to publish to. */ String topic; + String timestampLabel; + String idLabel; Bound() {} - Bound(String name, String topic) { + Bound(String name, String topic, String timestampLabel, String idLabel) { super(name); if (topic != null) { Validator.validateTopicName(topic); this.topic = topic; } + this.timestampLabel = timestampLabel; + this.idLabel = idLabel; } + /** + * Returns a new TextIO.Write PTransform that's like this one but with the given step + * name. Does not modify the object. + */ public Bound named(String name) { - return new Bound(name, topic); + return new Bound(name, topic, timestampLabel, idLabel); } + /** + * Returns a new TextIO.Write PTransform that's like this one but writing to the given + * topic. Does not modify the object. + */ public Bound topic(String topic) { - return new Bound(name, topic); + return new Bound(name, topic, timestampLabel, idLabel); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but publishing timestamps + * to the given PubSub label. Does not modify the object. + */ + public Bound timestampLabel(String timestampLabel) { + return new Bound(name, topic, timestampLabel, idLabel); + } + + /** + * Returns a new TextIO.Write PTransform that's like this one but publishing record ids + * to the given PubSub label. Does not modify the object. + */ + public Bound idLabel(String idLabel) { + return new Bound(name, topic, timestampLabel, idLabel); } @Override @@ -327,6 +482,14 @@ public String getTopic() { return topic; } + public String getTimestampLabel() { + return timestampLabel; + } + + public String getIdLabel() { + return idLabel; + } + static { // TODO: Figure out how to make this work under // DirectPipelineRunner. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java index 706397bddd37b..591fe8c79de89 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java @@ -57,6 +57,13 @@ private void translateReadHelper( if (transform.getSubscription() != null) { context.addInput(PropertyNames.PUBSUB_SUBSCRIPTION, transform.getSubscription()); } + if (transform.getTimestampLabel() != null) { + context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); + } + context.addInput(PropertyNames.PUBSUB_DROP_LATE_DATA, transform.getDropLateData()); + if (transform.getIdLabel() != null) { + context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); + } context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); // TODO: Orderedness? 
} @@ -83,6 +90,12 @@ private void translateWriteHelper( context.addStep(transform, "ParallelWrite"); context.addInput(PropertyNames.FORMAT, "pubsub"); context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic()); + if (transform.getTimestampLabel() != null) { + context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); + } + if (transform.getIdLabel() != null) { + context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); + } context.addEncodingInput( WindowedValue.getValueOnlyCoder(transform.getInput().getCoder())); context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index 0afe5ae411901..a22f7893a334c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -68,7 +68,10 @@ public class PropertyNames { public static final String OUTPUT_NAME = "output_name"; public static final String PARALLEL_INPUT = "parallel_input"; public static final String PHASE = "phase"; + public static final String PUBSUB_DROP_LATE_DATA = "pubsub_drop_late_data"; + public static final String PUBSUB_ID_LABEL = "pubsub_id_label"; public static final String PUBSUB_SUBSCRIPTION = "pubsub_subscription"; + public static final String PUBSUB_TIMESTAMP_LABEL = "pubsub_timestamp_label"; public static final String PUBSUB_TOPIC = "pubsub_topic"; public static final String SCALAR_FIELD_NAME = "value"; public static final String SERIALIZED_FN = "serialized_fn"; From 774c368edd60e6b3cdcd39ee6aa455b7ac7dcdf8 Mon Sep 17 00:00:00 2001 From: relax Date: Mon, 2 Feb 2015 15:34:06 -0800 Subject: [PATCH 0114/1541] Automated rollback of a previous change. *** Original change description *** Allows users of PubsubIO to specify which pubsub labels are used to propagate record timestamps and record ids. Strictly SDK changes, cannot yet be used. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85370721 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 181 +----------------- .../runners/dataflow/PubsubIOTranslator.java | 13 -- .../dataflow/sdk/util/PropertyNames.java | 3 - 3 files changed, 9 insertions(+), 188 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 060844699b550..4a379920249e7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -153,10 +153,6 @@ public static Bound named(String name) { *
 * <li>Must end with a letter or a number.</li>
 * <li>Cannot begin with 'goog' prefix.</li>
  8. * - * - * Dataflow will start reading data published on this topic from the time the pipeline is - * started. Any data published on the topic before the pipeline is started will not be read - * by Dataflow. */ public static Bound topic(String topic) { return new Bound().topic(topic); @@ -184,46 +180,6 @@ public static Bound subscription(String subscription) { return new Bound().subscription(subscription); } - /** - * Creates and returns a PubsubIO.Read PTransform where record timestamps are expected - * to be provided using the PubSub labeling API. The {@code } parameter - * specifies the label name. The label value sent to PubsSub is a numerical value representing - * the number of milliseconds since the Unix epoch. For example, if using the joda time classes, - * org.joda.time.Instant.getMillis() returns the correct value for this label. - * - * When this feature is used, - * - * If {@code } is not provided, the system will generate record timestamps the - * first time it sees each record. All windowing will be done relative to these timestamps. - * Windows are closed based on an estimate of when this source has finished producing data for - * a timestamp range, which means that late data can arrive after a window has been closed. The - * {#dropLateData} field allows you to control what to do with late data. - */ - public static Bound timestampLabel(String timestampLabel) { - return new Bound().timestampLabel(timestampLabel); - } - - /** - * If true, then late-arriving data from this source will be dropped. - */ - public static Bound dropLateData(boolean dropLateData) { - return new Bound().dropLateData(dropLateData); - } - - /** - * Creates and returns a PubSubIO.Read PTransform where unique record identifiers are - * expected to be provided using the PubSub labeling API. The {@code } parameter - * specifies the label name. The label value sent to PubSub can be any string value that - * uniquely identifies this record. - * - * if idLabel is not provided, Dataflow cannot guarantee that no duplicate data will be - * delivered on the PubSub stream. In this case, deduplication of the stream will be - * stricly best effort. - */ - public static Bound idLabel(String idLabel) { - return new Bound().idLabel(idLabel); - } - /** * A PTransform that reads from a PubSub source and returns * a unbounded PCollection containing the items from the stream. @@ -235,18 +191,10 @@ public static class Bound String topic; /** The Pubsub subscription to read from. */ String subscription; - /** The Pubsub label to read timestamps from. */ - String timestampLabel; - Boolean dropLateData; - /** The Pubsub label to read ids from. */ - String idLabel; - - Bound() { - this.dropLateData = true; - } - Bound(String name, String subscription, String topic, String timestampLabel, - boolean dropLateData, String idLabel) { + Bound() {} + + Bound(String name, String subscription, String topic) { super(name); if (subscription != null) { Validator.validateSubscriptionName(subscription); @@ -256,57 +204,18 @@ public static class Bound } this.subscription = subscription; this.topic = topic; - this.timestampLabel = timestampLabel; - this.dropLateData = dropLateData; - this.idLabel = idLabel; } - /** - * Returns a new TextIO.Read PTransform that's like this one but with the given - * step name. Does not modify the object. 
- */ public Bound named(String name) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + return new Bound(name, subscription, topic); } - /** - * Returns a new TextIO.Read PTransform that's like this one but reading from the - * given subscription. Does not modify the object. - */ public Bound subscription(String subscription) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + return new Bound(name, subscription, topic); } - /** - * Returns a new TextIO.Read PTransform that's like this one but reading from the - * give topic. Does not modify the object. - */ public Bound topic(String topic) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); - } - - /** - * Returns a new TextIO.Read PTransform that's like this one but reading timestamps - * from the given PubSub label. Does not modify the object. - */ - public Bound timestampLabel(String timestampLabel) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); - } - - /** - * Returns a new TextIO.Read PTransform that's like this one but with the specified - * setting for dropLateData. Does not modify the object. - */ - public Bound dropLateData(boolean dropLateData) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); - } - - /** - * Returns a new TextIO.Read PTransform that's like this one but reading unique ids - * from the given PubSub label. Does not modify the object. - */ - public Bound idLabel(String idLabel) { - return new Bound(name, subscription, topic, timestampLabel, dropLateData, idLabel); + return new Bound(name, subscription, topic); } @Override @@ -341,18 +250,6 @@ public String getSubscription() { return subscription; } - public String getTimestampLabel() { - return timestampLabel; - } - - public boolean getDropLateData() { - return dropLateData; - } - - public String getIdLabel() { - return idLabel; - } - static { // TODO: Figure out how to make this work under // DirectPipelineRunner. @@ -381,30 +278,6 @@ public static Bound topic(String topic) { return new Bound().topic(topic); } - /** - * If specified, Dataflow will add a Pubsub label to each output record specifying the logical - * timestamp of the record. {@code } determines the label name. The label value - * is a numerical value representing the number of milliseconds since the Unix epoch. For - * example, if using the joda time classes, the org.joda.time.Instant(long) constructor can be - * used to parse this value. If the output from this sink is being read by another Dataflow - * source, then PubsubIO.Read.timestampLabel can be used to ensure that the other source reads - * these timestamps from the appropriate label. - */ - public static Bound timestampLabel(String timestampLabel) { - return new Bound().timestampLabel(timestampLabel); - } - - /** - * If specified, Dataflow will add a Pubsub label to each output record containing a unique - * identifier for that record. {@code } determines the label name. The label value - * is an opaque string value. This is useful if the the output from this sink is being read - * by another Dataflow source, in which case PubsubIO.Read.idLabel can be used to ensure that - * the other source reads these ids from the appropriate label. - */ - public static Bound idLabel(String idLabel) { - return new Bound().idLabel(idLabel); - } - /** * A PTransfrom that writes a unbounded {@code PCollection} * to a PubSub stream. 
@@ -414,51 +287,23 @@ public static class Bound extends PTransform, PDone> { /** The Pubsub topic to publish to. */ String topic; - String timestampLabel; - String idLabel; Bound() {} - Bound(String name, String topic, String timestampLabel, String idLabel) { + Bound(String name, String topic) { super(name); if (topic != null) { Validator.validateTopicName(topic); this.topic = topic; } - this.timestampLabel = timestampLabel; - this.idLabel = idLabel; } - /** - * Returns a new TextIO.Write PTransform that's like this one but with the given step - * name. Does not modify the object. - */ public Bound named(String name) { - return new Bound(name, topic, timestampLabel, idLabel); + return new Bound(name, topic); } - /** - * Returns a new TextIO.Write PTransform that's like this one but writing to the given - * topic. Does not modify the object. - */ public Bound topic(String topic) { - return new Bound(name, topic, timestampLabel, idLabel); - } - - /** - * Returns a new TextIO.Write PTransform that's like this one but publishing timestamps - * to the given PubSub label. Does not modify the object. - */ - public Bound timestampLabel(String timestampLabel) { - return new Bound(name, topic, timestampLabel, idLabel); - } - - /** - * Returns a new TextIO.Write PTransform that's like this one but publishing record ids - * to the given PubSub label. Does not modify the object. - */ - public Bound idLabel(String idLabel) { - return new Bound(name, topic, timestampLabel, idLabel); + return new Bound(name, topic); } @Override @@ -482,14 +327,6 @@ public String getTopic() { return topic; } - public String getTimestampLabel() { - return timestampLabel; - } - - public String getIdLabel() { - return idLabel; - } - static { // TODO: Figure out how to make this work under // DirectPipelineRunner. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java index 591fe8c79de89..706397bddd37b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java @@ -57,13 +57,6 @@ private void translateReadHelper( if (transform.getSubscription() != null) { context.addInput(PropertyNames.PUBSUB_SUBSCRIPTION, transform.getSubscription()); } - if (transform.getTimestampLabel() != null) { - context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); - } - context.addInput(PropertyNames.PUBSUB_DROP_LATE_DATA, transform.getDropLateData()); - if (transform.getIdLabel() != null) { - context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); - } context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); // TODO: Orderedness? 
} @@ -90,12 +83,6 @@ private void translateWriteHelper( context.addStep(transform, "ParallelWrite"); context.addInput(PropertyNames.FORMAT, "pubsub"); context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic()); - if (transform.getTimestampLabel() != null) { - context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); - } - if (transform.getIdLabel() != null) { - context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); - } context.addEncodingInput( WindowedValue.getValueOnlyCoder(transform.getInput().getCoder())); context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index a22f7893a334c..0afe5ae411901 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -68,10 +68,7 @@ public class PropertyNames { public static final String OUTPUT_NAME = "output_name"; public static final String PARALLEL_INPUT = "parallel_input"; public static final String PHASE = "phase"; - public static final String PUBSUB_DROP_LATE_DATA = "pubsub_drop_late_data"; - public static final String PUBSUB_ID_LABEL = "pubsub_id_label"; public static final String PUBSUB_SUBSCRIPTION = "pubsub_subscription"; - public static final String PUBSUB_TIMESTAMP_LABEL = "pubsub_timestamp_label"; public static final String PUBSUB_TOPIC = "pubsub_topic"; public static final String SCALAR_FIELD_NAME = "value"; public static final String SERIALIZED_FN = "serialized_fn"; From d692b7c21dc8993b43ab4086ae41b9f9ed9604fe Mon Sep 17 00:00:00 2001 From: millsd Date: Mon, 2 Feb 2015 17:15:29 -0800 Subject: [PATCH 0115/1541] Adds protobuf for communicating with Windmill service. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85379329 --- sdk/src/main/proto/windmill.proto | 214 ++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 sdk/src/main/proto/windmill.proto diff --git a/sdk/src/main/proto/windmill.proto b/sdk/src/main/proto/windmill.proto new file mode 100644 index 0000000000000..88aa796417143 --- /dev/null +++ b/sdk/src/main/proto/windmill.proto @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/* + * Protocol Buffers describing the interface between streaming Dataflow workers + * and the Windmill servers. 
+ */ + +syntax = "proto2"; + +package windmill; + +option java_package = "com.google.cloud.dataflow.sdk.runners.worker.windmill"; +option java_outer_classname = "Windmill"; + +//////////////////////////////////////////////////////////////////////////////// +// API Data types + +message Message { + required int64 timestamp = 1 [default=-0x8000000000000000]; + required bytes data = 2; + optional bytes metadata = 3; +} + +message Timer { + required bytes tag = 1; + optional int64 timestamp = 2 [default=-0x8000000000000000]; +} + +message InputMessageBundle { + required string source_computation_id = 1; + repeated Message messages = 2; +} + +message KeyedMessageBundle { + required bytes key = 1; + repeated Message messages = 2; +} + +message OutputMessageBundle { + optional string destination_computation_id = 1; + optional string destination_stream_id = 3; + repeated KeyedMessageBundle bundles = 2; +} + +message PubSubMessageBundle { + required string topic = 1; + repeated Message messages = 2; +} + +message TimerBundle { + repeated Timer timers = 1; +} + +message Value { + required int64 timestamp = 1 [default=-0x8000000000000000]; + required bytes data = 2; +} + +message TagValue { + required bytes tag = 1; + optional Value value = 2; +} + +message TagList { + required bytes tag = 1; + optional int64 end_timestamp = 2 [default=-0x8000000000000000]; + repeated Value values = 3; +} + +message WorkItem { + required bytes key = 1; + required fixed64 work_token = 2; + + repeated InputMessageBundle message_bundles = 3; + optional TimerBundle timers = 4; +} + +message ComputationWorkItems { + required string computation_id = 1; + repeated WorkItem work = 2; +} + +//////////////////////////////////////////////////////////////////////////////// +// API calls + +// GetWork + +message GetWorkRequest { + required fixed64 client_id = 1; + optional int64 max_items = 2 [default = 0xffffffff]; +} + +message GetWorkResponse { + repeated ComputationWorkItems work = 1; +} + +// GetData + +message KeyedGetDataRequest { + required bytes key = 1; + required fixed64 work_token = 2; + repeated TagValue values_to_fetch = 3; + repeated TagList lists_to_fetch = 4; +} + +message ComputationGetDataRequest { + required string computation_id = 1; + repeated KeyedGetDataRequest requests = 2; +} + +message GetDataRequest { + repeated ComputationGetDataRequest requests = 1; +} + +message KeyedGetDataResponse { + required bytes key = 1; + // The response for this key is not populated due to the fetch failing. + optional bool failed = 2; + repeated TagValue values = 3; + repeated TagList lists = 4; +} + +message ComputationGetDataResponse { + required string computation_id = 1; + repeated KeyedGetDataResponse data = 2; +} + +message GetDataResponse { + repeated ComputationGetDataResponse data = 1; +} + +// CommitWork + +message Counter { + optional string name = 1; + enum Kind { + SUM = 0; + MAX = 1; + MIN = 2; + MEAN = 3; + }; + optional Kind kind = 2; + + // For SUM, MAX, MIN, AND, OR, MEAN at most one of the following should be + // set. For MEAN it is the sum + optional double double_scalar = 3; + optional int64 int_scalar = 4; + + // Only set for MEAN. Count of elements contributing to the sum. 
+ optional int64 mean_count = 6; +} + +// next id: 9 +message WorkItemCommitRequest { + required bytes key = 1; + required fixed64 work_token = 2; + repeated OutputMessageBundle output_messages = 3; + repeated PubSubMessageBundle pubsub_messages = 7; + repeated Timer output_timers = 4; + repeated TagValue value_updates = 5; + repeated TagList list_updates = 6; + repeated Counter counter_updates = 8; +} + +message ComputationCommitWorkRequest { + required string computation_id = 1; + repeated WorkItemCommitRequest requests = 2; +} + +message CommitWorkRequest { + repeated ComputationCommitWorkRequest requests = 1; +} + +message CommitWorkResponse {} + +// Configuration + +message GetConfigRequest { + repeated string computations = 1; +} + +message GetConfigResponse { + repeated string cloud_works = 1; +} + +// Reporting + +message Exception { + repeated string stack_frames = 1; + optional Exception cause = 2; +} + +message ReportStatsRequest { + required string computation_id = 1; + required bytes key = 2; + required fixed64 work_token = 3; + repeated Exception exceptions = 4; +} + +message ReportStatsResponse {} From a46c40d27b25e11e64cf4df687747ae747cea8b4 Mon Sep 17 00:00:00 2001 From: chamikara Date: Mon, 2 Feb 2015 17:41:44 -0800 Subject: [PATCH 0116/1541] Updated DataStoreWordCount so that it can be adapted as an integration test for DataStoreIO. Also updated BasicSerializableSourceFormat.serializeToCloudSource() to not fail when estimated size calculation fails. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85381606 --- .../dataflow/examples/DatastoreWordCount.java | 58 +++++++++++++------ .../cloud/dataflow/sdk/io/DatastoreIO.java | 12 ++-- .../BasicSerializableSourceFormat.java | 16 ++++- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java index ab31aae15ab76..ec9d864eefe15 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/DatastoreWordCount.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.examples; +import com.google.api.services.datastore.DatastoreV1; import com.google.api.services.datastore.DatastoreV1.Entity; import com.google.api.services.datastore.DatastoreV1.Key; import com.google.api.services.datastore.DatastoreV1.Property; @@ -30,6 +31,8 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; @@ -75,7 +78,10 @@ static class GetContentFn extends DoFn { @Override public void processElement(ProcessContext c) { Map props = DatastoreHelper.getPropertyMap(c.element()); - c.output(DatastoreHelper.getString(props.get("content"))); + DatastoreV1.Value value = props.get("content"); + if (value != null) { + c.output(DatastoreHelper.getString(value)); + } } } @@ -94,9 +100,9 @@ public Entity makeEntity(String content) { // Create entities with same ancestor Key. 
Key ancestorKey = DatastoreHelper.makeKey(kind, "root").build(); Key key = DatastoreHelper.makeKey(ancestorKey, kind).build(); + entityBuilder.setKey(key); - entityBuilder.addProperty(Property.newBuilder() - .setName("content") + entityBuilder.addProperty(Property.newBuilder().setName("content") .setValue(Value.newBuilder().setStringValue(content))); return entityBuilder.build(); } @@ -112,7 +118,7 @@ public void processElement(ProcessContext c) { *

    * Inherits standard configuration options. */ - private static interface Options extends PipelineOptions { + public static interface Options extends PipelineOptions { @Description("Path of the file to read from and store to Datastore") @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt") String getInput(); @@ -136,6 +142,11 @@ private static interface Options extends PipelineOptions { @Description("Read an existing dataset, do not write first") boolean isReadOnly(); void setReadOnly(boolean value); + + @Description("Number of output shards") + @Default.Integer(0) // If the system should choose automatically. + int getNumShards(); + void setNumShards(int value); } /** @@ -143,14 +154,23 @@ private static interface Options extends PipelineOptions { * text input. Forces use of DirectPipelineRunner for local execution mode. */ public static void writeDataToDatastore(Options options) { - // Runs locally via DirectPiplineRunner, as writing is not yet implemented - // for the other runners which is why we just create a PipelineOptions with defaults. - Pipeline p = Pipeline.create(PipelineOptionsFactory.create()); - p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) - .apply(ParDo.of(new CreateEntityFn(options.getKind()))) - .apply(DatastoreIO.write().to(options.getDataset())); - - p.run(); + // Storing the user-specified runner. + Class> tempRunner = options.getRunner(); + + try { + // Runs locally via DirectPiplineRunner, as writing is not yet implemented + // for the other runners. + options.setRunner(DirectPipelineRunner.class); + Pipeline p = Pipeline.create(options); + p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) + .apply(ParDo.of(new CreateEntityFn(options.getKind()))) + .apply(DatastoreIO.write().to(options.getDataset())); + + p.run(); + } finally { + // Resetting the runner to the user specified class. + options.setRunner(tempRunner); + } } /** @@ -163,12 +183,12 @@ public static void readDataFromDatastore(Options options) { Query query = q.build(); Pipeline p = Pipeline.create(options); - p.apply(DatastoreIO.readFrom(options.getDataset(), query) - .named("ReadShakespeareFromDatastore")) - .apply(ParDo.of(new GetContentFn())) - .apply(new WordCount.CountWords()) - .apply(TextIO.Write.named("WriteLines").to(options.getOutput())); - + p.apply(DatastoreIO.readFrom(options.getDataset(), query).named("ReadShakespeareFromDatastore")) + .apply(ParDo.of(new GetContentFn())) + .apply(new WordCount.CountWords()) + .apply(TextIO.Write.named("WriteLines") + .to(options.getOutput()) + .withNumShards(options.getNumShards())); p.run(); } @@ -185,7 +205,7 @@ public static void main(String args[]) { if (!options.isReadOnly()) { // First example: write data to Datastore for reading later. - // Note: this will insert new entries with the given kind. Existing entries + // Note: this will insert new entries with the given kind. Existing entries // should be cleared first, or the final counts will contain duplicates. // The Datastore Admin tool in the AppEngine console can be used to erase // all entries with a particular kind. 
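A minimal sketch of driving the now-public example entry points programmatically, for instance from an integration-test harness. The dataset, kind, and output values are placeholders, and the setters are assumed to exist as the usual PipelineOptions getter/setter pairs; the sketch is not part of the change itself:

    import com.google.cloud.dataflow.examples.DatastoreWordCount;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

    public class DatastoreWordCountSketch {
      public static void main(String[] args) {
        DatastoreWordCount.Options options =
            PipelineOptionsFactory.create().as(DatastoreWordCount.Options.class);
        options.setDataset("example-dataset");  // placeholder Datastore dataset id
        options.setKind("example-kind");        // placeholder entity kind
        options.setOutput("gs://example-bucket/datastore-wordcount/output"); // placeholder
        // Seed Datastore first (this phase always uses the DirectPipelineRunner) ...
        DatastoreWordCount.writeDataToDatastore(options);
        // ... then read the entities back, count words, and write the results.
        DatastoreWordCount.readDataFromDatastore(options);
      }
    }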
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index 6f3e3b8228a6e..4901ca30ead82 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -136,7 +136,13 @@ public class DatastoreIO { private static final Logger LOG = LoggerFactory.getLogger(DatastoreIO.class); - private static final String DEFAULT_HOST = "https://www.googleapis.com"; + public static final String DEFAULT_HOST = "https://www.googleapis.com"; + + /** + * Datastore has a limit of 500 mutations per batch operation, so we flush + * changes to Datastore every 500 entities. + */ + public static final int DATASTORE_BATCH_UPDATE_LIMIT = 500; /** * Returns an empty {@code DatastoreIO.Read} builder with the default host. @@ -484,9 +490,7 @@ private static void evaluateWriteHelper( List toInsert = new ArrayList<>(); for (Entity e : entitiesWithSameAncestor) { toInsert.add(e); - // Note that Datastore has limit as 500 for a batch operation, - // so just flush to Datastore with every 500 entties. - if (toInsert.size() >= 500) { + if (toInsert.size() >= DATASTORE_BATCH_UPDATE_LIMIT) { writeBatch(toInsert, datastore); toInsert.clear(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java index cb3f4c237e7d3..4f723e66e7315 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java @@ -47,6 +47,9 @@ import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -54,7 +57,7 @@ import java.util.NoSuchElementException; /** - * An helper class for supporting sources defined as {@code Source}. + * A helper class for supporting sources defined as {@code Source}. * * Provides a bridge between the high-level {@code Source} API and the raw * API-level {@code SourceFormat} API, by encoding the serialized @@ -65,6 +68,8 @@ public class BasicSerializableSourceFormat implements SourceFormat { private static final String SERIALIZED_SOURCE = "serialized_source"; private static final long DEFAULT_DESIRED_SHARD_SIZE_BYTES = 64 * (1 << 20); + private static final Logger LOG = LoggerFactory.getLogger(BasicSerializableSourceFormat.class); + private final PipelineOptions options; public BasicSerializableSourceFormat(PipelineOptions options) { @@ -163,7 +168,14 @@ private static com.google.api.services.dataflow.model.Source serializeToCloudSou SourceMetadata metadata = new SourceMetadata(); metadata.setProducesSortedKeys(source.producesSortedKeys(options)); - metadata.setEstimatedSizeBytes(source.getEstimatedSizeBytes(options)); + + // Size estimation is best effort so we continue even if it fails here. 
+ try { + metadata.setEstimatedSizeBytes(source.getEstimatedSizeBytes(options)); + } catch (Exception e) { + LOG.warn("Size estimation of the source failed.", e); + } + cloudSource.setMetadata(metadata); return cloudSource; } From 1848854fc8afaa457b7a832cd9999f26b93d76d5 Mon Sep 17 00:00:00 2001 From: wan Date: Tue, 3 Feb 2015 09:47:53 -0800 Subject: [PATCH 0117/1541] Moves class GlobalWindows.GlobalWindow to top-level for consistency with other window types. This change is backward-incompatible but unlikely to affect many users. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85434463 --- .../transforms/windowing/GlobalWindow.java | 61 +++++++++++++++++++ .../transforms/windowing/GlobalWindows.java | 43 +------------ .../dataflow/sdk/util/WindowedValue.java | 4 +- .../sdk/runners/worker/ShuffleSinkTest.java | 3 +- 4 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java new file mode 100644 index 0000000000000..97c82eeac5b76 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; + +import org.joda.time.Instant; + +import java.io.InputStream; +import java.io.OutputStream; + +/** + * The default window into which all data is placed (via {@link GlobalWindows}). + */ +public class GlobalWindow extends BoundedWindow { + public static final GlobalWindow INSTANCE = new GlobalWindow(); + + @Override + public Instant maxTimestamp() { + return new Instant(Long.MAX_VALUE); + } + + private GlobalWindow() {} + + /** + * {@link Coder} for encoding and decoding {@code Window}s. 
+ */ + public static class Coder extends AtomicCoder { + public static final Coder INSTANCE = new Coder(); + + @Override + public void encode(GlobalWindow window, OutputStream outStream, Context context) {} + + @Override + public GlobalWindow decode(InputStream inStream, Context context) { + return GlobalWindow.INSTANCE; + } + + @Override + public boolean isDeterministic() { + return true; + } + + private Coder() {} + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java index d4858b85ba5c7..c52ec92a9d3b2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java @@ -16,13 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; -import com.google.cloud.dataflow.sdk.coders.AtomicCoder; import com.google.cloud.dataflow.sdk.coders.Coder; -import org.joda.time.Instant; - -import java.io.InputStream; -import java.io.OutputStream; import java.util.Arrays; import java.util.Collection; @@ -31,7 +26,7 @@ */ @SuppressWarnings("serial") public class GlobalWindows - extends NonMergingWindowFn { + extends NonMergingWindowFn { @Override public Collection assignWindows(AssignContext c) { return Arrays.asList(GlobalWindow.INSTANCE); @@ -46,40 +41,4 @@ public boolean isCompatible(WindowFn o) { public Coder windowCoder() { return GlobalWindow.Coder.INSTANCE; } - - /** - * The default window into which all data is placed. - */ - public static class GlobalWindow extends BoundedWindow { - public static final GlobalWindow INSTANCE = new GlobalWindow(); - - @Override - public Instant maxTimestamp() { - return new Instant(Long.MAX_VALUE); - } - - private GlobalWindow() {} - - /** - * {@link Coder} for encoding and decoding {@code Window}s. 
- */ - public static class Coder extends AtomicCoder { - public static final Coder INSTANCE = new Coder(); - - @Override - public void encode(GlobalWindow window, OutputStream outStream, Context context) {} - - @Override - public GlobalWindow decode(InputStream inStream, Context context) { - return GlobalWindow.INSTANCE; - } - - @Override - public boolean isDeterministic() { - return true; - } - - private Coder() {} - } - } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 0562b58b35c50..4897a3e10c229 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -26,7 +26,7 @@ import com.google.cloud.dataflow.sdk.coders.InstantCoder; import com.google.cloud.dataflow.sdk.coders.StandardCoder; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.fasterxml.jackson.annotation.JsonCreator; @@ -72,7 +72,7 @@ public static WindowedValue of( public static WindowedValue valueInGlobalWindow(V value) { return new WindowedValue<>(value, new Instant(Long.MIN_VALUE), - Arrays.asList(GlobalWindows.GlobalWindow.INSTANCE)); + Arrays.asList(GlobalWindow.INSTANCE)); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java index b58fc38c8f46a..806588a8a3a81 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.CoderUtils; @@ -107,7 +108,7 @@ private void runTestWriteUngroupingShuffleSink(List expected) // Ignore the key. byte[] valueBytes = record.getValue(); WindowedValue value = CoderUtils.decodeFromByteArray(windowedValueCoder, valueBytes); - Assert.assertEquals(Lists.newArrayList(GlobalWindows.GlobalWindow.INSTANCE), + Assert.assertEquals(Lists.newArrayList(GlobalWindow.INSTANCE), value.getWindows()); actual.add(value.getValue()); } From 56e23990d8fb4443464388576b86b963e2557601 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 3 Feb 2015 10:58:25 -0800 Subject: [PATCH 0118/1541] Updates to Dataflow README on GitHub. * Fixes the problem with DirectPipelineRunner and GCS locations. * Adds a note about Maven on Windows. * Explains how to run a bundled JAR. This fixes #6 on GitHub. 
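As a point of reference for the README changes described in this patch, the WordCount example they refer to is, at its core, a short pipeline built from the SDK primitives introduced earlier in this series. The sketch below is illustrative only and is not part of the patch: the class name, input/output paths, and the DoFn bodies are placeholders, and the shipped example differs in detail.

    import com.google.cloud.dataflow.sdk.Pipeline;
    import com.google.cloud.dataflow.sdk.io.TextIO;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
    import com.google.cloud.dataflow.sdk.transforms.Count;
    import com.google.cloud.dataflow.sdk.transforms.DoFn;
    import com.google.cloud.dataflow.sdk.transforms.ParDo;
    import com.google.cloud.dataflow.sdk.values.KV;

    public class MinimalWordCount {
      public static void main(String[] args) {
        // Flags such as --runner, --project, and --stagingLocation are parsed here.
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

        p.apply(TextIO.Read.from("gs://your-bucket/input-*.txt"))      // read lines of text
         .apply(ParDo.of(new DoFn<String, String>() {                  // split lines into words
           @Override
           public void processElement(ProcessContext c) {
             for (String word : c.element().split("[^a-zA-Z']+")) {
               if (!word.isEmpty()) {
                 c.output(word);
               }
             }
           }
         }))
         .apply(Count.<String>perElement())                            // count occurrences per word
         .apply(ParDo.of(new DoFn<KV<String, Long>, String>() {        // format the counts
           @Override
           public void processElement(ProcessContext c) {
             c.output(c.element().getKey() + ": " + c.element().getValue());
           }
         }))
         .apply(TextIO.Write.to("gs://your-bucket/output"));           // write the results

        p.run();
      }
    }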
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85441294 --- README.md | 72 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 3e6bb3136a548..3b7869190e18d 100644 --- a/README.md +++ b/README.md @@ -31,22 +31,26 @@ for execution. * [`PipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java): specifies where and how the pipeline should execute. -Currently there are three `PipelineRunners`: +We provide three PipelineRunners: 1. The [`DirectPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java) runs the pipeline on your local machine. - 2. The -[`DataflowPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) + 2. The [`DataflowPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java) submits the pipeline to the Dataflow Service, where it runs using managed -resources in the [Google Cloud Platform](http://cloud.google.com). +resources in the [Google Cloud Platform](https://cloud.google.com) (GCP). 3. The [`BlockingDataflowPipelineRunner`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java) submits the pipeline to the Dataflow Service via the `DataflowPipelineRunner` -and then prints messages about the job status until execution is complete. +and then prints messages about the job status until the execution is complete. _The Dataflow Service is currently in the Alpha phase of development and access is limited to whitelisted users._ +Additionally, in partnership with [Cloudera](https://www.cloudera.com/), you can +run Dataflow pipelines on an [Apache Spark](https://spark.apache.org/) backend. +The relevant runner code is hosted in +[this](https://github.com/cloudera/spark-dataflow) repository. + ## Getting Started This repository consists of two modules: @@ -82,36 +86,60 @@ You can speed up the build and install process by using the following options: mvn -T 4 install After building and installing, you can execute the `WordCount` and other example -pipelines locally or in the cloud using Maven with command-line options. - -To execute the WordCount pipeline locally (using the default -`DirectPipelineRunner`) and write output to a local or -Google Cloud Storage (GCS) location, use the following command-line syntax: +pipelines using the `DirectPipelineRunner` on your local machine: mvn exec:java -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ - -Dexec.args="--output=[ | gs://] + -Dexec.args="--input= --output=" If you have been whitelisted for Alpha access to the Dataflow Service and followed the [developer setup](https://cloud.google.com/dataflow/java-sdk/getting-started#DeveloperSetup) steps, you can use the `BlockingDataflowPipelineRunner` to execute the -`WordCount` example in the Google Cloud Platform (GCP). 
In this case, you -specify your project name, pipeline runner, the GCS staging location (staging -location should be entered in the form of `gs://bucket/staging-directory`), -and the GCS output (in the form of `gs://bucket/filename_prefix`). +`WordCount` example in the GCP. In this case, you specify your project name, +pipeline runner, and the staging location in +[Google Cloud Storage](https://cloud.google.com/storage/) (GCS), as follows: mvn exec:java -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ - -Dexec.args="--project= --runner=BlockingDataflowPipelineRunner \ - --stagingLocation= --output=" + -Dexec.args="--project= --stagingLocation= --runner=BlockingDataflowPipelineRunner" + +GCS location should be entered in the form of +`gs://bucket/path/to/staging/directory`. GCP project refers to its name (not +number), which has been whitelisted for Cloud Dataflow. Refer to +[Google Cloud Platform](https://cloud.google.com/) for general instructions on +getting started with GCP. + +Alternatively, you may choose to bundle all dependencies into a single JAR and +execute it outside of the Maven environment. For example, after building and +installing as usual, you can execute the following commands to create the +bundled JAR of the `Examples` module and execute it both locally and in GCP: + + mvn bundle:bundle -pl examples -Refer to [Google Cloud Platform](https://cloud.google.com/) for general -instructions on getting started with GCP. + java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-manual_build.jar \ + com.google.cloud.dataflow.examples.WordCount \ + --input= --output= + + java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-manual_build.jar \ + com.google.cloud.dataflow.examples.WordCount \ + --project= --stagingLocation= --runner=BlockingDataflowPipelineRunner Other examples can be run similarly by replacing the `WordCount` class name with -`BigQueryTornadoes`, `DatastoreWordCount`, `TfIdf`, `TopWikipediaSessions`, etc. and -adjusting runtime options under the `Dexec.args` parameter, as specified in the -example itself. +`BigQueryTornadoes`, `DatastoreWordCount`, `TfIdf`, `TopWikipediaSessions`, etc. +and adjusting runtime options under the `Dexec.args` parameter, as specified in +the example itself. + +Note that when running Maven on Microsoft Windows platform, backslashes (`\`) +under the `Dexec.args` parameter should be escaped with another backslash. For +example, input file pattern of `c:\*.txt` should be entered as `c:\\*.txt`. + +## Contact Us + +We welcome all usage-related questions on [Stack Overflow](http://stackoverflow.com/questions/tagged/google-cloud-dataflow) +tagged with `google-cloud-dataflow`. + +Please use [issue tracker](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/issues) +on GitHub to report any bugs, comments or questions regarding SDK development. ## More Information From ffbf31fd08cef56f4b941e028ef89f3d7fb4a2b6 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Tue, 3 Feb 2015 12:59:28 -0800 Subject: [PATCH 0119/1541] Make test_wordcount.sh script executable by changing the file's mode. 
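A note on the flags used throughout the README above (`--project`, `--stagingLocation`, `--runner`, `--input`, `--output`): they are not interpreted by Maven or by the bundled JAR itself, but are bound to typed pipeline options inside each example's main method. Roughly, and assuming the standard `PipelineOptionsFactory` API, an example declares an options interface along these lines (the interface below is a hypothetical illustration, not code from this patch):

    import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
    import com.google.cloud.dataflow.sdk.options.Default;
    import com.google.cloud.dataflow.sdk.options.Description;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

    // Each getter/setter pair becomes a --flag that can be supplied either
    // via -Dexec.args under Maven or directly on the java -cp command line.
    public interface WordCountOptions extends DataflowPipelineOptions {
      @Description("Path of the file to read from")
      @Default.String("gs://your-bucket/input.txt")
      String getInput();
      void setInput(String value);

      @Description("Prefix of the files to write to")
      String getOutput();
      void setOutput(String value);
    }

    // Inside main():
    //   WordCountOptions options = PipelineOptionsFactory.fromArgs(args)
    //       .withValidation()
    //       .as(WordCountOptions.class);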
--- test_wordcount.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 test_wordcount.sh diff --git a/test_wordcount.sh b/test_wordcount.sh old mode 100644 new mode 100755 From 242b4c3025b74090e574087d54a2ae46d091e9b7 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Tue, 3 Feb 2015 16:25:37 -0800 Subject: [PATCH 0120/1541] Attempt to fix .travis.yml for OSX More information available here: https://github.com/travis-ci/travis-ci/issues/2839 https://jira.codehaus.org/browse/MNG-5658 https://jira.codehaus.org/browse/MNG-5686 https://github.com/Seagate/kinetic-cpp-client/blob/master/.travis.yml --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index b9b15456657df..c860c7cfe55bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,9 @@ notifications: on_success: change on_failure: always +before_install: + - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi + install: - mvn install clean -U -DskipTests=true From c3627f406e369f5b8070ff8c036481a7c527e2bf Mon Sep 17 00:00:00 2001 From: andersjohnson Date: Tue, 3 Feb 2015 16:43:34 -0800 Subject: [PATCH 0121/1541] Tweaks to support OSX. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85474161 --- test_wordcount.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test_wordcount.sh b/test_wordcount.sh index 81e2e2a6baa8c..699489c89b11c 100755 --- a/test_wordcount.sh +++ b/test_wordcount.sh @@ -17,7 +17,8 @@ set -e set -o pipefail -cd $(dirname $0) +MYDIR=$(dirname $0) || exit 2 +cd $MYDIR TOPDIR="." if [[ $# -gt 0 ]] @@ -33,7 +34,8 @@ function check_result_hash { local outfile_prefix=$2 local expected=$3 - local actual=$(md5sum $outfile_prefix-* | awk '{print $1}') + local actual=$(md5sum $outfile_prefix-* | awk '{print $1}' || \ + md5 -q $outfile_prefix-*) || exit 2 # OSX if [[ "$actual" != "$expected" ]] then echo "FAIL $name: Output hash mismatch. Got $actual, expected $expected." @@ -46,7 +48,8 @@ function check_result_hash { function get_outfile_prefix { local name=$1 - mktemp --tmpdir=/tmp -u "$name.out.XXXXXXXXXX" + # NOTE: mktemp on OSX doesn't support --tmpdir + mktemp -u "/tmp/$name.out.XXXXXXXXXX" } function run_via_mvn { @@ -54,7 +57,7 @@ function run_via_mvn { local input=$2 local expected_hash=$3 - local outfile_prefix="$(get_outfile_prefix "$name")" + local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2 local cmd='mvn exec:java -f '"$TOPDIR"'/pom.xml -pl examples \ -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \ -Dexec.args="--runner=DirectPipelineRunner --input='"$input"' --output='"$outfile_prefix"'"' @@ -68,7 +71,7 @@ function run_bundled { local input=$2 local expected_hash=$3 - local outfile_prefix="$(get_outfile_prefix "$name")" + local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2 local cmd='java -cp '"$JAR_FILE"' \ com.google.cloud.dataflow.examples.WordCount \ --runner=DirectPipelineRunner \ From 95bb0ea26c43502b7e2e35e739f60ad94865ef59 Mon Sep 17 00:00:00 2001 From: millsd Date: Tue, 3 Feb 2015 17:38:06 -0800 Subject: [PATCH 0122/1541] Merge the streaming runner code into the main sdk. 
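The patch below introduces `WindmillServerStub` as the seam between the streaming worker and the Windmill service: `StreamingDataflowWorker` only ever calls the abstract stub, and the concrete server class is loaded reflectively from `windmill.hostport`. As a rough sketch of what plugging in an alternative backend or test double could look like, assuming only the abstract methods declared in the stub further down (this class is hypothetical and not part of the patch):

    import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill;
    import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub;

    // A do-nothing Windmill server: hands out no work and accepts everything.
    // Useful only to illustrate the contract the worker depends on.
    public class NoOpWindmillServer extends WindmillServerStub {
      @Override
      public Windmill.GetWorkResponse getWork(Windmill.GetWorkRequest request) {
        return Windmill.GetWorkResponse.getDefaultInstance();
      }

      @Override
      public Windmill.GetDataResponse getData(Windmill.GetDataRequest request) {
        return Windmill.GetDataResponse.getDefaultInstance();
      }

      @Override
      public Windmill.CommitWorkResponse commitWork(Windmill.CommitWorkRequest request) {
        return Windmill.CommitWorkResponse.getDefaultInstance();
      }

      @Override
      public Windmill.GetConfigResponse getConfig(Windmill.GetConfigRequest request) {
        return Windmill.GetConfigResponse.getDefaultInstance();
      }

      @Override
      public Windmill.ReportStatsResponse reportStats(Windmill.ReportStatsRequest request) {
        return Windmill.ReportStatsResponse.getDefaultInstance();
      }
    }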
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85478753 --- sdk/pom.xml | 18 + .../sdk/runners/DataflowPipelineRunner.java | 2 +- .../sdk/runners/worker/PubsubReader.java | 98 ++ .../sdk/runners/worker/PubsubSink.java | 108 +++ .../sdk/runners/worker/ReaderFactory.java | 15 + .../sdk/runners/worker/SinkFactory.java | 4 + .../worker/StreamingDataflowWorker.java | 641 ++++++++++++ .../worker/UngroupedWindmillReader.java | 116 +++ .../sdk/runners/worker/WindmillSink.java | 130 +++ .../worker/WindowingWindmillReader.java | 157 +++ .../worker/windmill/WindmillServerStub.java | 45 + .../sdk/util/BoundedQueueExecutor.java | 69 ++ .../cloud/dataflow/sdk/util/StateFetcher.java | 147 +++ .../util/StreamingModeExecutionContext.java | 208 ++++ .../worker/StreamingDataflowWorkerTest.java | 913 ++++++++++++++++++ 15 files changed, 2670 insertions(+), 1 deletion(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedWindmillReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindowingWindmillReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/windmill/WindmillServerStub.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BoundedQueueExecutor.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StateFetcher.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java diff --git a/sdk/pom.xml b/sdk/pom.xml index 1b7992b00d382..f269e2bc70f6d 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -200,6 +200,12 @@ + + com.google.cloud.dataflow + google-cloud-dataflow-java-proto-library-all + 0.3.150203 + + com.google.apis google-api-services-bigquery @@ -359,6 +365,18 @@ 2.4 + + org.eclipse.jetty + jetty-server + 9.2.6.v20141205 + + + + org.eclipse.jetty + jetty-jmx + 9.2.6.v20141205 + + com.google.auto.service diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 4da69ac460e5d..86781cd60eff3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -79,7 +79,7 @@ public class DataflowPipelineRunner extends PipelineRunner private DataflowPipelineRunnerHooks hooks; // Environment version information - private static final String ENVIRONMENT_MAJOR_VERSION = "0"; + private static final String ENVIRONMENT_MAJOR_VERSION = "1"; /** * Construct a runner from the provided options. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubReader.java new file mode 100644 index 0000000000000..b39976f71e651 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubReader.java @@ -0,0 +1,98 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +/** + * A Reader that receives elements from Pubsub, via a Windmill server. 
+ */ +class PubsubReader extends Reader> { + private final ValueOnlyWindowedValueCoder coder; + private StreamingModeExecutionContext context; + + PubsubReader(Coder> coder, StreamingModeExecutionContext context) { + this.coder = (ValueOnlyWindowedValueCoder) coder; + this.context = context; + } + + public static PubsubReader create( + PipelineOptions options, + CloudObject spec, + Coder> coder, + ExecutionContext context) { + return new PubsubReader<>(coder, (StreamingModeExecutionContext) context); + } + + @Override + public ReaderIterator> iterator() throws IOException { + return new PubsubReaderIterator(); + } + + class PubsubReaderIterator extends AbstractReaderIterator> { + private int bundleIndex = 0; + private int messageIndex = 0; + + @Override + public boolean hasNext() throws IOException { + Windmill.WorkItem work = context.getWork(); + return bundleIndex < work.getMessageBundlesCount() && + messageIndex < work.getMessageBundles(bundleIndex).getMessagesCount(); + } + + @Override + public WindowedValue next() throws IOException { + Windmill.Message message = + context.getWork().getMessageBundles(bundleIndex).getMessages(messageIndex); + if (messageIndex >= + context.getWork().getMessageBundles(bundleIndex).getMessagesCount() - 1) { + messageIndex = 0; + bundleIndex++; + } else { + messageIndex++; + } + long timestampMillis = TimeUnit.MICROSECONDS.toMillis(message.getTimestamp()); + InputStream data = message.getData().newInput(); + notifyElementRead(data.available()); + T value = (T) coder.getValueCoder().decode(data, Coder.Context.OUTER); + return WindowedValue.of(value, + new Instant(timestampMillis), + Arrays.asList(GlobalWindow.INSTANCE)); + } + } + + @Override + public boolean supportsRestart() { + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java new file mode 100644 index 0000000000000..e75d9155efee0 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.protobuf.ByteString; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +/** + * A sink that writes to Pubsub, via a Windmill server. + * + * @param the type of the elements written to the sink + */ +class PubsubSink extends Sink> { + private String topic; + private Coder> coder; + private StreamingModeExecutionContext context; + + PubsubSink(String topic, + Coder> coder, + StreamingModeExecutionContext context) { + this.topic = topic; + this.coder = coder; + this.context = context; + } + + public static PubsubSink create(PipelineOptions options, + CloudObject spec, + Coder> coder, + ExecutionContext context) + throws Exception { + String topic = getString(spec, "pubsub_topic"); + return new PubsubSink<>(topic, coder, (StreamingModeExecutionContext) context); + } + + @Override + public SinkWriter> writer() { + return new PubsubWriter(topic); + } + + /** The SinkWriter for a PubsubSink. */ + class PubsubWriter implements SinkWriter> { + private Windmill.PubSubMessageBundle.Builder outputBuilder; + + private PubsubWriter(String topic) { + outputBuilder = Windmill.PubSubMessageBundle.newBuilder().setTopic(topic); + } + + private ByteString encode(Coder coder, S object) throws IOException { + ByteString.Output stream = ByteString.newOutput(); + coder.encode(object, stream, Coder.Context.OUTER); + return stream.toByteString(); + } + + @Override + public long add(WindowedValue data) throws IOException { + ByteString byteString = encode(coder, data); + + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(data.getTimestamp().getMillis()); + outputBuilder.addMessages( + Windmill.Message.newBuilder() + .setData(byteString) + .setTimestamp(timestampMicros) + .build()); + + return byteString.size(); + } + + @Override + public void close() throws IOException { + Windmill.PubSubMessageBundle pubsubMessages = outputBuilder.build(); + if (pubsubMessages.getMessagesCount() > 0) { + context.getOutputBuilder().addPubsubMessages(pubsubMessages); + } + outputBuilder.clear(); + } + } + + @Override + public boolean supportsRestart() { + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java index 5d42970c16000..17c695482ffae 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderFactory.java @@ -63,6 +63,21 @@ private ReaderFactory() {} "GroupingShuffleSource", GroupingShuffleReaderFactory.class.getName()); predefinedReaderFactories.put("InMemorySource", InMemoryReaderFactory.class.getName()); predefinedReaderFactories.put("BigQuerySource", BigQueryReaderFactory.class.getName()); + predefinedReaderFactories.put( + 
"com.google.cloud.dataflow.sdk.runners.worker.BucketingWindmillSource", + WindowingWindmillReader.class.getName()); + predefinedReaderFactories.put( + "WindowingWindmillReader", WindowingWindmillReader.class.getName()); + predefinedReaderFactories.put( + "com.google.cloud.dataflow.sdk.runners.worker.UngroupedWindmillSource", + UngroupedWindmillReader.class.getName()); + predefinedReaderFactories.put( + "UngroupedWindmillReader", UngroupedWindmillReader.class.getName()); + predefinedReaderFactories.put( + "com.google.cloud.dataflow.sdk.runners.worker.PubsubSource", + PubsubReader.class.getName()); + predefinedReaderFactories.put( + "PubsubReader", PubsubReader.class.getName()); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java index 590e7f8151347..363d830dffbd4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SinkFactory.java @@ -55,6 +55,10 @@ private SinkFactory() {} AvroSinkFactory.class.getName()); predefinedSinkFactories.put("ShuffleSink", ShuffleSinkFactory.class.getName()); + predefinedSinkFactories.put("PubsubSink", + PubsubSink.class.getName()); + predefinedSinkFactories.put("WindmillSink", + WindmillSink.class.getName()); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java new file mode 100644 index 0000000000000..9234d31c7e7fa --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -0,0 +1,641 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.MetricUpdate; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub; +import com.google.cloud.dataflow.sdk.util.BoundedQueueExecutor; +import com.google.cloud.dataflow.sdk.util.CloudCounterUtils; +import com.google.cloud.dataflow.sdk.util.StateFetcher; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.UserCodeException; +import com.google.cloud.dataflow.sdk.util.Values; +import com.google.cloud.dataflow.sdk.util.common.Counter; +import com.google.cloud.dataflow.sdk.util.common.CounterSet; +import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; + +import org.eclipse.jetty.server.Request; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.server.handler.AbstractHandler; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * Implements a Streaming Dataflow worker. + */ +public class StreamingDataflowWorker { + private static final Logger LOG = Logger.getLogger(StreamingDataflowWorker.class.getName()); + static final int MAX_THREAD_POOL_SIZE = 100; + static final long THREAD_EXPIRATION_TIME_SEC = 60; + static final int MAX_THREAD_POOL_QUEUE_SIZE = 100; + static final long MAX_COMMIT_BYTES = 32 << 20; + static final int DEFAULT_STATUS_PORT = 8081; + // Memory threshold under which no new work will be processed. Set to 0 to disable pushback. + static final double PUSHBACK_THRESHOLD = 0.1; + static final String WINDMILL_SERVER_CLASS_NAME = + "com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServer"; + + /** + * Indicates that the key token was invalid when data was attempted to be fetched. 
+ */ + public static class KeyTokenInvalidException extends RuntimeException { + public KeyTokenInvalidException(String key) { + super("Unable to fetch data due to token mismatch for key " + key); + } + } + + static MapTask parseMapTask(String input) throws IOException { + return Transport.getJsonFactory() + .fromString(input, MapTask.class); + } + + public static void main(String[] args) throws Exception { + LOG.setLevel(Level.INFO); + String hostport = System.getProperty("windmill.hostport"); + if (hostport == null) { + throw new Exception("-Dwindmill.hostport must be set to the location of the windmill server"); + } + + int statusPort = DEFAULT_STATUS_PORT; + if (System.getProperties().containsKey("status_port")) { + statusPort = Integer.parseInt(System.getProperty("status_port")); + } + + ArrayList mapTasks = new ArrayList<>(); + for (int i = 0; i < args.length; i++) { + mapTasks.add(parseMapTask(args[i])); + } + + WindmillServerStub windmillServer = + (WindmillServerStub) Class.forName(WINDMILL_SERVER_CLASS_NAME) + .getDeclaredConstructor(String.class).newInstance(hostport); + + StreamingDataflowWorker worker = + new StreamingDataflowWorker(mapTasks, windmillServer); + worker.start(); + + worker.runStatusServer(statusPort); + } + + private ConcurrentMap instructionMap; + private ConcurrentMap> outputMap; + private ConcurrentMap> mapTaskExecutors; + private ThreadFactory threadFactory; + private BoundedQueueExecutor executor; + private WindmillServerStub windmillServer; + private Thread dispatchThread; + private Thread commitThread; + private AtomicBoolean running; + private StateFetcher stateFetcher; + private DataflowPipelineOptions options; + private long clientId; + private Server statusServer; + private AtomicReference lastException; + + /** Regular constructor. */ + public StreamingDataflowWorker( + List mapTasks, WindmillServerStub server) { + initialize(mapTasks, server); + options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("StreamingWorkerHarness"); + options.setStreaming(true); + + if (System.getProperties().containsKey("path_validator_class")) { + try { + options.setPathValidatorClass((Class) Class.forName( + System.getProperty("path_validator_class"))); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Unable to find validator class", e); + } + } + if (System.getProperties().containsKey("credential_factory_class")) { + try { + options.setCredentialFactoryClass((Class) Class.forName( + System.getProperty("credential_factory_class"))); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Unable to find credential factory class", e); + } + } + } + + /** The constructor that takes PipelineOptions. Should be used only by unit tests. 
*/ + StreamingDataflowWorker( + List mapTasks, WindmillServerStub server, DataflowPipelineOptions options) { + initialize(mapTasks, server); + this.options = options; + } + + public void start() { + running.set(true); + dispatchThread = threadFactory.newThread(new Runnable() { + public void run() { + dispatchLoop(); + } + }); + dispatchThread.setPriority(Thread.MIN_PRIORITY); + dispatchThread.setName("DispatchThread"); + dispatchThread.start(); + + commitThread = threadFactory.newThread(new Runnable() { + public void run() { + commitLoop(); + } + }); + commitThread.setPriority(Thread.MAX_PRIORITY); + commitThread.setName("CommitThread"); + commitThread.start(); + } + + public void stop() { + try { + if (statusServer != null) { + statusServer.stop(); + } + running.set(false); + dispatchThread.join(); + executor.shutdown(); + if (!executor.awaitTermination(5, TimeUnit.MINUTES)) { + throw new RuntimeException("Process did not terminate within 5 minutes"); + } + for (ConcurrentLinkedQueue queue : mapTaskExecutors.values()) { + WorkerAndContext workerAndContext; + while ((workerAndContext = queue.poll()) != null) { + workerAndContext.getWorker().close(); + } + } + commitThread.join(); + } catch (Exception e) { + LOG.warning("Exception while shutting down: " + e); + e.printStackTrace(); + } + } + + /** Initializes the execution harness. */ + private void initialize(List mapTasks, WindmillServerStub server) { + this.instructionMap = new ConcurrentHashMap<>(); + this.outputMap = new ConcurrentHashMap<>(); + this.mapTaskExecutors = new ConcurrentHashMap<>(); + for (MapTask mapTask : mapTasks) { + addComputation(mapTask); + } + this.threadFactory = new ThreadFactory() { + private final Thread.UncaughtExceptionHandler handler = + new Thread.UncaughtExceptionHandler() { + public void uncaughtException(Thread thread, Throwable e) { + LOG.severe("Uncaught exception: " + e); + e.printStackTrace(); + System.exit(1); + } + }; + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setUncaughtExceptionHandler(handler); + t.setDaemon(true); + return t; + } + }; + this.executor = new BoundedQueueExecutor( + MAX_THREAD_POOL_SIZE, THREAD_EXPIRATION_TIME_SEC, TimeUnit.SECONDS, + MAX_THREAD_POOL_QUEUE_SIZE, threadFactory); + this.windmillServer = server; + this.running = new AtomicBoolean(); + this.stateFetcher = new StateFetcher(server); + this.clientId = new Random().nextLong(); + this.lastException = new AtomicReference<>(); + } + + public void runStatusServer(int statusPort) { + statusServer = new Server(statusPort); + statusServer.setHandler(new StatusHandler()); + try { + statusServer.start(); + LOG.info("Status server started on port " + statusPort); + statusServer.join(); + } catch (Exception e) { + LOG.warning("Status server failed to start: " + e); + } + } + + private void addComputation(MapTask mapTask) { + String computation = mapTask.getSystemName(); + if (!instructionMap.containsKey(computation)) { + LOG.info("Adding config for " + computation + ": " + mapTask); + outputMap.put(computation, new ConcurrentLinkedQueue()); + instructionMap.put(computation, mapTask); + mapTaskExecutors.put( + computation, + new ConcurrentLinkedQueue()); + } + } + + private static void sleep(int millis) { + try { + Thread.sleep(millis); + } catch (InterruptedException e) { + // NOLINT + } + } + + private void dispatchLoop() { + LOG.info("Dispatch starting"); + Runtime rt = Runtime.getRuntime(); + long lastPushbackLog = 0; + while (running.get()) { + + // If free memory is less than a percentage of total 
memory, block + // until current work drains and memory is released. + // Also force a GC to try to get under the memory threshold if possible. + while (rt.freeMemory() < rt.totalMemory() * PUSHBACK_THRESHOLD) { + if (lastPushbackLog < (lastPushbackLog = System.currentTimeMillis()) - 60 * 1000) { + LOG.warning("In pushback, not accepting new work. Free Memory: " + + rt.freeMemory() + "MB / " + rt.totalMemory() + "MB"); + System.gc(); + } + sleep(10); + } + + int backoff = 1; + Windmill.GetWorkResponse workResponse; + do { + workResponse = getWork(); + if (workResponse.getWorkCount() > 0) { + break; + } + sleep(backoff); + backoff = Math.min(1000, backoff * 2); + } while (running.get()); + for (final Windmill.ComputationWorkItems computationWork : workResponse.getWorkList()) { + for (final Windmill.WorkItem work : computationWork.getWorkList()) { + final String computation = computationWork.getComputationId(); + if (!instructionMap.containsKey(computation)) { + getConfig(computation); + } + executor.execute(new Runnable() { + public void run() { + process(computation, work); + } + }); + } + } + } + LOG.info("Dispatch done"); + } + + private void process( + final String computation, final Windmill.WorkItem work) { + LOG.log(Level.FINE, "Starting processing for " + computation + ":\n{0}", work); + + MapTask mapTask = instructionMap.get(computation); + if (mapTask == null) { + LOG.info("Received work for unknown computation: " + computation + + ". Known computations are " + instructionMap.keySet()); + return; + } + + Windmill.WorkItemCommitRequest.Builder outputBuilder = + Windmill.WorkItemCommitRequest.newBuilder() + .setKey(work.getKey()) + .setWorkToken(work.getWorkToken()); + + StreamingModeExecutionContext context = null; + MapTaskExecutor worker = null; + + try { + WorkerAndContext workerAndContext = mapTaskExecutors.get(computation).poll(); + if (workerAndContext == null) { + context = new StreamingModeExecutionContext(computation, stateFetcher); + worker = MapTaskExecutorFactory.create(options, mapTask, context); + } else { + worker = workerAndContext.getWorker(); + context = workerAndContext.getContext(); + } + + context.start(work, outputBuilder); + + // Blocks while executing work. + worker.execute(); + + buildCounters(worker.getOutputCounters(), outputBuilder); + + context.flushState(); + + mapTaskExecutors.get(computation).offer(new WorkerAndContext(worker, context)); + worker = null; + context = null; + } catch (Throwable t) { + if (worker != null) { + try { + worker.close(); + } catch (Exception e) { + LOG.warning("Failed to close worker: " + e.getMessage()); + e.printStackTrace(); + } + } + + t = t instanceof UserCodeException ? t.getCause() : t; + + if (t instanceof KeyTokenInvalidException) { + LOG.fine("Execution of work for " + computation + " for key " + work.getKey().toStringUtf8() + + " failed due to token expiration, will not retry locally."); + } else { + LOG.warning("Execution of work for " + computation + " for key " + + work.getKey().toStringUtf8() + " failed, retrying." + + "\nError: " + t.getMessage()); + t.printStackTrace(); + lastException.set(t); + LOG.fine("Failed work: " + work); + reportFailure(computation, work, t); + // Try again, but go to the end of the queue to avoid a tight loop. 
+ sleep(60000); + executor.forceExecute(new Runnable() { + public void run() { + process(computation, work); + } + }); + } + return; + } + + Windmill.WorkItemCommitRequest output = outputBuilder.build(); + outputMap.get(computation).add(output); + LOG.fine("Processing done for work token: " + work.getWorkToken()); + } + + private void commitLoop() { + while (running.get()) { + Windmill.CommitWorkRequest.Builder commitRequestBuilder = + Windmill.CommitWorkRequest.newBuilder(); + long remainingCommitBytes = MAX_COMMIT_BYTES; + for (Map.Entry> entry : + outputMap.entrySet()) { + Windmill.ComputationCommitWorkRequest.Builder computationRequestBuilder = + Windmill.ComputationCommitWorkRequest.newBuilder(); + ConcurrentLinkedQueue queue = entry.getValue(); + while (remainingCommitBytes > 0) { + Windmill.WorkItemCommitRequest request = queue.poll(); + if (request == null) { + break; + } + remainingCommitBytes -= request.getSerializedSize(); + computationRequestBuilder.addRequests(request); + } + if (computationRequestBuilder.getRequestsCount() > 0) { + computationRequestBuilder.setComputationId(entry.getKey()); + commitRequestBuilder.addRequests(computationRequestBuilder); + } + } + if (commitRequestBuilder.getRequestsCount() > 0) { + Windmill.CommitWorkRequest commitRequest = commitRequestBuilder.build(); + LOG.log(Level.FINE, "Commit: {0}", commitRequest); + commitWork(commitRequest); + } + if (remainingCommitBytes > 0) { + sleep(100); + } + } + } + + private Windmill.GetWorkResponse getWork() { + return windmillServer.getWork( + Windmill.GetWorkRequest.newBuilder() + .setClientId(clientId) + .setMaxItems(100) + .build()); + } + + private void commitWork(Windmill.CommitWorkRequest request) { + windmillServer.commitWork(request); + } + + private void getConfig(String computation) { + Windmill.GetConfigRequest request = + Windmill.GetConfigRequest.newBuilder().addComputations(computation).build(); + for (String serializedMapTask : windmillServer.getConfig(request).getCloudWorksList()) { + try { + addComputation(parseMapTask(serializedMapTask)); + } catch (IOException e) { + LOG.warning("Parsing MapTask failed: " + serializedMapTask); + e.printStackTrace(); + } + } + } + + private void buildCounters(CounterSet counterSet, + Windmill.WorkItemCommitRequest.Builder builder) { + for (MetricUpdate metricUpdate : + CloudCounterUtils.extractCounters(counterSet, true /* delta */)) { + Windmill.Counter.Kind kind; + String cloudKind = metricUpdate.getKind(); + if (cloudKind.equals(Counter.AggregationKind.SUM.name())) { + kind = Windmill.Counter.Kind.SUM; + } else if (cloudKind.equals(Counter.AggregationKind.MEAN.name())) { + kind = Windmill.Counter.Kind.MEAN; + } else if (cloudKind.equals(Counter.AggregationKind.MAX.name())) { + kind = Windmill.Counter.Kind.MAX; + } else if (cloudKind.equals(Counter.AggregationKind.MIN.name())) { + kind = Windmill.Counter.Kind.MIN; + } else { + LOG.log(Level.FINE, "Unhandled counter type: " + metricUpdate.getKind()); + return; + } + Windmill.Counter.Builder counterBuilder = builder.addCounterUpdatesBuilder(); + counterBuilder.setName(metricUpdate.getName().getName()).setKind(kind); + Object element = null; + if (kind == Windmill.Counter.Kind.MEAN) { + Object meanCount = metricUpdate.getMeanCount(); + if (meanCount != null) { + try { + Long longValue = Values.asLong(meanCount); + if (longValue != 0) { + counterBuilder.setMeanCount(longValue); + } + } catch (ClassCastException e) { + // Nothing to do. 
+ } + } + element = metricUpdate.getMeanSum(); + } else { + element = metricUpdate.getScalar(); + } + if (element != null) { + try { + Double doubleValue = Values.asDouble(element); + if (doubleValue != 0) { + counterBuilder.setDoubleScalar(doubleValue); + } + } catch (ClassCastException e) { + // Nothing to do. + } + try { + Long longValue = Values.asLong(element); + if (longValue != 0) { + counterBuilder.setIntScalar(longValue); + } + } catch (ClassCastException e) { + // Nothing to do. + } + } + } + } + + private Windmill.Exception buildExceptionReport(Throwable t) { + Windmill.Exception.Builder builder = Windmill.Exception.newBuilder(); + + builder.addStackFrames(t.toString()); + for (StackTraceElement frame : t.getStackTrace()) { + builder.addStackFrames(frame.toString()); + } + if (t.getCause() != null) { + builder.setCause(buildExceptionReport(t.getCause())); + } + + return builder.build(); + } + + private void reportFailure(String computation, Windmill.WorkItem work, Throwable t) { + windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() + .setComputationId(computation) + .setKey(work.getKey()) + .setWorkToken(work.getWorkToken()) + .addExceptions(buildExceptionReport(t)) + .build()); + } + + private static class WorkerAndContext { + public MapTaskExecutor worker; + public StreamingModeExecutionContext context; + + public WorkerAndContext(MapTaskExecutor worker, StreamingModeExecutionContext context) { + this.worker = worker; + this.context = context; + } + + public MapTaskExecutor getWorker() { + return worker; + } + + public StreamingModeExecutionContext getContext() { + return context; + } + } + + private class StatusHandler extends AbstractHandler { + @Override + public void handle( + String target, Request baseRequest, + HttpServletRequest request, HttpServletResponse response) + throws IOException, ServletException { + + response.setContentType("text/html;charset=utf-8"); + response.setStatus(HttpServletResponse.SC_OK); + baseRequest.setHandled(true); + + PrintWriter responseWriter = response.getWriter(); + + responseWriter.println(""); + + printHeader(responseWriter); + + printMetrics(responseWriter); + + printResources(responseWriter); + + printLastException(responseWriter); + + printSpecs(responseWriter); + + responseWriter.println(""); + } + } + + private void printHeader(PrintWriter response) { + response.println("

<h1>Streaming Worker Harness</h1>");
+    response.println("Running: " + running.get() + "<br>");
+    response.println("ID: " + clientId + "<br>");
+  }
+
+  private void printMetrics(PrintWriter response) {
+    response.println("<h2>Metrics</h2>");
+    response.println("Worker Threads: " + executor.getPoolSize() +
+        "/" + MAX_THREAD_POOL_QUEUE_SIZE + "<br>");
+    response.println("Active Threads: " + executor.getActiveCount() + "<br>");
+    response.println("Work Queue Size: " + executor.getQueue().size() + "<br>");
+    response.println("Commit Queues: <ul>");
+    for (Map.Entry<String, ConcurrentLinkedQueue<Windmill.WorkItemCommitRequest>> entry
+             : outputMap.entrySet()) {
+      response.print("<li>");
+      response.print(entry.getKey());
+      response.print(": ");
+      response.print(entry.getValue().size());
+      response.println("</li>");
+    }
+    response.println("</ul>");
+  }
+
+  private void printResources(PrintWriter response) {
+    Runtime rt = Runtime.getRuntime();
+    int mb = 1024 * 1024;
+
+    response.append("<h2>Resources</h2>\n");
+    response.append("Total Memory: " + rt.totalMemory() / mb + "MB<br>\n");
+    response.append("Used Memory: " + (rt.totalMemory() - rt.freeMemory()) / mb + "MB<br>\n");
+    response.append("Max Memory: " + rt.maxMemory() / mb + "MB<br>\n");
+  }
+
+  private void printSpecs(PrintWriter response) {
+    response.append("<h2>Specs</h2>\n");
+    for (Map.Entry<String, MapTask> entry : instructionMap.entrySet()) {
+      response.println("<h3>" + entry.getKey() + "</h3>");
+      response.print("");
+    }
+  }
+
+  private void printLastException(PrintWriter response) {
+    Throwable t = lastException.get();
+    if (t != null) {
+      response.println("<h2>Last Exception</h2>");
+      StringWriter writer = new StringWriter();
+      t.printStackTrace(new PrintWriter(writer));
+      response.println(writer.toString().replace("\t", "&nbsp;&nbsp;").replace("\n", "<br>
    ")); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedWindmillReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedWindmillReader.java new file mode 100644 index 0000000000000..bbe63746965c1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/UngroupedWindmillReader.java @@ -0,0 +1,116 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collection; +import java.util.concurrent.TimeUnit; + +/** + * A Reader that receives input data from a Windmill server, and returns it as + * individual elements. 
+ */ +class UngroupedWindmillReader extends Reader> { + private final Coder valueCoder; + private final Coder> windowsCoder; + private StreamingModeExecutionContext context; + + UngroupedWindmillReader(Coder> coder, StreamingModeExecutionContext context) { + FullWindowedValueCoder inputCoder = (FullWindowedValueCoder) coder; + this.valueCoder = inputCoder.getValueCoder(); + this.windowsCoder = inputCoder.getWindowsCoder(); + this.context = context; + } + + public static UngroupedWindmillReader create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext context) { + return new UngroupedWindmillReader<>(coder, (StreamingModeExecutionContext) context); + } + + @Override + public ReaderIterator> iterator() throws IOException { + return new UngroupedWindmillReaderIterator(); + } + + class UngroupedWindmillReaderIterator extends AbstractReaderIterator> { + private int bundleIndex = 0; + private int messageIndex = 0; + + @Override + public boolean hasNext() throws IOException { + Windmill.WorkItem work = context.getWork(); + return bundleIndex < work.getMessageBundlesCount() && + messageIndex < work.getMessageBundles(bundleIndex).getMessagesCount(); + } + + @Override + public WindowedValue next() throws IOException { + Windmill.Message message = + context.getWork().getMessageBundles(bundleIndex).getMessages(messageIndex); + if (messageIndex >= + context.getWork().getMessageBundles(bundleIndex).getMessagesCount() - 1) { + messageIndex = 0; + bundleIndex++; + } else { + messageIndex++; + } + Instant timestampMillis = new Instant(TimeUnit.MICROSECONDS.toMillis(message.getTimestamp())); + InputStream data = message.getData().newInput(); + InputStream metadata = message.getMetadata().newInput(); + Collection windows = decode(windowsCoder, metadata); + if (valueCoder instanceof KvCoder) { + KvCoder kvCoder = (KvCoder) valueCoder; + InputStream key = context.getSerializedKey().newInput(); + notifyElementRead(key.available() + data.available() + metadata.available()); + return WindowedValue.of((T) KV.of(decode(kvCoder.getKeyCoder(), key), + decode(kvCoder.getValueCoder(), data)), + timestampMillis, + windows); + } else { + notifyElementRead(data.available() + metadata.available()); + return WindowedValue.of(decode(valueCoder, data), timestampMillis, windows); + } + } + + private S decode(Coder coder, InputStream input) throws IOException { + return coder.decode(input, Coder.Context.OUTER); + } + } + + @Override + public boolean supportsRestart() { + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java new file mode 100644 index 0000000000000..3bfcac116bf47 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java @@ -0,0 +1,130 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.getString; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Sink; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.protobuf.ByteString; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +class WindmillSink extends Sink> { + private WindmillStreamWriter writer; + private final Coder valueCoder; + private final Coder windowsCoder; + private StreamingModeExecutionContext context; + + WindmillSink(String destinationName, + Coder> coder, + StreamingModeExecutionContext context) { + this.writer = new WindmillStreamWriter(destinationName); + FullWindowedValueCoder inputCoder = (FullWindowedValueCoder) coder; + this.valueCoder = inputCoder.getValueCoder(); + this.windowsCoder = inputCoder.getWindowsCoder(); + this.context = context; + } + + public static WindmillSink create(PipelineOptions options, + CloudObject spec, + Coder> coder, + ExecutionContext context) throws Exception { + return new WindmillSink<>(getString(spec, "stream_id"), coder, + (StreamingModeExecutionContext) context); + } + + @Override + public SinkWriter> writer() { + return writer; + } + + class WindmillStreamWriter implements SinkWriter> { + private Map productionMap; + private final String destinationName; + + private WindmillStreamWriter(String destinationName) { + this.destinationName = destinationName; + productionMap = new HashMap(); + } + + private ByteString encode(Coder coder, S object) throws IOException { + ByteString.Output stream = ByteString.newOutput(); + coder.encode(object, stream, Coder.Context.OUTER); + return stream.toByteString(); + } + + @Override + public long add(WindowedValue data) throws IOException { + ByteString key, value; + ByteString windows = encode(windowsCoder, data.getWindows()); + if (valueCoder instanceof KvCoder) { + KvCoder kvCoder = (KvCoder) valueCoder; + KV kv = (KV) data.getValue(); + key = encode(kvCoder.getKeyCoder(), kv.getKey()); + value = encode(kvCoder.getValueCoder(), kv.getValue()); + } else { + key = context.getSerializedKey(); + value = encode(valueCoder, data.getValue()); + } + Windmill.KeyedMessageBundle.Builder keyedOutput = productionMap.get(key); + if (keyedOutput == null) { + keyedOutput = Windmill.KeyedMessageBundle.newBuilder().setKey(key); + productionMap.put(key, keyedOutput); + } + + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(data.getTimestamp().getMillis()); + Windmill.Message.Builder builder = Windmill.Message.newBuilder() + .setTimestamp(timestampMicros) + .setData(value) + .setMetadata(windows); + keyedOutput.addMessages(builder.build()); + return key.size() + value.size() + windows.size(); 
+ } + + @Override + public void close() throws IOException { + Windmill.OutputMessageBundle.Builder outputBuilder = + Windmill.OutputMessageBundle.newBuilder().setDestinationStreamId(destinationName); + + for (Windmill.KeyedMessageBundle.Builder keyedOutput : productionMap.values()) { + outputBuilder.addBundles(keyedOutput.build()); + } + if (outputBuilder.getBundlesCount() > 0) { + context.getOutputBuilder().addOutputMessages(outputBuilder.build()); + } + productionMap.clear(); + } + } + + @Override + public boolean supportsRestart() { + return true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindowingWindmillReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindowingWindmillReader.java new file mode 100644 index 0000000000000..8bd6d89ec6e2f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindowingWindmillReader.java @@ -0,0 +1,157 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.TimerOrElement.TimerOrElementCoder; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.StreamingModeExecutionContext; +import com.google.cloud.dataflow.sdk.util.TimerOrElement; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; +import com.google.cloud.dataflow.sdk.values.KV; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.concurrent.TimeUnit; + +/** + * A Reader that receives input data from a Windmill server, and returns it as + * groups of elements and timers. 
+ */ +class WindowingWindmillReader extends Reader>> { + private final Coder valueCoder; + private final Coder> windowsCoder; + private StreamingModeExecutionContext context; + + WindowingWindmillReader(Coder>> coder, + StreamingModeExecutionContext context) { + FullWindowedValueCoder> inputCoder = + (FullWindowedValueCoder>) coder; + this.windowsCoder = inputCoder.getWindowsCoder(); + this.valueCoder = ((TimerOrElementCoder) inputCoder.getValueCoder()).getElementCoder(); + this.context = context; + } + + public static WindowingWindmillReader create(PipelineOptions options, + CloudObject spec, + Coder coder, + ExecutionContext context) { + return new WindowingWindmillReader<>(coder, (StreamingModeExecutionContext) context); + } + + @Override + public ReaderIterator>> iterator() throws IOException { + return new WindowingWindmillReaderIterator(); + } + + class WindowingWindmillReaderIterator + extends AbstractReaderIterator>> { + private int bundleIndex = 0; + private int messageIndex = 0; + private int timerIndex = 0; + + private boolean hasMoreMessages() { + Windmill.WorkItem work = context.getWork(); + return bundleIndex < work.getMessageBundlesCount() && + messageIndex < work.getMessageBundles(bundleIndex).getMessagesCount(); + } + + private boolean hasMoreTimers() { + Windmill.WorkItem work = context.getWork(); + return work.hasTimers() && timerIndex < work.getTimers().getTimersCount(); + } + + @Override + public boolean hasNext() throws IOException { + return hasMoreMessages() || hasMoreTimers(); + } + + @Override + public WindowedValue> next() throws IOException { + if (hasMoreTimers()) { + if (valueCoder instanceof KvCoder) { + Windmill.Timer timer = context.getWork().getTimers().getTimers(timerIndex++); + long timestampMillis = TimeUnit.MICROSECONDS.toMillis(timer.getTimestamp()); + + KvCoder kvCoder = (KvCoder) valueCoder; + Object key = kvCoder.getKeyCoder().decode( + context.getSerializedKey().newInput(), Coder.Context.OUTER); + + return WindowedValue.of(TimerOrElement.timer(timer.getTag().toStringUtf8(), + new Instant(timestampMillis), + key), + new Instant(timestampMillis), + new ArrayList()); + } else { + throw new RuntimeException("Timer set on non-keyed DoFn"); + } + } else { + Windmill.Message message = + context.getWork().getMessageBundles(bundleIndex).getMessages(messageIndex); + + if (messageIndex >= + context.getWork().getMessageBundles(bundleIndex).getMessagesCount() - 1) { + messageIndex = 0; + bundleIndex++; + } else { + messageIndex++; + } + Instant timestampMillis = + new Instant(TimeUnit.MICROSECONDS.toMillis(message.getTimestamp())); + InputStream data = message.getData().newInput(); + InputStream metadata = message.getMetadata().newInput(); + Collection windows = decode(windowsCoder, metadata); + if (valueCoder instanceof KvCoder) { + KvCoder kvCoder = (KvCoder) valueCoder; + InputStream key = context.getSerializedKey().newInput(); + notifyElementRead(key.available() + data.available() + metadata.available()); + return WindowedValue.of( + TimerOrElement.element((T) KV.of(decode(kvCoder.getKeyCoder(), key), + decode(kvCoder.getValueCoder(), data))), + timestampMillis, + windows); + } else { + notifyElementRead(data.available() + metadata.available()); + return WindowedValue.of(TimerOrElement.element(decode(valueCoder, data)), + timestampMillis, + windows); + } + } + } + + private S decode(Coder coder, InputStream input) throws IOException { + return coder.decode(input, Coder.Context.OUTER); + } + } + + @Override + public boolean supportsRestart() { + return 
true; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/windmill/WindmillServerStub.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/windmill/WindmillServerStub.java new file mode 100644 index 0000000000000..105c8486db446 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/windmill/WindmillServerStub.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker.windmill; + +/** + * Stub for communicating with a Windmill server. + */ +public abstract class WindmillServerStub { + /** + * Get a batch of work to process. + */ + public abstract Windmill.GetWorkResponse getWork(Windmill.GetWorkRequest request); + /** + * Get addition data such as state needed to process work. + */ + public abstract Windmill.GetDataResponse getData(Windmill.GetDataRequest request); + /** + * Commit the work, issuing any output productions, state modifications etc. + */ + public abstract Windmill.CommitWorkResponse commitWork( + Windmill.CommitWorkRequest request); + /** + * Get configuration data from the server. + */ + public abstract Windmill.GetConfigResponse getConfig(Windmill.GetConfigRequest request); + + /** + * Report execution information to the server. + */ + public abstract Windmill.ReportStatsResponse reportStats(Windmill.ReportStatsRequest request); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BoundedQueueExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BoundedQueueExecutor.java new file mode 100644 index 0000000000000..983946915c205 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BoundedQueueExecutor.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +/** + * Executor that blocks on execute() if its queue is full. 
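+ *
+ * A minimal usage sketch (illustrative only; task stands for any Runnable, and
+ * the pool size, keep-alive time, queue bound and thread factory are assumed
+ * values, not taken from the worker code):
+ *
+ *   BoundedQueueExecutor executor = new BoundedQueueExecutor(
+ *       50, 60, TimeUnit.SECONDS, 100, Executors.defaultThreadFactory());
+ *   executor.execute(task);      // blocks the caller once roughly 100 tasks are outstanding
+ *   executor.forceExecute(task); // enqueues even when the bound has been reached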
+ */ +public class BoundedQueueExecutor extends ThreadPoolExecutor { + private static class ReducableSemaphore extends Semaphore { + ReducableSemaphore(int permits) { + super(permits); + } + + public void reducePermits(int permits) { + super.reducePermits(permits); + } + } + private ReducableSemaphore semaphore; + + public BoundedQueueExecutor(int maximumPoolSize, + long keepAliveTime, + TimeUnit unit, + int maximumQueueSize, + ThreadFactory threadFactory) { + super(maximumQueueSize, maximumPoolSize, keepAliveTime, unit, + new LinkedBlockingQueue(), threadFactory); + this.semaphore = new ReducableSemaphore(maximumQueueSize); + allowCoreThreadTimeOut(true); + } + + // Before adding a Runnable to the queue, acquire the semaphore. + @Override + public void execute(Runnable r) { + semaphore.acquireUninterruptibly(); + super.execute(r); + } + + // Forcibly add something to the queue, ignoring the length limit. + public void forceExecute(Runnable r) { + semaphore.reducePermits(1); + super.execute(r); + } + + // Release the semaphore after taking a Runnable off the queue. + @Override + public void beforeExecute(Thread t, Runnable r) { + semaphore.release(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StateFetcher.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StateFetcher.java new file mode 100644 index 0000000000000..c3fae8851f563 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StateFetcher.java @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.runners.worker.StreamingDataflowWorker; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.protobuf.ByteString; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Class responsible for fetching state from the windmill server. 
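+ *
+ * A rough usage sketch (illustrative only; windmillServer, key, workToken and
+ * the "5:parDo" step prefix are made-up values):
+ *
+ *   StateFetcher fetcher = new StateFetcher(windmillServer);
+ *   CodedTupleTag tag = CodedTupleTag.of("state", StringUtf8Coder.of());
+ *   Map values = fetcher.fetch(
+ *       "computation", key, workToken, "5:parDo", Arrays.asList(tag));
+ *
+ * The returned map contains the decoded value for each requested tag, or null
+ * for tags that have no stored data on the server.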
+ */ +public class StateFetcher { + private WindmillServerStub server; + + public StateFetcher(WindmillServerStub server) { + this.server = server; + } + + public Map, Object> fetch( + String computation, ByteString key, long workToken, String prefix, + List> tags) throws CoderException, IOException { + Map, Object> resultMap = new HashMap<>(); + if (tags.isEmpty()) { + return resultMap; + } + + Windmill.KeyedGetDataRequest.Builder requestBuilder = Windmill.KeyedGetDataRequest.newBuilder() + .setKey(key) + .setWorkToken(workToken); + + Map> tagMap = new HashMap<>(); + for (CodedTupleTag tag : tags) { + ByteString tagString = ByteString.copyFromUtf8(prefix + tag.getId()); + requestBuilder.addValuesToFetch( + Windmill.TagValue.newBuilder() + .setTag(tagString) + .build()); + tagMap.put(tagString, tag); + } + + Windmill.GetDataResponse response = server.getData( + Windmill.GetDataRequest.newBuilder() + .addRequests( + Windmill.ComputationGetDataRequest.newBuilder() + .setComputationId(computation) + .addRequests(requestBuilder.build()) + .build()) + .build()); + + if (response.getDataCount() != 1 + || !response.getData(0).getComputationId().equals(computation) + || response.getData(0).getDataCount() != 1 + || !response.getData(0).getData(0).getKey().equals(key)) { + throw new IOException("Invalid data response, expected single computation and key"); + } + Windmill.KeyedGetDataResponse keyResponse = response.getData(0).getData(0); + if (keyResponse.getFailed()) { + throw new StreamingDataflowWorker.KeyTokenInvalidException(key.toStringUtf8()); + } + + for (Windmill.TagValue tv : keyResponse.getValuesList()) { + CodedTupleTag tag = tagMap.get(tv.getTag()); + if (tag != null) { + if (tv.getValue().hasData() && !tv.getValue().getData().isEmpty()) { + resultMap.put(tag, tag.getCoder().decode(tv.getValue().getData().newInput(), + Coder.Context.OUTER)); + } else { + resultMap.put(tag, null); + } + } + } + + return resultMap; + } + + public List fetchList( + String computation, ByteString key, long workToken, String prefix, CodedTupleTag tag) + throws IOException { + + ByteString tagString = ByteString.copyFromUtf8(prefix + tag.getId()); + Windmill.GetDataRequest request = Windmill.GetDataRequest.newBuilder() + .addRequests( + Windmill.ComputationGetDataRequest.newBuilder() + .setComputationId(computation) + .addRequests( + Windmill.KeyedGetDataRequest.newBuilder() + .setKey(key) + .setWorkToken(workToken) + .addListsToFetch( + Windmill.TagList.newBuilder() + .setTag(tagString) + .setEndTimestamp(Long.MAX_VALUE) + .build()) + .build()) + .build()) + .build(); + + Windmill.GetDataResponse response = server.getData(request); + + if (response.getDataCount() != 1 + || !response.getData(0).getComputationId().equals(computation) + || response.getData(0).getDataCount() != 1 + || !response.getData(0).getData(0).getKey().equals(key)) { + throw new IOException("Invalid data response, expected single computation and key\n"); + } + + Windmill.KeyedGetDataResponse keyResponse = response.getData(0).getData(0); + if (keyResponse.getFailed()) { + throw new StreamingDataflowWorker.KeyTokenInvalidException(key.toStringUtf8()); + } + if (keyResponse.getListsCount() != 1 + || !keyResponse.getLists(0).getTag().equals(tagString)) { + throw new IOException("Expected single list for tag " + tagString); + } + Windmill.TagList tagList = keyResponse.getLists(0); + List result = new ArrayList<>(); + for (Windmill.Value value : tagList.getValuesList()) { + result.add(tag.getCoder().decode(value.getData().newInput(), 
Coder.Context.OUTER)); + } + + return result; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java new file mode 100644 index 0000000000000..13801339126b7 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.CodedTupleTagMap; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.protobuf.ByteString; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * {@link ExecutionContext} for use in streaming mode. + */ +public class StreamingModeExecutionContext extends ExecutionContext { + private String computation; + private Windmill.WorkItem work; + private StateFetcher stateFetcher; + private Windmill.WorkItemCommitRequest.Builder outputBuilder; + + public StreamingModeExecutionContext(String computation, StateFetcher stateFetcher) { + this.computation = computation; + this.stateFetcher = stateFetcher; + } + + public void start(Windmill.WorkItem work, Windmill.WorkItemCommitRequest.Builder outputBuilder) { + this.work = work; + this.outputBuilder = outputBuilder; + } + + @Override + public ExecutionContext.StepContext createStepContext(String stepName) { + return new StepContext(stepName); + } + + @Override + public void setTimer(String timer, Instant timestamp) { + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(timestamp.getMillis()); + outputBuilder.addOutputTimers( + Windmill.Timer.newBuilder() + .setTimestamp(timestampMicros) + .setTag(ByteString.copyFromUtf8(timer)) + .build()); + } + + @Override + public void deleteTimer(String timer) { + outputBuilder.addOutputTimers( + Windmill.Timer.newBuilder().setTag(ByteString.copyFromUtf8(timer)).build()); + } + + public ByteString getSerializedKey() { + return work.getKey(); + } + + public long getWorkToken() { + return work.getWorkToken(); + } + + public Windmill.WorkItem getWork() { + return work; + } + + public Windmill.WorkItemCommitRequest.Builder getOutputBuilder() { + return outputBuilder; + } + + public void flushState() { + for (ExecutionContext.StepContext stepContext : getAllStepContexts()) { + ((StepContext) stepContext).flushState(); + } + } + + public Map, Object> lookupState( + String prefix, List> tags) throws CoderException, IOException { + return stateFetcher.fetch(computation, getSerializedKey(), 
getWorkToken(), prefix, tags); + } + + class StepContext extends ExecutionContext.StepContext { + private final String mangledPrefix; + private Map, KV> stateCache = new HashMap<>(); + private Map, List>> tagListUpdates = new HashMap<>(); + + public StepContext(String stepName) { + super(stepName); + // Mangle such that there are no partially overlapping prefixes. + this.mangledPrefix = stepName.length() + ":" + stepName; + } + + @Override + public void store(CodedTupleTag tag, T value) throws CoderException, IOException { + ByteString.Output stream = ByteString.newOutput(); + tag.getCoder().encode(value, stream, Coder.Context.OUTER); + stateCache.put(tag, KV.of(value, stream.toByteString())); + } + + @Override + public CodedTupleTagMap lookup(List> tags) + throws CoderException, IOException { + List> tagsToLookup = new ArrayList<>(); + List> residentTags = new ArrayList<>(); + for (CodedTupleTag tag : tags) { + if (stateCache.containsKey(tag)) { + residentTags.add(tag); + } else { + tagsToLookup.add(tag); + } + } + Map, Object> result = + StreamingModeExecutionContext.this.lookupState(mangledPrefix, tagsToLookup); + for (CodedTupleTag tag : residentTags) { + result.put(tag, stateCache.get(tag).getKey()); + } + return CodedTupleTagMap.of(result); + } + + @Override + public void writeToTagList(CodedTupleTag tag, T value, Instant timestamp) + throws IOException { + List> list = tagListUpdates.get(tag); + if (list == null) { + list = new ArrayList<>(); + tagListUpdates.put(tag, list); + } + ByteString.Output stream = ByteString.newOutput(); + tag.getCoder().encode(value, stream, Coder.Context.OUTER); + list.add(KV.of(stream.toByteString(), timestamp)); + } + + @Override + public Iterable readTagList(CodedTupleTag tag) throws IOException { + return stateFetcher.fetchList( + computation, getSerializedKey(), getWorkToken(), mangledPrefix, tag); + } + + @Override + public void deleteTagList(CodedTupleTag tag) { + outputBuilder.addListUpdates( + Windmill.TagList.newBuilder() + .setTag(serializeTag(tag)) + .setEndTimestamp(Long.MAX_VALUE) + .build()); + } + + public void flushState() { + for (Map.Entry, KV> entry : stateCache.entrySet()) { + CodedTupleTag tag = entry.getKey(); + ByteString encodedValue = entry.getValue().getValue(); + outputBuilder.addValueUpdates( + Windmill.TagValue.newBuilder() + .setTag(serializeTag(tag)) + .setValue( + Windmill.Value.newBuilder() + .setData(encodedValue) + .setTimestamp(Long.MAX_VALUE) + .build()) + .build()); + } + + for (Map.Entry, List>> entry : + tagListUpdates.entrySet()) { + CodedTupleTag tag = entry.getKey(); + Windmill.TagList.Builder listBuilder = + Windmill.TagList.newBuilder() + .setTag(serializeTag(tag)); + for (KV item : entry.getValue()) { + long timestampMicros = TimeUnit.MILLISECONDS.toMicros(item.getValue().getMillis()); + listBuilder.addValues( + Windmill.Value.newBuilder() + .setData(item.getKey()) + .setTimestamp(timestampMicros)); + } + outputBuilder.addListUpdates(listBuilder.build()); + } + + stateCache.clear(); + tagListUpdates.clear(); + } + + private ByteString serializeTag(CodedTupleTag tag) { + return ByteString.copyFromUtf8(mangledPrefix + tag.getId()); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java new file mode 100644 index 0000000000000..9c5c5cbd24f0c --- /dev/null +++ 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java @@ -0,0 +1,913 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.util.Structs.addObject; +import static com.google.cloud.dataflow.sdk.util.Structs.addString; + +import com.google.api.services.dataflow.model.InstructionInput; +import com.google.api.services.dataflow.model.InstructionOutput; +import com.google.api.services.dataflow.model.MapTask; +import com.google.api.services.dataflow.model.ParDoInstruction; +import com.google.api.services.dataflow.model.ParallelInstruction; +import com.google.api.services.dataflow.model.ReadInstruction; +import com.google.api.services.dataflow.model.Sink; +import com.google.api.services.dataflow.model.Source; +import com.google.api.services.dataflow.model.WriteInstruction; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CollectionCoder; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn; +import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.DoFnInfo; +import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.StringUtils; +import com.google.cloud.dataflow.sdk.util.TimerOrElement.TimerOrElementCoder; +import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.CodedTupleTagMap; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.protobuf.ByteString; +import com.google.protobuf.TextFormat; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.matchers.JUnitMatchers; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import 
java.util.Queue; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +/** Unit tests for {@link StreamingDataflowWorker}. */ +@RunWith(JUnit4.class) +public class StreamingDataflowWorkerTest { + private static final IntervalWindow DEFAULT_WINDOW = + new IntervalWindow(new Instant(1234), new Duration(1000)); + + private static class FakeWindmillServer extends WindmillServerStub { + private Queue workToOffer; + private Queue dataToOffer; + private Map commitsReceived; + private LinkedBlockingQueue exceptions; + private int commitsRequested = 0; + + public FakeWindmillServer() { + workToOffer = new ConcurrentLinkedQueue(); + dataToOffer = new ConcurrentLinkedQueue(); + commitsReceived = new ConcurrentHashMap(); + exceptions = new LinkedBlockingQueue<>(); + } + + public void addWorkToOffer(Windmill.GetWorkResponse work) { + workToOffer.add(work); + } + + public void addDataToOffer(Windmill.GetDataResponse data) { + dataToOffer.add(data); + } + + @Override + public Windmill.GetWorkResponse getWork(Windmill.GetWorkRequest request) { + Windmill.GetWorkResponse response = workToOffer.poll(); + if (response == null) { + return Windmill.GetWorkResponse.newBuilder().build(); + } + return response; + } + + @Override + public Windmill.GetDataResponse getData(Windmill.GetDataRequest request) { + Windmill.GetDataResponse response = dataToOffer.poll(); + if (response == null) { + return Windmill.GetDataResponse.newBuilder().build(); + } + return response; + } + + @Override + public Windmill.CommitWorkResponse commitWork(Windmill.CommitWorkRequest request) { + for (Windmill.ComputationCommitWorkRequest computationRequest : request.getRequestsList()) { + for (Windmill.WorkItemCommitRequest commit : computationRequest.getRequestsList()) { + commitsReceived.put(commit.getWorkToken(), commit); + } + } + return Windmill.CommitWorkResponse.newBuilder().build(); + } + + @Override + public Windmill.GetConfigResponse getConfig(Windmill.GetConfigRequest request) { + return Windmill.GetConfigResponse.newBuilder().build(); + } + + @Override + public Windmill.ReportStatsResponse reportStats(Windmill.ReportStatsRequest request) { + for (Windmill.Exception exception : request.getExceptionsList()) { + try { + exceptions.put(exception); + } catch (InterruptedException e) {} + } + return Windmill.ReportStatsResponse.newBuilder().build(); + } + + public Map waitForAndGetCommits(int numCommits) { + while (commitsReceived.size() < commitsRequested + numCommits) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) {} + } + + commitsRequested += numCommits; + + return commitsReceived; + } + + public Windmill.Exception getException() throws InterruptedException { + return exceptions.take(); + } + } + + private ParallelInstruction makeWindowingSourceInstruction(Coder coder) { + CloudObject encodedCoder = FullWindowedValueCoder.of( + TimerOrElementCoder.of(coder), IntervalWindow.getCoder()).asCloudObject(); + return new ParallelInstruction() + .setSystemName("source") + .setRead(new ReadInstruction().setSource( + new Source() + .setSpec(CloudObject.forClass(WindowingWindmillReader.class)) + .setCodec(encodedCoder))) + .setOutputs(Arrays.asList( + new InstructionOutput() + .setName("read_output") + .setCodec(encodedCoder))); + } + + private ParallelInstruction makeSourceInstruction(Coder coder) { + return new ParallelInstruction() + 
.setSystemName("source") + .setRead(new ReadInstruction().setSource( + new Source() + .setSpec(CloudObject.forClass(UngroupedWindmillReader.class)) + .setCodec(WindowedValue.getFullCoder(coder, IntervalWindow.getCoder()) + .asCloudObject()))) + .setOutputs(Arrays.asList( + new InstructionOutput() + .setName("read_output") + .setCodec(WindowedValue.getFullCoder(coder, IntervalWindow.getCoder()) + .asCloudObject()))); + } + + private ParallelInstruction makeDoFnInstruction( + DoFn doFn, int producerIndex, Coder outputCoder) { + CloudObject spec = CloudObject.forClassName("DoFn"); + addString(spec, PropertyNames.SERIALIZED_FN, + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(new DoFnInfo(doFn, null)))); + return new ParallelInstruction() + .setSystemName("parDo") + .setParDo(new ParDoInstruction() + .setInput( + new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0)) + .setNumOutputs(1) + .setUserFn(spec)) + .setOutputs(Arrays.asList( + new InstructionOutput() + .setName("par_do_output") + .setCodec(WindowedValue.getFullCoder(outputCoder, IntervalWindow.getCoder()) + .asCloudObject()))); + } + + private ParallelInstruction makeSinkInstruction(Coder coder, int producerIndex) { + CloudObject spec = CloudObject.forClass(WindmillSink.class); + addString(spec, "stream_id", "out"); + return new ParallelInstruction() + .setSystemName("sink") + .setWrite(new WriteInstruction() + .setInput( + new InstructionInput().setProducerInstructionIndex(producerIndex).setOutputNum(0)) + .setSink(new Sink() + .setSpec(spec) + .setCodec(WindowedValue.getFullCoder(coder, IntervalWindow.getCoder()) + .asCloudObject()))); + } + + private MapTask makeMapTask(List instructions) { + return new MapTask() + .setStageName("computation") + .setSystemName("computation") + .setInstructions(instructions); + } + + private Windmill.GetWorkResponse buildTimerInput(String input) throws Exception { + Windmill.GetWorkResponse.Builder builder = Windmill.GetWorkResponse.newBuilder(); + TextFormat.merge(input, builder); + return builder.build(); + } + + private Windmill.GetWorkResponse buildInput(String input, byte[] metadata) throws Exception { + Windmill.GetWorkResponse.Builder builder = Windmill.GetWorkResponse.newBuilder(); + TextFormat.merge(input, builder); + Windmill.InputMessageBundle.Builder messageBundleBuilder = + builder.getWorkBuilder(0) + .getWorkBuilder(0) + .getMessageBundlesBuilder(0); + for (Windmill.Message.Builder messageBuilder : messageBundleBuilder.getMessagesBuilderList()) { + messageBuilder.setMetadata(ByteString.copyFrom(metadata)); + } + return builder.build(); + } + + private Windmill.GetDataResponse buildData(String input) throws Exception { + Windmill.GetDataResponse.Builder builder = Windmill.GetDataResponse.newBuilder(); + TextFormat.merge(input, builder); + return builder.build(); + } + + private Windmill.GetWorkResponse makeInput(int index, long timestamp) throws Exception { + return buildInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key\"" + + " work_token: " + index + + " message_bundles {" + + " source_computation_id: \"upstream\"" + + " messages {" + + " timestamp: " + timestamp + + " data: \"data" + index + "\"" + + " }" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray( + CollectionCoder.of(IntervalWindow.getCoder()), Arrays.asList(DEFAULT_WINDOW))); + } + + private Windmill.WorkItemCommitRequest buildExpectedOutput(String output) throws Exception { + Windmill.WorkItemCommitRequest.Builder 
builder = Windmill.WorkItemCommitRequest.newBuilder(); + TextFormat.merge(output, builder); + return builder.build(); + } + + private Windmill.WorkItemCommitRequest buildExpectedOutput(String output, byte[] metadata) + throws Exception { + Windmill.WorkItemCommitRequest.Builder builder = Windmill.WorkItemCommitRequest.newBuilder(); + TextFormat.merge(output, builder); + builder.getOutputMessagesBuilder(0) + .getBundlesBuilder(0) + .getMessagesBuilder(0) + .setMetadata(ByteString.copyFrom(metadata)); + return builder.build(); + } + + private Windmill.WorkItemCommitRequest makeExpectedOutput(int index, long timestamp, String key) + throws Exception { + return buildExpectedOutput( + "key: \"key\" " + + "work_token: " + index + " " + + "output_messages {" + + " destination_stream_id: \"out\"" + + " bundles {" + + " key: \"" + key + "\"" + + " messages {" + + " timestamp: " + timestamp + + " data: \"data" + index + "\"" + + " metadata: \"\"" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray(CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(DEFAULT_WINDOW))); + } + + private DataflowPipelineOptions createTestingPipelineOptions() { + DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options.setAppName("StreamingWorkerHarnessTest"); + options.setStreaming(true); + return options; + } + + private Windmill.WorkItemCommitRequest stripCounters(Windmill.WorkItemCommitRequest request) { + return Windmill.WorkItemCommitRequest.newBuilder(request).clearCounterUpdates().build(); + } + + private Windmill.WorkItemCommitRequest stripProcessingTimeCounters( + Windmill.WorkItemCommitRequest request) { + Windmill.WorkItemCommitRequest.Builder builder = + Windmill.WorkItemCommitRequest.newBuilder(request).clearCounterUpdates(); + TreeMap sortedCounters = new TreeMap<>(); + for (Windmill.Counter counter : request.getCounterUpdatesList()) { + String name = counter.getName(); + if (!(name.startsWith("computation-") && name.endsWith("-msecs"))) { + sortedCounters.put(name, counter); + } + } + for (Windmill.Counter counter : sortedCounters.values()) { + builder.addCounterUpdates(counter); + } + return builder.build(); + } + + + @Test public void testBasicHarness() throws Exception { + List instructions = Arrays.asList( + makeSourceInstruction(StringUtf8Coder.of()), + makeSinkInstruction(StringUtf8Coder.of(), 0)); + + FakeWindmillServer server = new FakeWindmillServer(); + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + final int numIters = 2000; + for (int i = 0; i < numIters; ++i) { + server.addWorkToOffer(makeInput(i, TimeUnit.MILLISECONDS.toMicros(i))); + } + + Map result = server.waitForAndGetCommits(numIters); + worker.stop(); + + for (int i = 0; i < numIters; ++i) { + Assert.assertTrue(result.containsKey((long) i)); + Assert.assertEquals(makeExpectedOutput(i, TimeUnit.MILLISECONDS.toMicros(i), "key"), + stripCounters(result.get((long) i))); + } + } + + static class ChangeKeysFn extends DoFn, KV> { + @Override + public void processElement(ProcessContext c) { + KV elem = c.element(); + c.output(KV.of(elem.getKey() + "_" + elem.getValue(), elem.getValue())); + } + } + + @Test public void testKeyChange() throws Exception { + KvCoder kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()); + + List instructions = Arrays.asList( + makeSourceInstruction(kvCoder), + makeDoFnInstruction(new ChangeKeysFn(), 0, kvCoder), + 
makeSinkInstruction(kvCoder, 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + server.addWorkToOffer(makeInput(0, 0)); + server.addWorkToOffer(makeInput(1, TimeUnit.MILLISECONDS.toMicros(1))); + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + Map result = server.waitForAndGetCommits(2); + + Assert.assertEquals(makeExpectedOutput(0, 0, "key_data0"), stripCounters(result.get(0L))); + Assert.assertEquals(makeExpectedOutput(1, TimeUnit.MILLISECONDS.toMicros(1), "key_data1"), + stripCounters(result.get(1L))); + } + + static class TestStateFn extends DoFn, KV> + implements DoFn.RequiresKeyedState { + @Override + public void processElement(ProcessContext c) { + try { + CodedTupleTag stateTag = CodedTupleTag.of("state", StringUtf8Coder.of()); + CodedTupleTag emptyStateTag = + CodedTupleTag.of("other_state", StringUtf8Coder.of()); + CodedTupleTagMap result = + c.keyedState().lookup(Arrays.asList(stateTag, emptyStateTag)); + Assert.assertNull(result.get(emptyStateTag)); + String state = result.get(stateTag); + state += "-" + c.element().getValue(); + c.keyedState().store(CodedTupleTag.of("state", StringUtf8Coder.of()), state); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + @Test public void testState() throws Exception { + KvCoder kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()); + + List instructions = Arrays.asList( + makeSourceInstruction(kvCoder), + makeDoFnInstruction(new TestStateFn(), 0, kvCoder), + makeSinkInstruction(kvCoder, 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + server.addDataToOffer(buildData( + "data {" + + " computation_id: \"computation\"" + + " data {" + + " key: \"key0\"" + + " values {" + + " tag: \"5:parDostate\"" + + " value {" + + " timestamp: 0" + + " data: \"key0\"" + + " }" + + " }" + + " }" + + "}")); + server.addDataToOffer(buildData( + "data {" + + " computation_id: \"computation\"" + + " data {" + + " key: \"key0\"" + + " values {" + + " tag: \"5:Stagestate\"" + + " value {" + + " timestamp: 1" + + " data: \"key0\"" + + " }" + + " }" + + " }" + + "}")); + + server.addWorkToOffer(buildInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key0\"" + + " work_token: 0" + + " message_bundles {" + + " source_computation_id: \"upstream\"" + + " messages {" + + " timestamp: 0" + + " data: \"0\"" + + " }" + + " messages {" + + " timestamp: 1" + + " data: \"1\"" + + " }" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray(CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(DEFAULT_WINDOW)))); + + server.waitForAndGetCommits(1); + + server.addDataToOffer(buildData( + "data {" + + " computation_id: \"computation\"" + + " data {" + + " key: \"key1\"" + + " values {" + + " tag: \"5:parDostate\"" + + " value {" + + " timestamp: 0" + + " data: \"key1\"" + + " }" + + " }" + + " }" + + "}")); + + server.addWorkToOffer(buildInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key1\"" + + " work_token: 1" + + " message_bundles {" + + " source_computation_id: \"upstream\"" + + " messages {" + + " timestamp: 2" + + " data: \"2\"" + + " }" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray(CollectionCoder.of(IntervalWindow.getCoder()), + 
Arrays.asList(DEFAULT_WINDOW)))); + + Map result = server.waitForAndGetCommits(1); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key0\" " + + "work_token: 0 " + + "value_updates {" + + " tag: \"5:parDostate\"" + + " value {" + + " timestamp: 9223372036854775807" + + " data: \"key0-0-1\"" + + " }" + + "} " + + "counter_updates {" + + " name: \"par_do_output-ElementCount\"" + + " kind: SUM" + + "} " + + "counter_updates {" + + " name: \"read_output-ElementCount\"" + + " kind: SUM" + + " int_scalar: 2" + + "} " + + "counter_updates {" + + " name: \"read_output-MeanByteCount\"" + + " kind: MEAN" + + " int_scalar: 70" + + " mean_count: 2" + + "} " + + "counter_updates {" + + " name: \"sink-ByteCount\"" + + " kind: SUM" + + "}" + + "counter_updates {" + + " name: \"source-ByteCount\"" + + " kind: SUM" + + " int_scalar: 10" + + "} "), + stripProcessingTimeCounters(result.get(0L))); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key1\" " + + "work_token: 1 " + + "value_updates {" + + " tag: \"5:parDostate\"" + + " value {" + + " timestamp: 9223372036854775807" + + " data: \"key1-2\"" + + " }" + + "}" + + "counter_updates {" + + " name: \"par_do_output-ElementCount\"" + + " kind: SUM" + + "} " + + "counter_updates {" + + " name: \"read_output-ElementCount\"" + + " kind: SUM" + + " int_scalar: 1" + + "} " + + "counter_updates {" + + " name: \"read_output-MeanByteCount\"" + + " kind: MEAN" + + " int_scalar: 35" + + " mean_count: 1" + + "} " + + "counter_updates {" + + " name: \"sink-ByteCount\"" + + " kind: SUM" + + "} " + + "counter_updates {" + + " name: \"source-ByteCount\"" + + " kind: SUM" + + " int_scalar: 5" + + "} "), + stripProcessingTimeCounters(result.get(1L))); + } + + static class TestExceptionFn extends DoFn { + @Override + public void processElement(ProcessContext c) throws Exception { + try { + throw new Exception("Exception!"); + } catch (Exception e) { + throw new Exception("Another exception!", e); + } + } + } + + @Test public void testExceptions() throws Exception { + List instructions = Arrays.asList( + makeSourceInstruction(StringUtf8Coder.of()), + makeDoFnInstruction(new TestExceptionFn(), 0, StringUtf8Coder.of()), + makeSinkInstruction(StringUtf8Coder.of(), 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + server.addWorkToOffer(buildInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key0\"" + + " work_token: 0" + + " message_bundles {" + + " source_computation_id: \"upstream\"" + + " messages {" + + " timestamp: 0" + + " data: \"0\"" + + " }" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray(CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(DEFAULT_WINDOW)))); + + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + Windmill.Exception exception = server.getException(); + + Assert.assertThat(exception.getStackFrames(0), + JUnitMatchers.containsString("Another exception!")); + Assert.assertThat(exception.getStackFrames(1), + JUnitMatchers.containsString("processElement")); + Assert.assertTrue(exception.hasCause()); + + Assert.assertThat(exception.getCause().getStackFrames(0), + JUnitMatchers.containsString("Exception!")); + Assert.assertThat(exception.getCause().getStackFrames(1), + JUnitMatchers.containsString("processElement")); + Assert.assertFalse(exception.getCause().hasCause()); + } + + private static class TestTimerFn + extends AssignWindowsDoFn, BoundedWindow> { + public 
TestTimerFn() { + super(null); + } + @Override + public void processElement(ProcessContext c) { + c.output(KV.of("key0", Long.toString(c.timestamp().getMillis()))); + } + } + + @Test public void testTimers() throws Exception { + KvCoder kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()); + + List instructions = Arrays.asList( + makeWindowingSourceInstruction(kvCoder), + makeDoFnInstruction(new TestTimerFn(), 0, kvCoder), + makeSinkInstruction(kvCoder, 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + + server.addWorkToOffer(buildTimerInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key0\"" + + " work_token: 0" + + " timers {" + + " timers {" + + " tag: \"tag\"" + + " timestamp: 3000" + + " }" + + " }" + + " }" + + "}")); + + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + Map result = server.waitForAndGetCommits(1); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key0\" " + + "work_token: 0 " + + "output_messages {" + + " destination_stream_id: \"out\"" + + " bundles {" + + " key: \"key0\"" + + " messages {" + + " timestamp: 3000" + + " data: \"3\"" + + " }" + + " }" + + "} ", + CoderUtils.encodeToByteArray(CollectionCoder.of(IntervalWindow.getCoder()), + new ArrayList())), + stripCounters(result.get(0L))); + } + + @Test public void testAssignWindows() throws Exception { + Duration gapDuration = Duration.standardSeconds(1); + CloudObject spec = CloudObject.forClassName("AssignWindowsDoFn"); + addString(spec, PropertyNames.SERIALIZED_FN, + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(FixedWindows.of(gapDuration)))); + + ParallelInstruction addWindowsInstruction = + new ParallelInstruction() + .setSystemName("AssignWindows") + .setParDo(new ParDoInstruction() + .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0)) + .setNumOutputs(1) + .setUserFn(spec)) + .setOutputs(Arrays.asList(new InstructionOutput() + .setName("output") + .setCodec(WindowedValue.getFullCoder(StringUtf8Coder.of(), + IntervalWindow.getCoder()).asCloudObject()))); + + List instructions = Arrays.asList( + makeSourceInstruction(StringUtf8Coder.of()), + addWindowsInstruction, + makeSinkInstruction(StringUtf8Coder.of(), 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + + server.addWorkToOffer(makeInput(0, 0)); + server.addWorkToOffer(makeInput(1000000, 1000000)); + + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + Map result = server.waitForAndGetCommits(2); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key\" " + + "work_token: 0 " + + "output_messages {" + + " destination_stream_id: \"out\"" + + " bundles {" + + " key: \"key\"" + + " messages {" + + " timestamp: 0" + + " data: \"data0\"" + + " }" + + " }" + + "} ", + CoderUtils.encodeToByteArray( + CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(new IntervalWindow(new Instant(0), new Instant(1000))))), + stripCounters(result.get(0L))); + + Windmill.WorkItemCommitRequest.Builder expected = buildExpectedOutput( + "key: \"key\" " + + "work_token: 1000000 " + + "output_messages {" + + " destination_stream_id: \"out\"" + + " bundles {" + + " key: \"key\"" + + " messages {" + + " timestamp: 1000000" + + " data: \"data1000000\"" + + " }" + + " }" + + "} ", + 
CoderUtils.encodeToByteArray( + CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(new IntervalWindow(new Instant(1000), new Instant(2000))))).toBuilder(); + + Assert.assertEquals(expected.build(), stripCounters(result.get(1000000L))); + } + + @Test public void testMergeWindows() throws Exception { + Coder> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()); + Coder>> windowedKvCoder = + FullWindowedValueCoder.of(kvCoder, IntervalWindow.getCoder()); + KvCoder> groupedCoder = + KvCoder.of(StringUtf8Coder.of(), ListCoder.of(StringUtf8Coder.of())); + Coder>>> windowedGroupedCoder = + FullWindowedValueCoder.of(groupedCoder, IntervalWindow.getCoder()); + + CloudObject spec = CloudObject.forClassName("MergeWindowsDoFn"); + addString(spec, PropertyNames.SERIALIZED_FN, + StringUtils.byteArrayToJsonString( + SerializableUtils.serializeToByteArray(FixedWindows.of(Duration.standardSeconds(1))))); + addObject(spec, PropertyNames.INPUT_CODER, windowedKvCoder.asCloudObject()); + + ParallelInstruction mergeWindowsInstruction = + new ParallelInstruction() + .setSystemName("MergeWindows") + .setParDo(new ParDoInstruction() + .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0)) + .setNumOutputs(1) + .setUserFn(spec)) + .setOutputs(Arrays.asList(new InstructionOutput() + .setName("output") + .setCodec(windowedGroupedCoder.asCloudObject()))); + + List instructions = Arrays.asList( + makeWindowingSourceInstruction(kvCoder), + mergeWindowsInstruction, + makeSinkInstruction(groupedCoder, 1)); + + FakeWindmillServer server = new FakeWindmillServer(); + + StreamingDataflowWorker worker = new StreamingDataflowWorker( + Arrays.asList(makeMapTask(instructions)), server, createTestingPipelineOptions()); + worker.start(); + + server.addWorkToOffer(buildInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key\"" + + " work_token: 0" + + " message_bundles {" + + " source_computation_id: \"upstream\"" + + " messages {" + + " timestamp: 0" + + " data: \"data0\"" + + " }" + + " }" + + " }" + + "}", + CoderUtils.encodeToByteArray( + CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(new IntervalWindow(new Instant(0), new Instant(1000)))))); + + Map result = server.waitForAndGetCommits(1); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key\" " + + "work_token: 0 " + + "output_timers {" + + " tag: \"gAAAAAAAAAA=\"" + + " timestamp: 999000" + + "} " + + "list_updates {" + + " tag: \"12:MergeWindowsbuffer:gAAAAAAAAAA=\"" + + " values {" + + " timestamp: 0" + + " data: \"data0\"" + + " }" + + "}"), + stripCounters(result.get(0L))); + + server.addWorkToOffer(buildTimerInput( + "work {" + + " computation_id: \"computation\"" + + " work {" + + " key: \"key\"" + + " work_token: 1" + + " timers {" + + " timers {" + + " tag: \"gAAAAAAAAAA=\"" + + " timestamp: 999000" + + " }" + + " }" + + " }" + + "}")); + server.addDataToOffer(buildData( + "data {" + + " computation_id: \"computation\"" + + " data {" + + " key: \"key\"" + + " lists {" + + " tag: \"12:MergeWindowsbuffer:gAAAAAAAAAA=\"" + + " values {" + + " timestamp: 0" + + " data: \"data0\"" + + " }" + + " }" + + " }" + + "}")); + + result = server.waitForAndGetCommits(1); + + Assert.assertEquals(buildExpectedOutput( + "key: \"key\" " + + "work_token: 1 " + + "output_messages {" + + " destination_stream_id: \"out\"" + + " bundles {" + + " key: \"key\"" + + " messages {" + + " timestamp: 999000" + + " data: \"\000\000\000\001\005data0\"" + + " }" + + " }" + + "} " + + "list_updates 
{" + + " tag: \"12:MergeWindowsbuffer:gAAAAAAAAAA=\"" + + " end_timestamp: 9223372036854775807" + + "}", + CoderUtils.encodeToByteArray( + CollectionCoder.of(IntervalWindow.getCoder()), + Arrays.asList(new IntervalWindow(new Instant(0), new Instant(1000))))), + stripCounters(result.get(1L))); + } +} From 22ce583ee784e045c987312baa861ed1ab9b5cad Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Thu, 5 Feb 2015 14:58:25 -0800 Subject: [PATCH 0123/1541] Update .travis.yml Try to enable testing with multiple JDKs on Linux platform using Travis, while also supporting multiple operating systems. This is a work-around of the following Travis issue: https://github.com/travis-ci/travis-ci/issues/2317. --- .travis.yml | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index c860c7cfe55bb..ed72aff6253c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,5 @@ language: java -os: - - linux - - osx - notifications: email: recipients: @@ -11,8 +7,29 @@ notifications: on_success: change on_failure: always +os: + - linux + - osx + +env: + matrix: + - CUSTOM_JDK="oraclejdk8" + - CUSTOM_JDK="oraclejdk7" + - CUSTOM_JDK="openjdk7" + +matrix: + exclude: + # On OSX, run with default JDK only. + - os: osx + env: CUSTOM_JDK="oraclejdk8" + - os: osx + env: CUSTOM_JDK="oraclejdk7" + - os: osx + env: CUSTOM_JDK="openjdk7" + before_install: - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi + - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "CUSTOM_JDK"; fi install: - mvn install clean -U -DskipTests=true From 6618ea7f3a9eb2a4fd39623d158cc2bba6b3efda Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Thu, 5 Feb 2015 15:05:41 -0800 Subject: [PATCH 0124/1541] Update .travis.yml * Fix an issue in .travis.yml where CUSTOM_JDK environment variable is not prefixed with a dollar sign. * Fix an issue in .travis.yml where OSX doesn't have any JDK specified. --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ed72aff6253c4..4815f28e12944 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,7 @@ os: env: matrix: + - CUSTOM_JDK="default" - CUSTOM_JDK="oraclejdk8" - CUSTOM_JDK="oraclejdk7" - CUSTOM_JDK="openjdk7" @@ -26,10 +27,13 @@ matrix: env: CUSTOM_JDK="oraclejdk7" - os: osx env: CUSTOM_JDK="openjdk7" + # On Linux, run with specific JDKs only. + - os: linux + env: CUSTOM_JDK="default" before_install: - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "CUSTOM_JDK"; fi + - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi install: - mvn install clean -U -DskipTests=true From 4e37c016b43eea27d100e161eaa4cc5cd621c6be Mon Sep 17 00:00:00 2001 From: jlewi Date: Wed, 4 Feb 2015 23:17:33 -0800 Subject: [PATCH 0125/1541] Fix a bug that was causing the BlockingDataflowRunner to exit before printing out some messages. We need to make sure that we don't drop messages that arrive in between when list job messages is called and when we get the status of the job. To fix this we change the order of operations. 1. Get the status of the job. 2. List all messages. 3. Check if the status of the job is done; importantly we use the result of GetJob from step 1. This ensures that we always call list messages after determining that the job is done. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85595332 --- .../sdk/runners/DataflowPipelineJob.java | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java index c1facb0288b84..c1e18cd63eaee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java @@ -118,9 +118,18 @@ public JobState waitToFinish( int errorGettingMessages = 0; int errorGettingJobStatus = 0; while (true) { - if (System.currentTimeMillis() >= endTime) { - // Timed out. - return null; + // Get the state of the job before listing messages. This ensures we always fetch job + // messages after the job finishes to ensure we have all them. + Job job = null; + try { + job = dataflowClient.v1b3().projects().jobs().get(project, jobId).execute(); + } catch (GoogleJsonResponseException | SocketTimeoutException e) { + if (++errorGettingJobStatus > 5) { + // We want to continue to wait for the job to finish so + // we ignore this error, but warn occasionally if it keeps happening. + LOG.warn("There were problems getting job status: ", e); + errorGettingJobStatus = 0; + } } if (messageHandler != null) { @@ -145,19 +154,14 @@ public JobState waitToFinish( } // Check if the job is done. - try { - Job job = dataflowClient.v1b3().projects().jobs().get(project, jobId).execute(); - JobState state = JobState.toState(job.getCurrentState()); - if (state.isTerminal()) { - return state; - } - } catch (GoogleJsonResponseException | SocketTimeoutException e) { - if (++errorGettingJobStatus > 5) { - // We want to continue to wait for the job to finish so - // we ignore this error, but warn occasionally if it keeps happening. - LOG.warn("There were problems getting job status: ", e); - errorGettingJobStatus = 0; - } + JobState state = JobState.toState(job.getCurrentState()); + if (state.isTerminal()) { + return state; + } + + if (System.currentTimeMillis() >= endTime) { + // Timed out. + return null; } // Job not yet done. Wait a little, then check again. From adca9145fbb69cdfcf0dc62de34a8b7fc6eae9a3 Mon Sep 17 00:00:00 2001 From: malo Date: Thu, 5 Feb 2015 10:20:31 -0800 Subject: [PATCH 0126/1541] Fix NullPointerException that happens when an exception is raised while looking for the job. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85633961 --- .../cloud/dataflow/sdk/runners/DataflowPipelineJob.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java index c1e18cd63eaee..a2fe55255fe3d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java @@ -154,9 +154,11 @@ public JobState waitToFinish( } // Check if the job is done. 
- JobState state = JobState.toState(job.getCurrentState()); - if (state.isTerminal()) { - return state; + if (job != null) { + JobState state = JobState.toState(job.getCurrentState()); + if (state.isTerminal()) { + return state; + } } if (System.currentTimeMillis() >= endTime) { From 0193099ac550ec8963a61357df5a7d2ee214ee84 Mon Sep 17 00:00:00 2001 From: samuelw Date: Thu, 5 Feb 2015 13:18:57 -0800 Subject: [PATCH 0127/1541] Add failed response to ReportStats. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85651192 --- sdk/src/main/proto/windmill.proto | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/proto/windmill.proto b/sdk/src/main/proto/windmill.proto index 88aa796417143..2135ce53be263 100644 --- a/sdk/src/main/proto/windmill.proto +++ b/sdk/src/main/proto/windmill.proto @@ -211,4 +211,6 @@ message ReportStatsRequest { repeated Exception exceptions = 4; } -message ReportStatsResponse {} +message ReportStatsResponse { + optional bool failed = 1; +} From 3872228bde72de38b3b891e0d595cee59a15c2d2 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 5 Feb 2015 14:41:37 -0800 Subject: [PATCH 0128/1541] Logging which stop position was unreported, when tearing down a task which has an unreported stop position. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85659485 --- .../dataflow/sdk/util/common/worker/WorkProgressUpdater.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java index b50afb4b5f886..4794fe3efb2ec 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java @@ -126,7 +126,8 @@ public void stopReportingProgress() throws Exception { // We send a final progress report in case there was an unreported stop position update. if (stopPositionToService != null) { - LOG.info("Sending final progress update with unreported stop position."); + LOG.info("Sending final progress update with unreported stop position: {} " + + "for work item: {}", stopPositionToService, workString()); reportProgressHelper(); // This call can fail with an exception } From e5c3a0d33b4e291d0ce8ae1b7530c7996af59b36 Mon Sep 17 00:00:00 2001 From: davor Date: Thu, 5 Feb 2015 16:06:11 -0800 Subject: [PATCH 0129/1541] Dataflow JavaDoc: fix typo (vary --> very). [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85667847 --- .../java/com/google/cloud/dataflow/sdk/transforms/ParDo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index f61b197bc58a3..dee5096f83613 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -425,7 +425,7 @@ *
    The Google Cloud Dataflow service applies fusion as much as * possible, greatly reducing the cost of executing pipelines. As a * result, it is essentially "free" to write ParDo operations in a - * vary modular, composable style, each ParDo operation doing one + * very modular, composable style, each ParDo operation doing one * clear task, and stringing together sequences of ParDo operations to * get the desired overall effect. Such programs can be easier to * understand, easier to unit-test, easier to extend and evolve, and From 6e8cfbc96405030ad65ee9a8aaac8e8a886c9b9f Mon Sep 17 00:00:00 2001 From: andersjohnson Date: Fri, 6 Feb 2015 08:46:17 -0800 Subject: [PATCH 0130/1541] Allow actual output line ordering to vary. Also, add more diagnostics in case of an output mismatch. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85720217 --- test_wordcount.sh | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/test_wordcount.sh b/test_wordcount.sh index 699489c89b11c..45f5a6bd93625 100755 --- a/test_wordcount.sh +++ b/test_wordcount.sh @@ -34,12 +34,14 @@ function check_result_hash { local outfile_prefix=$2 local expected=$3 - local actual=$(md5sum $outfile_prefix-* | awk '{print $1}' || \ - md5 -q $outfile_prefix-*) || exit 2 # OSX + local actual=$(sort $outfile_prefix-* | md5sum | awk '{print $1}' || \ + sort $outfile_prefix-* | md5 -q) || exit 2 # OSX if [[ "$actual" != "$expected" ]] then echo "FAIL $name: Output hash mismatch. Got $actual, expected $expected." PASS="" + echo "head hexdump of actual:" + head $outfile_prefix-* | hexdump -c else echo "pass $name" # Output files are left behind in /tmp @@ -107,12 +109,12 @@ echo "Generating bundled JAR file" >&2 mvn bundle:bundle -f $TOPDIR/pom.xml -pl examples check_for_jar_file -run_all_ways wordcount1 "LICENSE" f4af56cd6f6f127536d586a6adcefba1 -run_all_ways wordcount2 "./LICENSE" f4af56cd6f6f127536d586a6adcefba1 -run_all_ways wordcount3 "$PWD/LICENSE" f4af56cd6f6f127536d586a6adcefba1 -run_all_ways wordcount4 "L*N?E*" f4af56cd6f6f127536d586a6adcefba1 -run_all_ways wordcount5 "./LICE*N?E" f4af56cd6f6f127536d586a6adcefba1 -run_all_ways wordcount6 "$PWD/*LIC?NSE" f4af56cd6f6f127536d586a6adcefba1 +run_all_ways wordcount1 "LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount2 "./LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount3 "$PWD/LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount4 "L*N?E*" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount5 "./LICE*N?E" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount6 "$PWD/*LIC?NSE" 9e164209cfe94043e3d64e6cc1c11a0c if [[ ! "$PASS" ]] then From 1d32572577e4dc290191e3c98a6c968f02ea4271 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 6 Feb 2015 10:49:37 -0800 Subject: [PATCH 0131/1541] Adds streaming Dataflow examples. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85730302 --- .../dataflow/examples/PubsubFileInjector.java | 105 ++++++++++++ .../examples/StreamingWordExtract.java | 141 +++++++++++++++ .../dataflow/examples/WindowingWordCount.java | 162 ++++++++++++++++++ 3 files changed, 408 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/PubsubFileInjector.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/StreamingWordExtract.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/PubsubFileInjector.java b/examples/src/main/java/com/google/cloud/dataflow/examples/PubsubFileInjector.java new file mode 100644 index 0000000000000..ac9c15e809949 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/PubsubFileInjector.java @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.pubsub.Pubsub; +import com.google.api.services.pubsub.model.PublishRequest; +import com.google.api.services.pubsub.model.PubsubMessage; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.StreamingOptions; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.RateLimiting; +import com.google.cloud.dataflow.sdk.util.Transport; + +import java.io.IOException; + +/** + * A batch Dataflow pipeline for injecting a set of GCS files into + * a PubSub topic line by line. + * + *
    This is useful for testing streaming + * pipelines. Note that since batch pipelines might retry chunks, this + * does _not_ guarantee exactly-once injection of file data. Some lines may + * be published multiple times. + *
    + */ +public class PubsubFileInjector { + + /** A DoFn that publishes lines to Google Cloud PubSub. */ + static class Publish extends DoFn { + private String outputTopic; + public transient Pubsub pubsub; + + Publish(String outputTopic) { + this.outputTopic = outputTopic; + } + + @Override + public void startBundle(Context context) { + StreamingOptions options = + context.getPipelineOptions().as(StreamingOptions.class); + this.pubsub = Transport.newPubsubClient(options).build(); + } + + @Override + public void processElement(ProcessContext c) throws IOException { + PubsubMessage pubsubMessage = new PubsubMessage(); + pubsubMessage.encodeData(c.element().getBytes()); + PublishRequest publishRequest = new PublishRequest(); + publishRequest.setTopic(outputTopic).setMessage(pubsubMessage); + this.pubsub.topics().publish(publishRequest).execute(); + } + } + + /** + * Command line parameter options. + */ + private interface PubsubFileInjectorOptions extends PipelineOptions { + @Description("GCS location of files.") + @Validation.Required + String getInput(); + void setInput(String value); + + @Description("Topic to publish on.") + @Validation.Required + String getOutputTopic(); + void setOutputTopic(String value); + } + + /** + * Sets up and starts streaming pipeline. + */ + public static void main(String[] args) { + PubsubFileInjectorOptions options = PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(PubsubFileInjectorOptions.class); + + Pipeline pipeline = Pipeline.create(options); + + pipeline + .apply(TextIO.Read.from(options.getInput())) + .apply(RateLimiting.perWorker(new Publish(options.getOutputTopic())) + .withMaxParallelism(20)); + + pipeline.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/StreamingWordExtract.java b/examples/src/main/java/com/google/cloud/dataflow/examples/StreamingWordExtract.java new file mode 100644 index 0000000000000..2f8dc079991df --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/StreamingWordExtract.java @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; + +import java.util.ArrayList; + +/** + * A streaming Dataflow Example using BigQuery output. + * + *
This pipeline example reads lines of text from a PubSub topic, splits each line + * into individual words, capitalizes those words, and writes the output to + * a BigQuery table.
    + * + *
    To run this example using the Dataflow service, you must provide an input + * pubsub topic and an output BigQuery table, using the {@literal --inputTopic} + * {@literal --dataset} and {@literal --table} options. Since this is a streaming + * pipeline that never completes, select the non-blocking pipeline runner + * {@literal --runner=DataflowPipelineRunner}. + */ +public class StreamingWordExtract { + + /** A DoFn that tokenizes lines of text into individual words. */ + static class ExtractWords extends DoFn { + @Override + public void processElement(ProcessContext c) { + String[] words = c.element().split("[^a-zA-Z']+"); + for (String word : words) { + if (!word.isEmpty()) { + c.output(word); + } + } + } + } + + /** A DoFn that uppercases a word. */ + static class Uppercase extends DoFn { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().toUpperCase()); + } + } + + /** + * Converts strings into BigQuery rows. + */ + static class StringToRowConverter extends DoFn { + + /** + * In this example, put the whole string into single BigQuery field. + */ + @Override + public void processElement(ProcessContext c) { + c.output(new TableRow().set("string_field", c.element())); + } + + static TableSchema getSchema() { + return new TableSchema().setFields(new ArrayList() { + // Compose the list of TableFieldSchema from tableSchema. + { + add(new TableFieldSchema().setName("string_field").setType("STRING")); + } + }); + } + } + + /** + * Command line parameter options. + */ + private interface StreamingWordExtractOptions extends PipelineOptions { + @Description("Input Pubsub topic") + @Validation.Required + String getInputTopic(); + void setInputTopic(String value); + + @Description("BigQuery dataset name") + @Validation.Required + String getDataset(); + void setDataset(String value); + + @Description("BigQuery table name") + @Validation.Required + String getTable(); + void setTable(String value); + } + + /** + * Sets up and starts streaming pipeline. + */ + public static void main(String[] args) { + StreamingWordExtractOptions options = PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(StreamingWordExtractOptions.class); + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + dataflowOptions.setStreaming(true); + + Pipeline pipeline = Pipeline.create(options); + + String tableSpec = new StringBuilder() + .append(dataflowOptions.getProject()).append(":") + .append(options.getDataset()).append(".") + .append(options.getTable()) + .toString(); + pipeline + .apply(PubsubIO.Read.topic(options.getInputTopic())) + .apply(ParDo.of(new ExtractWords())) + .apply(ParDo.of(new Uppercase())) + .apply(ParDo.of(new StringToRowConverter())) + .apply(BigQueryIO.Write.to(tableSpec) + .withSchema(StringToRowConverter.getSchema())); + + pipeline.run(); + } +} diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java new file mode 100644 index 0000000000000..a8e9dde7e7092 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.joda.time.Duration; +import org.joda.time.Instant; + +/** + * An example that counts words in Shakespeare. For a detailed walkthrough of this + * example see: + * https://cloud.google.com/dataflow/java-sdk/wordcount-example + * + * To execute this pipeline locally, specify general pipeline configuration: + * --project= + * and example configuration: + * --output=[ | gs://] + * + * To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= --stagingLocation=gs:// + * --runner=BlockingDataflowPipelineRunner + * and example configuration: + * --output=gs:// + * + * The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be + * overridden with --input. + */ +public class WindowingWordCount { + + /** A DoFn that tokenizes lines of text into individual words with timestamp. */ + static class ExtractWordsWithTimestampFn extends DoFn { + @Override + public void processElement(ProcessContext c) { + String[] words = c.element().split("[^a-zA-Z']+"); + for (String word : words) { + if (!word.isEmpty()) { + c.outputWithTimestamp(word, new Instant(System.currentTimeMillis())); + } + } + } + } + + /** A DoFn that converts a Word and Count into a printable string. */ + static class FormatCountsFn extends DoFn, String> { + @Override + public void processElement(ProcessContext c) { + String output = "Element: " + c.element().getKey() + + " Value: " + c.element().getValue() + + " Timestamp: " + c.timestamp() + + " Windows: (" + c.windows() + ")"; + c.output(output); + } + } + + /** + * A PTransform that converts a PCollection containing lines of text into a PCollection of + * formatted word counts. + *
    + * Although this pipeline fragment could be inlined, bundling it as a PTransform allows for easy + * reuse, modular testing, and an improved monitoring experience. + */ + public static class CountWords extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection lines) { + + // Convert lines of text into individual words. + PCollection words = lines.apply( + ParDo.of(new ExtractWordsWithTimestampFn())); + + PCollection windowedWords = words.apply( + Window.into(FixedWindows.of(Duration.millis(1)))); + + // Count the number of times each word occurs. + PCollection> wordCounts = + windowedWords.apply(Count.perElement()); + + // Format each word and count into a printable string. + PCollection results = wordCounts.apply( + ParDo.of(new FormatCountsFn())); + + return results; + } + } + + private interface Options extends PipelineOptions { + @Description("Path of the file to read from") + @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt") + String getInput(); + void setInput(String value); + + @Description("Path of the file to write to") + String getOutput(); + void setOutput(String value); + + /** + * By default (numShards == 0), the system will choose the shard count. + * Most programs will not need this option. + */ + @Description("Number of output shards (0 if the system should choose automatically)") + @Default.Integer(0) + int getNumShards(); + void setNumShard(int value); + } + + private static String getOutputLocation(Options options) { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + if (options.getOutput() != null) { + return options.getOutput(); + } else if (dataflowOptions.getStagingLocation() != null) { + return GcsPath.fromUri(dataflowOptions.getStagingLocation()) + .resolve("counts.txt").toString(); + } else { + throw new IllegalArgumentException("Must specify --output or --stagingLocation"); + } + } + + + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + + Pipeline p = Pipeline.create(options); + + p.apply(TextIO.Read.named("ReadLines").from(options.getInput())) + .apply(new CountWords()) + .apply(TextIO.Write.named("WriteCounts") + .to(getOutputLocation(options)) + .withNumShards(options.getNumShards())); + + p.run(); + } +} From 424c56df97be1b4b76cde18386a95b8a30a793f4 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 6 Feb 2015 12:42:59 -0800 Subject: [PATCH 0132/1541] Refactor: 1. skipping deleteTimer() is an optimization for Dataflow Streaming Runner. Move it from PartitionBufferingWindowSet to StreamingActiveWindowManager. 2. remove the extra poll() from GroupAlsoByWindowsDoFn (windows are removed by WindowSets). 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85740534 --- .../sdk/util/GroupAlsoByWindowsDoFn.java | 4 ++-- .../sdk/util/PartitionBufferingWindowSet.java | 3 ++- .../util/StreamingGroupAlsoByWindowsDoFn.java | 22 +++++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 26297d0eba974..84fa834824ab8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -351,10 +351,10 @@ public Instant nextTimestamp() { } /** - * Returns and removes the next window. + * Returns the next window. */ public W getWindow() { - return windows.poll(); + return windows.peek(); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java index 3c20b5f1f3cbc..885392cc27605 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java @@ -32,7 +32,7 @@ * WindowSet itself is never exposed to user code, allowing * a much simpler (and cheaper) implementation. * - * This WindowSet only works with {@link StreamingGroupAlsoByWindowsDoFn}. + *
    This WindowSet only works with {@link StreamingGroupAlsoByWindowsDoFn}. */ class PartitionBufferingWindowSet extends AbstractWindowSet, W> { @@ -58,6 +58,7 @@ public void put(W window, V value) throws Exception { public void remove(W window) throws Exception { context.context.stepContext.deleteTagList( bufferTag(window, windowFn.windowCoder(), inputCoder)); + activeWindowManager.removeWindow(window); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index 9868b33a2db1c..a0aebe7538704 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -76,7 +76,7 @@ public void processElement(ProcessContext processContext) throws Exception { K key = element.getKey(); VI value = element.getValue(); AbstractWindowSet windowSet = createWindowSet( - key, context, new StreamingActiveWindowManager<>(context, windowFn.windowCoder())); + key, context, new StreamingActiveWindowManager<>(windowFn, context)); for (BoundedWindow window : context.windows()) { windowSet.put((W) window, value); @@ -86,8 +86,7 @@ public void processElement(ProcessContext processContext) throws Exception { } else { TimerOrElement timer = context.element(); AbstractWindowSet windowSet = createWindowSet( - (K) timer.key(), context, new StreamingActiveWindowManager<>( - context, windowFn.windowCoder())); + (K) timer.key(), context, new StreamingActiveWindowManager<>(windowFn, context)); // Attempt to merge windows before emitting; that may remove the current window under // consideration. @@ -106,26 +105,31 @@ public void processElement(ProcessContext processContext) throws Exception { private static class StreamingActiveWindowManager implements AbstractWindowSet.ActiveWindowManager { + WindowFn windowFn; DoFnProcessContext context; - Coder coder; StreamingActiveWindowManager( - DoFnProcessContext context, - Coder coder) { + WindowFn windowFn, + DoFnProcessContext context) { + this.windowFn = windowFn; this.context = context; - this.coder = coder; } @Override public void addWindow(W window) throws IOException { context.context.stepContext.getExecutionContext().setTimer( - WindowUtils.windowToString(window, coder), window.maxTimestamp()); + WindowUtils.windowToString(window, windowFn.windowCoder()), window.maxTimestamp()); } @Override public void removeWindow(W window) throws IOException { + if (windowFn instanceof PartitioningWindowFn) { + // For PartitioningWindowFn, each window triggers exactly one timer. + // And, timers are automatically deleted once they are fired. + return; + } context.context.stepContext.getExecutionContext().deleteTimer( - WindowUtils.windowToString(window, coder)); + WindowUtils.windowToString(window, windowFn.windowCoder())); } } } From 1c34edf27fa5a23146838bbcc83d4fb10c8b083f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 6 Feb 2015 14:12:42 -0800 Subject: [PATCH 0133/1541] Dataflow SDK: prepare a test protobuf for testing future Proto2Coder implementation. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85748245 --- sdk/src/main/proto/README.md | 27 ++++++++++++ .../proto/proto2_coder_test_messages.proto | 43 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 sdk/src/main/proto/README.md create mode 100644 sdk/src/main/proto/proto2_coder_test_messages.proto diff --git a/sdk/src/main/proto/README.md b/sdk/src/main/proto/README.md new file mode 100644 index 0000000000000..ea299389a63b5 --- /dev/null +++ b/sdk/src/main/proto/README.md @@ -0,0 +1,27 @@ +## Protocol Buffers in Google Cloud Dataflow + +This directory contains the Protocol Buffer messages used in Google Cloud +Dataflow. + +They aren't, however, used during the Maven build process, and are included here +for completeness only. Instead, the following artifact on Maven Central contains +the binary version of the generated code from these Protocol Buffers: + + + com.google.cloud.dataflow + google-cloud-dataflow-java-proto-library-all + LATEST + + +Please follow this process for testing changes: + +* Make changes to the Protocol Buffer messages in this directory. +* Use `protoc` to create a new Java library of the compiled generated code. +* Install that Java binary into your local Maven repository. +* Update SDK's `pom.xml` to pick up the newly installed library, instead of +downloading it from Maven Central. + +Once the changes are ready for submission, please separate them into two +commits. The first commit should update the Protocol Buffer messages only. After +that, we need to update the generated artifact on Maven Central. Finally, +changes that make use of the Protocol Buffer changes may be committed. diff --git a/sdk/src/main/proto/proto2_coder_test_messages.proto b/sdk/src/main/proto/proto2_coder_test_messages.proto new file mode 100644 index 0000000000000..56efd89c36ad5 --- /dev/null +++ b/sdk/src/main/proto/proto2_coder_test_messages.proto @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/* + * Protocol Buffer messages used for testing Proto2Coder implementation. + */ + +syntax = "proto2"; + +package proto2_coder_test_messages; + +option java_package = "com.google.cloud.dataflow.sdk.coders"; + +message MessageA { + optional string field1 = 1; + repeated MessageB field2 = 2; +} + +message MessageB { + optional bool field1 = 1; +} + +message MessageC { + extensions 100 to 105; +} + +extend MessageC { + optional MessageA field1 = 101; + optional MessageB field2 = 102; +} From 6edf5729e7fdf0ab693e79d2e89e9012056ed535 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 6 Feb 2015 15:12:25 -0800 Subject: [PATCH 0134/1541] Dataflow SDK: update SDK's pom.xml to pick up a newer version of our proto-library artifact. The newer version includes a windmill protobuf fix and test protobufs for Proto2Coder. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85753678 --- sdk/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/pom.xml b/sdk/pom.xml index f269e2bc70f6d..b3537af67fd96 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -203,7 +203,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-proto-library-all - 0.3.150203 + 0.3.150206 From fa8b3e06d142535e0384393427d6276134888f9b Mon Sep 17 00:00:00 2001 From: samuelw Date: Fri, 6 Feb 2015 15:23:41 -0800 Subject: [PATCH 0135/1541] Fix infinite retries of work on lease expiration in streaming mode. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85754733 --- .../worker/StreamingDataflowWorker.java | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 9234d31c7e7fa..07d58b89975be 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -381,14 +381,19 @@ private void process( t.printStackTrace(); lastException.set(t); LOG.fine("Failed work: " + work); - reportFailure(computation, work, t); - // Try again, but go to the end of the queue to avoid a tight loop. - sleep(60000); - executor.forceExecute(new Runnable() { - public void run() { - process(computation, work); - } - }); + if (reportFailure(computation, work, t)) { + // Try again, after some delay and at the end of the queue to avoid a tight loop. + sleep(60000); + executor.forceExecute(new Runnable() { + public void run() { + process(computation, work); + } + }); + } else { + // If we failed to report the error, the item is invalid and should + // not be retried internally. It will be retried at the higher level. 
+ LOG.fine("Aborting processing for work token: " + work.getWorkToken()); + } } return; } @@ -529,13 +534,15 @@ private Windmill.Exception buildExceptionReport(Throwable t) { return builder.build(); } - private void reportFailure(String computation, Windmill.WorkItem work, Throwable t) { - windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() - .setComputationId(computation) - .setKey(work.getKey()) - .setWorkToken(work.getWorkToken()) - .addExceptions(buildExceptionReport(t)) - .build()); + private boolean reportFailure(String computation, Windmill.WorkItem work, Throwable t) { + Windmill.ReportStatsResponse response = + windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() + .setComputationId(computation) + .setKey(work.getKey()) + .setWorkToken(work.getWorkToken()) + .addExceptions(buildExceptionReport(t)) + .build()); + return response.getFailed(); } private static class WorkerAndContext { From 946bdd094770409ca0a5cd4d50571a4e37a05e10 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Fri, 6 Feb 2015 15:45:47 -0800 Subject: [PATCH 0136/1541] Update README.md --- sdk/src/main/proto/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/proto/README.md b/sdk/src/main/proto/README.md index ea299389a63b5..fa4e925c982c6 100644 --- a/sdk/src/main/proto/README.md +++ b/sdk/src/main/proto/README.md @@ -16,8 +16,8 @@ the binary version of the generated code from these Protocol Buffers: Please follow this process for testing changes: * Make changes to the Protocol Buffer messages in this directory. -* Use `protoc` to create a new Java library of the compiled generated code. -* Install that Java binary into your local Maven repository. +* Use `protoc` to generate the new code, and compile it into a new Java library. +* Install that Java library into your local Maven repository. * Update SDK's `pom.xml` to pick up the newly installed library, instead of downloading it from Maven Central. From cc96bbafed2cba8c4eb7e30097a5c8ee8d25d29f Mon Sep 17 00:00:00 2001 From: andersjohnson Date: Fri, 6 Feb 2015 17:09:39 -0800 Subject: [PATCH 0137/1541] Set LC_ALL=C for sorting to make it consistent across platforms. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85763192 --- test_wordcount.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test_wordcount.sh b/test_wordcount.sh index 45f5a6bd93625..f852d2f6f77b5 100755 --- a/test_wordcount.sh +++ b/test_wordcount.sh @@ -34,8 +34,8 @@ function check_result_hash { local outfile_prefix=$2 local expected=$3 - local actual=$(sort $outfile_prefix-* | md5sum | awk '{print $1}' || \ - sort $outfile_prefix-* | md5 -q) || exit 2 # OSX + local actual=$(LC_ALL=C sort $outfile_prefix-* | md5sum | awk '{print $1}' \ + || LC_ALL=C sort $outfile_prefix-* | md5 -q) || exit 2 # OSX if [[ "$actual" != "$expected" ]] then echo "FAIL $name: Output hash mismatch. Got $actual, expected $expected." 
@@ -109,12 +109,12 @@ echo "Generating bundled JAR file" >&2 mvn bundle:bundle -f $TOPDIR/pom.xml -pl examples check_for_jar_file -run_all_ways wordcount1 "LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c -run_all_ways wordcount2 "./LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c -run_all_ways wordcount3 "$PWD/LICENSE" 9e164209cfe94043e3d64e6cc1c11a0c -run_all_ways wordcount4 "L*N?E*" 9e164209cfe94043e3d64e6cc1c11a0c -run_all_ways wordcount5 "./LICE*N?E" 9e164209cfe94043e3d64e6cc1c11a0c -run_all_ways wordcount6 "$PWD/*LIC?NSE" 9e164209cfe94043e3d64e6cc1c11a0c +run_all_ways wordcount1 "LICENSE" c5350a5ad4bb51e3e018612b4b044097 +run_all_ways wordcount2 "./LICENSE" c5350a5ad4bb51e3e018612b4b044097 +run_all_ways wordcount3 "$PWD/LICENSE" c5350a5ad4bb51e3e018612b4b044097 +run_all_ways wordcount4 "L*N?E*" c5350a5ad4bb51e3e018612b4b044097 +run_all_ways wordcount5 "./LICE*N?E" c5350a5ad4bb51e3e018612b4b044097 +run_all_ways wordcount6 "$PWD/*LIC?NSE" c5350a5ad4bb51e3e018612b4b044097 if [[ ! "$PASS" ]] then From b6c742e5b537eeca3646bde02a73db78d35ef495 Mon Sep 17 00:00:00 2001 From: relax Date: Sat, 7 Feb 2015 18:40:04 -0800 Subject: [PATCH 0138/1541] Rollback: Fix infinite retries of work on lease expiration in streaming mode. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85810964 --- .../worker/StreamingDataflowWorker.java | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 07d58b89975be..9234d31c7e7fa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -381,19 +381,14 @@ private void process( t.printStackTrace(); lastException.set(t); LOG.fine("Failed work: " + work); - if (reportFailure(computation, work, t)) { - // Try again, after some delay and at the end of the queue to avoid a tight loop. - sleep(60000); - executor.forceExecute(new Runnable() { - public void run() { - process(computation, work); - } - }); - } else { - // If we failed to report the error, the item is invalid and should - // not be retried internally. It will be retried at the higher level. - LOG.fine("Aborting processing for work token: " + work.getWorkToken()); - } + reportFailure(computation, work, t); + // Try again, but go to the end of the queue to avoid a tight loop. 
+ sleep(60000); + executor.forceExecute(new Runnable() { + public void run() { + process(computation, work); + } + }); } return; } @@ -534,15 +529,13 @@ private Windmill.Exception buildExceptionReport(Throwable t) { return builder.build(); } - private boolean reportFailure(String computation, Windmill.WorkItem work, Throwable t) { - Windmill.ReportStatsResponse response = - windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() - .setComputationId(computation) - .setKey(work.getKey()) - .setWorkToken(work.getWorkToken()) - .addExceptions(buildExceptionReport(t)) - .build()); - return response.getFailed(); + private void reportFailure(String computation, Windmill.WorkItem work, Throwable t) { + windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() + .setComputationId(computation) + .setKey(work.getKey()) + .setWorkToken(work.getWorkToken()) + .addExceptions(buildExceptionReport(t)) + .build()); } private static class WorkerAndContext { From 911a0859be6cb359ebe181f565468b1c8b209834 Mon Sep 17 00:00:00 2001 From: malo Date: Sat, 7 Feb 2015 20:53:24 -0800 Subject: [PATCH 0139/1541] Add messages about reasons why stop positions are not accepted for FileBasedReader and InMemoryReader. Stop proposeStopPosition from looking into the iterator if it is in Finish state. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85815080 --- .../sdk/runners/worker/FileBasedReader.java | 8 +++- .../sdk/runners/worker/InMemoryReader.java | 6 ++- .../sdk/util/common/worker/Operation.java | 38 +++++++++++++------ .../sdk/util/common/worker/ReadOperation.java | 16 +++++--- .../util/common/worker/ReadOperationTest.java | 6 ++- 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index c0c16d88f26bf..53ebbd52ad82f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -207,22 +207,26 @@ public Position updateStopPosition(Progress proposedStopPosition) { Long byteOffset = stopPosition.getPosition().getByteOffset(); if (byteOffset == null) { - LOG.warn("A stop position other than byte offset is not supported in a " - + "file-based Source."); + LOG.warn("A proposed stop position must be a byte offset for a file-based Source."); return null; } if (byteOffset <= offset) { // Proposed stop position is not after the current position: // No stop position update. + LOG.warn("The proposed stop position " + byteOffset + + " is past the current position " + offset); return null; } if (endOffset != null && byteOffset >= endOffset) { // Proposed stop position is after the current stop (end) position: No // stop position update. 
+ LOG.warn("The proposed stop position " + byteOffset + + " is after the current stop position " + endOffset); return null; } + LOG.info("Updated the stop position to offset " + byteOffset); this.endOffset = byteOffset; return cloudPositionToReaderPosition(stopPosition.getPosition()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index 487daa1affe45..72107b979d841 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -140,16 +140,20 @@ public Position updateStopPosition(Progress proposedStopPosition) { Long recordIndex = stopPosition.getRecordIndex(); if (recordIndex == null) { - LOG.warn("A stop position other than record index is not supported in InMemoryReader."); + LOG.warn("A proposed stop position must be a record index for InMemoryReader."); return null; } if (recordIndex <= index || recordIndex >= endPosition) { // Proposed stop position is not after the current position or proposed // stop position is after the current stop (end) position: No stop // position update. + LOG.warn("The proposed stop position " + recordIndex + + " is not between the current stop position " + index + + " and the current stop position " + endPosition); return null; } + LOG.info("Updated the stop position to record " + recordIndex); this.endPosition = recordIndex.intValue(); return cloudPositionToReaderPosition(stopPosition); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java index 207d034d4f3d6..31312b7238fc6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Operation.java @@ -21,14 +21,14 @@ /** * The abstract base class for Operations, which correspond to * Instructions in the original MapTask InstructionGraph. - * + *
    * Call start() to start the operation. - * + *
    * A read operation's start() method actually reads the data, and in * effect runs the pipeline. - * + *
    * Call finish() to finish the operation. - * + *
    * Since both start() and finish() may call process() on * this operation's consumers, start an operation after * starting its consumers, and finish an operation before @@ -61,6 +61,11 @@ public enum InitializationState { public InitializationState initializationState = InitializationState.UNSTARTED; + /** The lock protecting the initialization state. InitializationState is only + * written from one thread, but can be read by concurrent threads. + */ + protected final Object initializationStateLock = new Object(); + protected final StateSampler stateSampler; protected final int startState; @@ -80,7 +85,7 @@ public Operation(String operationName, } /** - * Checks that this oepration is not yet started, throwing an + * Checks that this operation is not yet started, throwing an * exception otherwise. */ void checkUnstarted() { @@ -93,7 +98,7 @@ && supportsRestart()))) { } /** - * Checks that this oepration has been started but not yet finished, + * Checks that this operation has been started but not yet finished, * throwing an exception otherwise. */ void checkStarted() { @@ -104,7 +109,7 @@ void checkStarted() { } /** - * Checks that this oepration has been finished, throwing an + * Checks that this operation has been finished, throwing an * exception otherwise. */ void checkFinished() { @@ -114,13 +119,22 @@ void checkFinished() { } } + /** + * Returns true if this Operation has been finished. + */ + boolean isFinished() { + return (initializationState == InitializationState.FINISHED); + } + /** * Starts this Operation's execution. Called after all successsor * consuming operations have been started. */ public void start() throws Exception { - checkUnstarted(); - initializationState = InitializationState.STARTED; + synchronized (initializationStateLock) { + checkUnstarted(); + initializationState = InitializationState.STARTED; + } } /** @@ -128,8 +142,10 @@ public void start() throws Exception { * predecessor producing operations have been finished. */ public void finish() throws Exception { - checkStarted(); - initializationState = InitializationState.FINISHED; + synchronized (initializationStateLock) { + checkStarted(); + initializationState = InitializationState.FINISHED; + } } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index b727961692ba6..73b289ce9a227 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -33,7 +33,7 @@ /** * A read operation. - * + *
    * Its start() method iterates through all elements of the source * and emits them on its output. */ @@ -227,12 +227,18 @@ public Reader.Progress getProgress() { * {@code null} if the source iterator has not been initialized */ public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) { - synchronized (sourceIteratorLock) { - if (readerIterator == null) { - LOG.warn("Iterator has not been initialized, returning null stop position."); + synchronized (initializationStateLock) { + if (isFinished()) { + LOG.warn("Iterator is in the Finished state, returning null stop position."); return null; } - return readerIterator.updateStopPosition(proposedStopPosition); + synchronized (sourceIteratorLock) { + if (readerIterator == null) { + LOG.warn("Iterator has not been initialized, returning null stop position."); + return null; + } + return readerIterator.updateStopPosition(proposedStopPosition); + } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index 922e32f1887af..8d754bd0d4368 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -197,7 +197,6 @@ public void testGetProgressAndProposeStopPosition() throws Exception { cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); readOperation.start(); - readOperation.finish(); TestTextReader.TestTextReaderIterator testIterator = (TestTextReader.TestTextReaderIterator) readOperation.readerIterator; @@ -218,6 +217,11 @@ public void testGetProgressAndProposeStopPosition() throws Exception { receiver.progresses, contains( makeApproximateProgress(1L), makeApproximateProgress(2L), makeApproximateProgress(3L))); + + readOperation.finish(); + + Assert.assertNull(readOperation.proposeStopPosition( + cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); } @Test From f42c13c8b917f1228f78ec16b63dcc14b7e83bac Mon Sep 17 00:00:00 2001 From: kirpichov Date: Sun, 8 Feb 2015 12:25:21 -0800 Subject: [PATCH 0140/1541] Generalizes updateStopPosition to requestFork, which can potentially return something that isn't a position. A few minor cleanups along the way. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85842155 --- .../BasicSerializableSourceFormat.java | 16 +- .../sdk/runners/worker/BigQueryReader.java | 7 +- .../worker/DataflowWorkProgressUpdater.java | 17 +- .../sdk/runners/worker/DataflowWorker.java | 22 +- .../sdk/runners/worker/FileBasedReader.java | 56 ++- .../runners/worker/GroupingShuffleReader.java | 40 +- .../sdk/runners/worker/InMemoryReader.java | 49 ++- .../worker/SourceTranslationUtils.java | 53 ++- .../util/common/worker/MapTaskExecutor.java | 5 +- .../sdk/util/common/worker/ReadOperation.java | 12 +- .../sdk/util/common/worker/Reader.java | 75 +++- .../sdk/util/common/worker/WorkExecutor.java | 8 +- .../common/worker/WorkProgressUpdater.java | 25 +- .../BasicSerializableSourceFormatTest.java | 15 +- .../DataflowWorkProgressUpdaterTest.java | 143 ++++--- .../worker/GroupingShuffleReaderTest.java | 257 ++++++------- .../runners/worker/InMemoryReaderTest.java | 85 +++-- .../sdk/runners/worker/ReaderTestUtils.java | 72 ++++ .../sdk/runners/worker/TextReaderTest.java | 308 +++++++-------- .../common/worker/MapTaskExecutorTest.java | 35 +- .../util/common/worker/ReadOperationTest.java | 351 ++++++++---------- 21 files changed, 832 insertions(+), 819 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java index 4f723e66e7315..e2ab2d86aa13b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormat.java @@ -45,6 +45,7 @@ import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; import org.slf4j.Logger; @@ -100,9 +101,8 @@ public OperationResponse performSourceOperation(OperationRequest request) throws */ @SuppressWarnings("unchecked") public static com.google.cloud.dataflow.sdk.util.common.worker.Reader create( - final PipelineOptions options, CloudObject spec, - final Coder> coder, final ExecutionContext executionContext) - throws Exception { + final PipelineOptions options, CloudObject spec, final Coder> coder, + final ExecutionContext executionContext) throws Exception { final Source source = (Source) deserializeFromCloudSource(spec); return new com.google.cloud.dataflow.sdk.util.common.worker.Reader() { @Override @@ -151,13 +151,12 @@ private SourceGetMetadataResponse performGetMetadata(SourceGetMetadataRequest re return response; } - private static Source deserializeFromCloudSource(Map spec) - throws Exception { + private static Source deserializeFromCloudSource(Map spec) throws Exception { return (Source) deserializeFromByteArray( Base64.decodeBase64(getString(spec, SERIALIZED_SOURCE)), "Source"); } - private static com.google.api.services.dataflow.model.Source serializeToCloudSource( + static com.google.api.services.dataflow.model.Source serializeToCloudSource( Source source, PipelineOptions options) throws Exception { com.google.api.services.dataflow.model.Source cloudSource = new 
com.google.api.services.dataflow.model.Source(); @@ -276,13 +275,12 @@ public void close() throws IOException { } @Override - public com.google.cloud.dataflow.sdk.util.common.worker.Reader.Progress getProgress() { + public Reader.Progress getProgress() { return null; } @Override - public com.google.cloud.dataflow.sdk.util.common.worker.Reader.Position updateStopPosition( - com.google.cloud.dataflow.sdk.util.common.worker.Reader.Progress proposedStopPosition) { + public Reader.ForkResult requestFork(Reader.ForkRequest request) { return null; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java index 3d442f3ffc768..1a6a2937d8f9d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java @@ -16,8 +16,6 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.api.client.util.Preconditions.checkNotNull; - import com.google.api.services.bigquery.Bigquery; import com.google.api.services.bigquery.model.TableReference; import com.google.api.services.bigquery.model.TableRow; @@ -97,11 +95,10 @@ public Progress getProgress() { } @Override - public Position updateStopPosition(Progress proposedStopPosition) { - // For now updating the stop position is not supported because this source + public ForkResult requestFork(ForkRequest forkRequest) { + // For now fork is not supported because this source // is used only when an entire table needs to be read by each worker (used // as a side input for instance). - checkNotNull(proposedStopPosition); throw new UnsupportedOperationException(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java index 250eb431ad4a3..66c1fd1a2c862 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -18,7 +18,7 @@ import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.buildStatus; import static com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.uniqueId; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toForkRequest; import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudDuration; import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; @@ -75,30 +75,23 @@ protected long getWorkUnitLeaseExpirationTimestamp() { @Override protected void reportProgressHelper() throws Exception { WorkItemStatus status = buildStatus(workItem, false/*completed*/, worker.getOutputCounters(), - worker.getOutputMetrics(), options, worker.getWorkerProgress(), stopPositionToService, + worker.getOutputMetrics(), options, worker.getWorkerProgress(), forkResultToReport, null/*sourceOperationResponse*/, null/*errors*/); status.setRequestedLeaseDuration(toCloudDuration(Duration.millis(requestedLeaseDurationMs))); WorkItemServiceState result = workUnitClient.reportWorkItemStatus(status); if (result != null) { // Resets state after a successful 
progress report. - stopPositionToService = null; + forkResultToReport = null; progressReportIntervalMs = nextProgressReportInterval( fromCloudDuration(workItem.getReportStatusInterval()).getMillis(), leaseRemainingTime(getLeaseExpirationTimestamp(result))); ApproximateProgress suggestedStopPoint = result.getSuggestedStopPoint(); - if (suggestedStopPoint == null && result.getSuggestedStopPosition() != null) { - suggestedStopPoint = - new ApproximateProgress().setPosition(result.getSuggestedStopPosition()); - } - if (suggestedStopPoint != null) { - LOG.info("Proposing stop progress on work unit {} at proposed stopping point {}", - workString(), suggestedStopPoint); - stopPositionToService = - worker.proposeStopPosition(cloudProgressToReaderProgress(suggestedStopPoint)); + LOG.info("Proposing fork of work unit {} at {}", workString(), suggestedStopPoint); + forkResultToReport = worker.requestFork(toForkRequest(suggestedStopPoint)); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 555e15a0bc403..3a46be9c173cd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -17,9 +17,9 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudSourceOperationResponseToSourceOperationResponse; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceOperationResponseToCloudSourceOperationResponse; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; import com.google.api.services.dataflow.model.MetricUpdate; import com.google.api.services.dataflow.model.Status; @@ -219,8 +219,8 @@ private static String buildCloudStackTrace(Throwable t) { private void reportStatus(DataflowWorkerHarnessOptions options, String status, WorkItem workItem, @Nullable CounterSet counters, @Nullable Collection> metrics, - @Nullable SourceFormat.OperationResponse operationResponse, - @Nullable List errors) throws IOException { + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) + throws IOException { LOG.info("{} processing work item {}", status, uniqueId(workItem)); WorkItemStatus workItemStatus = buildStatus(workItem, true/*completed*/, counters, metrics, options, null, null, operationResponse, errors); @@ -230,9 +230,8 @@ private void reportStatus(DataflowWorkerHarnessOptions options, String status, W static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, @Nullable CounterSet counters, @Nullable Collection> metrics, DataflowWorkerHarnessOptions options, @Nullable Reader.Progress progress, - @Nullable Reader.Position stopPosition, - @Nullable SourceFormat.OperationResponse operationResponse, - @Nullable List errors) { + @Nullable Reader.ForkResult forkResult, + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) { WorkItemStatus status = new WorkItemStatus(); 
status.setWorkItemId(Long.toString(workItem.getId())); status.setCompleted(completed); @@ -272,10 +271,13 @@ static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, } if (progress != null) { - status.setProgress(sourceProgressToCloudProgress(progress)); + status.setProgress(readerProgressToCloudProgress(progress)); } - if (stopPosition != null) { - status.setStopPosition(sourcePositionToCloudPosition(stopPosition)); + if (forkResult instanceof Reader.ForkResultWithPosition) { + Reader.ForkResultWithPosition asPosition = (Reader.ForkResultWithPosition) forkResult; + status.setStopPosition(toCloudPosition(asPosition.getAcceptedPosition())); + } else if (forkResult != null) { + throw new IllegalArgumentException("Unexpected type of fork result: " + forkResult); } if (workItem.getSourceOperationTask() != null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index 53ebbd52ad82f..218f1f5b5f661 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -19,7 +19,7 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -52,8 +52,10 @@ public abstract class FileBasedReader extends Reader { protected static final int BUF_SIZE = 200; protected final String filename; + @Nullable protected final Long startPosition; + @Nullable protected final Long endPosition; protected final Coder coder; @@ -191,44 +193,37 @@ public Progress getProgress() { } @Override - public Position updateStopPosition(Progress proposedStopPosition) { - checkNotNull(proposedStopPosition); - - // Currently we only support stop position in byte offset of - // CloudPosition in a file-based Reader. If stop position in - // other types is proposed, the end position in iterator will - // not be updated, and return null. - com.google.api.services.dataflow.model.ApproximateProgress stopPosition = - sourceProgressToCloudProgress(proposedStopPosition); - if (stopPosition == null) { - LOG.warn("A stop position other than CloudPosition is not supported now."); + public ForkResult requestFork(ForkRequest forkRequest) { + checkNotNull(forkRequest); + + // Currently, file-based Reader only supports fork at a byte offset. + ApproximateProgress forkProgress = forkRequestToApproximateProgress(forkRequest); + com.google.api.services.dataflow.model.Position forkPosition = forkProgress.getPosition(); + if (forkPosition == null) { + LOG.warn("FileBasedReader only supports fork at a Position. 
Requested: {}", forkRequest); return null; } - - Long byteOffset = stopPosition.getPosition().getByteOffset(); - if (byteOffset == null) { - LOG.warn("A proposed stop position must be a byte offset for a file-based Source."); + Long forkOffset = forkPosition.getByteOffset(); + if (forkOffset == null) { + LOG.warn("FileBasedReader only supports fork at byte offset. Requested: {}", forkPosition); return null; } - if (byteOffset <= offset) { - // Proposed stop position is not after the current position: - // No stop position update. - LOG.warn("The proposed stop position " + byteOffset - + " is past the current position " + offset); + if (forkOffset <= offset) { + LOG.info("Already progressed to offset {} which is after the requested fork offset {}", + offset, forkOffset); return null; } - if (endOffset != null && byteOffset >= endOffset) { - // Proposed stop position is after the current stop (end) position: No - // stop position update. - LOG.warn("The proposed stop position " + byteOffset - + " is after the current stop position " + endOffset); - return null; + if (endOffset != null && forkOffset >= endOffset) { + throw new IllegalArgumentException( + "Fork requested at an offset beyond the end of the current range: " + forkOffset + + " >= " + endOffset); } - LOG.info("Updated the stop position to offset " + byteOffset); - this.endOffset = byteOffset; - return cloudPositionToReaderPosition(stopPosition.getPosition()); + this.endOffset = forkOffset; + LOG.info("Forked FileBasedReader at offset {}", forkOffset); + + return new ForkResultWithPosition(cloudPositionToReaderPosition(forkPosition)); } /** @@ -262,7 +257,6 @@ private void computeNextElement() throws IOException { * Factory interface for creating a decompressing {@link InputStream}. */ public interface DecompressingStreamFactory { - /** * Create a decompressing {@link InputStream} from an existing {@link InputStream}. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java index 7d0f92313e9fd..0cb1d98408a79 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java @@ -19,7 +19,7 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; import com.google.api.client.util.Preconditions; import com.google.api.services.dataflow.model.ApproximateProgress; @@ -225,39 +225,41 @@ public Progress getProgress() { * {@code KV>} to be returned by the {@link GroupingShuffleReaderIterator}. 
*/ @Override - public Position updateStopPosition(Progress proposedStopPosition) { - checkNotNull(proposedStopPosition); - com.google.api.services.dataflow.model.Position stopCloudPosition = - sourceProgressToCloudProgress(proposedStopPosition).getPosition(); - if (stopCloudPosition == null) { - LOG.warn("A stop position other than a Position is not supported now."); + public ForkResult requestFork(ForkRequest forkRequest) { + checkNotNull(forkRequest); + ApproximateProgress forkProgress = forkRequestToApproximateProgress(forkRequest); + com.google.api.services.dataflow.model.Position forkPosition = forkProgress.getPosition(); + if (forkPosition == null) { + LOG.warn("GroupingShuffleReader only supports fork at a Position. Requested: {}", + forkRequest); return null; } - - if (stopCloudPosition.getShufflePosition() == null) { - LOG.warn("A stop position other than shuffle position is not supported in " - + "a grouping shuffle source: " + stopCloudPosition.toString()); + String forkShufflePosition = forkPosition.getShufflePosition(); + if (forkShufflePosition == null) { + LOG.warn("GroupingShuffleReader only supports fork at a shuffle position. Requested: {}", + forkPosition); return null; } ByteArrayShufflePosition newStopPosition = - ByteArrayShufflePosition.fromBase64(stopCloudPosition.getShufflePosition()); - + ByteArrayShufflePosition.fromBase64(forkShufflePosition); if (newStopPosition.compareTo(promisedPosition) <= 0) { - LOG.warn("Proposed stop position: " + stopCloudPosition.getShufflePosition() - + " <= promised position: " + promisedPosition.encodeBase64()); + LOG.info("Already progressed to promised shuffle position {} " + + "which is after the requested fork shuffle position {}", + promisedPosition.encodeBase64(), forkShufflePosition); return null; } if (this.stopPosition != null && newStopPosition.compareTo(this.stopPosition) >= 0) { - LOG.warn("Proposed stop position: " + stopCloudPosition.getShufflePosition() + throw new IllegalArgumentException( + "Fork requested at a shuffle position beyond the end of the current range: " + + forkShufflePosition + " >= current stop position: " + this.stopPosition.encodeBase64()); - return null; } this.stopPosition = newStopPosition; - LOG.info("Updated the stop position to " + stopCloudPosition.getShufflePosition()); + LOG.info("Forked GroupingShuffleReader at {}", forkShufflePosition); - return cloudPositionToReaderPosition(stopCloudPosition); + return new ForkResultWithPosition(cloudPositionToReaderPosition(forkPosition)); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index 72107b979d841..3fa6cb61a7387 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -19,7 +19,7 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; import static java.lang.Math.min; import 
com.google.api.services.dataflow.model.ApproximateProgress; @@ -124,38 +124,37 @@ public Progress getProgress() { } @Override - public Position updateStopPosition(Progress proposedStopPosition) { - checkNotNull(proposedStopPosition); - - // Currently we only support stop position in record index of - // an API Position in InMemoryReader. If stop position in other types is - // proposed, the end position in iterator will not be updated, - // and return null. - com.google.api.services.dataflow.model.Position stopPosition = - sourceProgressToCloudProgress(proposedStopPosition).getPosition(); - if (stopPosition == null) { - LOG.warn("A stop position other than a Dataflow API Position is not currently supported."); + public ForkResult requestFork(ForkRequest forkRequest) { + checkNotNull(forkRequest); + + com.google.api.services.dataflow.model.Position forkPosition = + forkRequestToApproximateProgress(forkRequest).getPosition(); + if (forkPosition == null) { + LOG.warn("InMemoryReader only supports fork at a Position. Requested: {}", forkRequest); return null; } - Long recordIndex = stopPosition.getRecordIndex(); - if (recordIndex == null) { - LOG.warn("A proposed stop position must be a record index for InMemoryReader."); + Long forkIndex = forkPosition.getRecordIndex(); + if (forkIndex == null) { + LOG.warn("InMemoryReader only supports fork at a record index. Requested: {}", + forkPosition); return null; } - if (recordIndex <= index || recordIndex >= endPosition) { - // Proposed stop position is not after the current position or proposed - // stop position is after the current stop (end) position: No stop - // position update. - LOG.warn("The proposed stop position " + recordIndex - + " is not between the current stop position " + index - + " and the current stop position " + endPosition); + if (forkIndex <= index) { + LOG.info("Already progressed to index {} which is after the requested fork index {}", + index, forkIndex); return null; } + if (forkIndex >= endPosition) { + throw new IllegalArgumentException( + "Fork requested at an index beyond the end of the current range: " + forkIndex + + " >= " + endPosition); + } + + this.endPosition = forkIndex.intValue(); + LOG.info("Forked InMemoryReader at index {}", forkIndex); - LOG.info("Updated the stop position to record " + recordIndex); - this.endPosition = recordIndex.intValue(); - return cloudPositionToReaderPosition(stopPosition); + return new ForkResultWithPosition(cloudPositionToReaderPosition(forkPosition)); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java index 07229941a82af..6386d038fbf79 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java @@ -50,9 +50,8 @@ public static Reader.Position cloudPositionToReaderPosition(@Nullable Position c return cloudPosition == null ? null : new DataflowReaderPosition(cloudPosition); } - public static SourceFormat.OperationRequest - cloudSourceOperationRequestToSourceOperationRequest( - @Nullable SourceOperationRequest request) { + public static SourceFormat.OperationRequest cloudSourceOperationRequestToSourceOperationRequest( + @Nullable SourceOperationRequest request) { return request == null ? 
null : new DataflowSourceOperationRequest(request); } @@ -62,18 +61,13 @@ public static Reader.Position cloudPositionToReaderPosition(@Nullable Position c return response == null ? null : new DataflowSourceOperationResponse(response); } - public static SourceFormat.SourceSpec cloudSourceToSourceSpec( - @Nullable Source cloudSource) { - return cloudSource == null ? null : new DataflowSourceSpec(cloudSource); + public static ApproximateProgress readerProgressToCloudProgress( + @Nullable Reader.Progress readerProgress) { + return readerProgress == null ? null : ((DataflowReaderProgress) readerProgress).cloudProgress; } - public static ApproximateProgress sourceProgressToCloudProgress( - @Nullable Reader.Progress sourceProgress) { - return sourceProgress == null ? null : ((DataflowReaderProgress) sourceProgress).cloudProgress; - } - - public static Position sourcePositionToCloudPosition(@Nullable Reader.Position sourcePosition) { - return sourcePosition == null ? null : ((DataflowReaderPosition) sourcePosition).cloudPosition; + public static Position toCloudPosition(@Nullable Reader.Position readerPosition) { + return readerPosition == null ? null : ((DataflowReaderPosition) readerPosition).cloudPosition; } @@ -91,8 +85,19 @@ public static Source sourceSpecToCloudSource(@Nullable SourceFormat.SourceSpec s return (spec == null) ? null : ((DataflowSourceSpec) spec).cloudSource; } + public static ApproximateProgress forkRequestToApproximateProgress( + @Nullable Reader.ForkRequest stopRequest) { + return (stopRequest == null) ? null : ((DataflowForkRequest) stopRequest).approximateProgress; + } + + public static Reader.ForkRequest toForkRequest( + @Nullable ApproximateProgress approximateProgress) { + return (approximateProgress == null) ? null : new DataflowForkRequest(approximateProgress); + } + static class DataflowReaderProgress implements Reader.Progress { public final ApproximateProgress cloudProgress; + public DataflowReaderProgress(ApproximateProgress cloudProgress) { this.cloudProgress = cloudProgress; } @@ -100,6 +105,7 @@ public DataflowReaderProgress(ApproximateProgress cloudProgress) { static class DataflowReaderPosition implements Reader.Position { public final Position cloudPosition; + public DataflowReaderPosition(Position cloudPosition) { this.cloudPosition = cloudPosition; } @@ -107,6 +113,7 @@ public DataflowReaderPosition(Position cloudPosition) { static class DataflowSourceOperationRequest implements SourceFormat.OperationRequest { public final SourceOperationRequest cloudRequest; + public DataflowSourceOperationRequest(SourceOperationRequest cloudRequest) { this.cloudRequest = cloudRequest; } @@ -114,6 +121,7 @@ public DataflowSourceOperationRequest(SourceOperationRequest cloudRequest) { static class DataflowSourceOperationResponse implements SourceFormat.OperationResponse { public final SourceOperationResponse cloudResponse; + public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { this.cloudResponse = cloudResponse; } @@ -121,6 +129,7 @@ public DataflowSourceOperationResponse(SourceOperationResponse cloudResponse) { static class DataflowSourceSpec implements SourceFormat.SourceSpec { public final Source cloudSource; + public DataflowSourceSpec(Source cloudSource) { this.cloudSource = cloudSource; } @@ -138,8 +147,8 @@ public static Map cloudSourceToDictionary(Source source) { cloudSourceMetadataToDictionary(source.getMetadata())); } if (source.getDoesNotNeedSplitting() != null) { - addBoolean(res, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, - 
source.getDoesNotNeedSplitting()); + addBoolean( + res, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, source.getDoesNotNeedSplitting()); } return res; } @@ -147,12 +156,10 @@ public static Map cloudSourceToDictionary(Source source) { private static Map cloudSourceMetadataToDictionary(SourceMetadata metadata) { Map res = new HashMap<>(); if (metadata.getProducesSortedKeys() != null) { - addBoolean( - res, PropertyNames.SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys()); + addBoolean(res, PropertyNames.SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys()); } if (metadata.getEstimatedSizeBytes() != null) { - addLong( - res, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes()); + addLong(res, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes()); } if (metadata.getInfinite() != null) { addBoolean(res, PropertyNames.SOURCE_IS_INFINITE, metadata.getInfinite()); @@ -167,4 +174,12 @@ public static Source dictionaryToCloudSource(Map params) throws // translated, because they only make sense in cloud Source objects produced by the user. return res; } + + private static class DataflowForkRequest implements Reader.ForkRequest { + public final ApproximateProgress approximateProgress; + + private DataflowForkRequest(ApproximateProgress approximateProgress) { + this.approximateProgress = approximateProgress; + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java index 793343d6d1bb4..717c44aeec422 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java @@ -85,9 +85,8 @@ public Reader.Progress getWorkerProgress() throws Exception { } @Override - public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) - throws Exception { - return getReadOperation().proposeStopPosition(proposedStopPosition); + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) throws Exception { + return getReadOperation().requestFork(forkRequest); } ReadOperation getReadOperation() throws Exception { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index 73b289ce9a227..d7f3f479f0ee3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -220,13 +220,9 @@ public Reader.Progress getProgress() { } /** - * Relays the request to update the stop position to {@code ReaderIterator}. - * - * @param proposedStopPosition the proposed stop position - * @return the new stop position updated in {@code ReaderIterator}, or - * {@code null} if the source iterator has not been initialized + * Relays the fork request to {@code ReaderIterator}. 
*/ - public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) { + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) { synchronized (initializationStateLock) { if (isFinished()) { LOG.warn("Iterator is in the Finished state, returning null stop position."); @@ -234,10 +230,10 @@ public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) } synchronized (sourceIteratorLock) { if (readerIterator == null) { - LOG.warn("Iterator has not been initialized, returning null stop position."); + LOG.warn("Iterator has not been initialized, refusing to fork at {}", forkRequest); return null; } - return readerIterator.updateStopPosition(proposedStopPosition); + return readerIterator.requestFork(forkRequest); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java index 7c9b783511c01..93a398907dd1e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java @@ -77,25 +77,38 @@ public interface ReaderIterator extends AutoCloseable { public Progress getProgress(); /** - * Attempts to update the stop position of the task with the proposed stop - * position and returns the actual new stop position. - * - *
<p> If the source finds the proposed one is not a convenient position to - * stop, it can pick a different stop position. The {@code ReaderIterator} - * should start returning {@code false} from {@code hasNext()} once it has - * passed its stop position. Subsequent stop position updates must be in - * non-increasing order within a task. - * - *
<p> This method is not required to be thread-safe, and it will not be + * Attempts to split the input in two parts: the "primary" part and the "residual" part. + * The current {@link ReaderIterator} keeps processing the primary part, while the residual part + * will be processed elsewhere (e.g. perhaps on a different worker). + * <p>
+ * The primary and residual parts, if concatenated, must represent the same input as the + * current input of this {@link ReaderIterator} before this call. + * <p>
+ * The boundary between the primary part and the residual part is specified in + * a framework-specific way using {@link ForkRequest}: e.g., if the framework supports the + * notion of positions, it might be a position at which the input is asked to split itself + * (which is not necessarily the same position at which it will split itself); it might + * be an approximate fraction of input, or something else. + * <p>
+ * {@link ForkResult} encodes, in a framework-specific way, the information sufficient to + * construct a description of the resulting primary and residual inputs. For example, it might, + * again, be a position demarcating these parts, or it might be a pair of fully-specified input + * descriptions, or something else. + * <p>
+ * After a successful call to {@link #requestFork}, subsequent calls should be interpreted + * relative to the new primary. + * <p>
+ * This method is not required to be thread-safe, and it will not be * called concurrently to any other methods. + * <p>
    + * This call should not affect the range of input represented by the {@link Reader} which + * produced this {@link ReaderIterator}. * - * @param proposedStopPosition a proposed position to stop - * iterating through the source - * @return the new stop position, or {@code null} on failure if the - * implementation does not support position updates(implementors are discouraged - * from throwing {@code UnsupportedOperationException} in this case). + * @return {@code null} if the {@link ForkRequest} cannot be honored (in that case the input + * represented by this {@link ReaderIterator} stays the same), or a {@link ForkResult} + * describing how the input was split into a primary and residual part. */ - public Position updateStopPosition(Progress proposedStopPosition); + public ForkResult requestFork(ForkRequest request); } /** An abstract base class for ReaderIterator implementations. */ @@ -116,7 +129,7 @@ public Progress getProgress() { } @Override - public Position updateStopPosition(Progress proposedStopPosition) { + public ForkResult requestFork(ForkRequest forkRequest) { return null; } } @@ -141,6 +154,34 @@ public interface Progress {} */ public interface Position {} + /** + * A framework-specific way to specify how {@link ReaderIterator#requestFork} should split + * the input into a primary and residual part. + */ + public interface ForkRequest {} + + /** + * A framework-specific way to specify how {@link ReaderIterator#requestFork} has split + * the input into a primary and residual part. + */ + public interface ForkResult {} + + /** + * A {@link ForkResult} which specifies the boundary between the primary and residual parts + * of the input using a {@link Position}. + */ + public static final class ForkResultWithPosition implements ForkResult { + private final Position acceptedPosition; + + public ForkResultWithPosition(Position acceptedPosition) { + this.acceptedPosition = acceptedPosition; + } + + public Position getAcceptedPosition() { + return acceptedPosition; + } + } + /** * Utility method to notify observers about a new element, which has * been read by this Reader, and its size in bytes. Normally, there diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java index 18ab4762d171d..117affab8d9a1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkExecutor.java @@ -83,12 +83,10 @@ public Reader.Progress getWorkerProgress() throws Exception { } /** - * Proposes that the worker changes the stop position for the current work. - * Returns the new position if accepted, otherwise {@code null}. + * See {@link Reader.ReaderIterator#requestFork}. Makes sense only for tasks which read input. */ - public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) - throws Exception { - // By default, returns null indicating that no task splitting happens. + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) throws Exception { + // By default, fork is unsupported. 
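[Editor's note: to make the new ReaderIterator fork contract concrete, here is a minimal sketch of the record-index fork decision that InMemoryReader and FileBasedReader implement in the hunks above. The class and method names (ForkDecisionExample, acceptForkAtIndex) are illustrative only and are not part of the patch; only the Dataflow model classes are assumed on the classpath.

import com.google.api.services.dataflow.model.ApproximateProgress;
import com.google.api.services.dataflow.model.Position;

/** Illustrative only: the record-index fork decision used by the readers in this patch. */
final class ForkDecisionExample {
  private ForkDecisionExample() {}

  /**
   * Returns the accepted fork index (the new exclusive end of the primary part), or
   * {@code null} if the request is not an index-addressed position or is already behind the
   * current read position. A request at or beyond the current end of the range is an error.
   */
  static Long acceptForkAtIndex(
      ApproximateProgress forkProgress, long currentIndex, long endIndex) {
    Position position = (forkProgress == null) ? null : forkProgress.getPosition();
    if (position == null || position.getRecordIndex() == null) {
      return null;  // Not an index-based request: cannot honor it, input stays the same.
    }
    long forkIndex = position.getRecordIndex();
    if (forkIndex <= currentIndex) {
      return null;  // Already progressed past the requested boundary.
    }
    if (forkIndex >= endIndex) {
      throw new IllegalArgumentException(
          "Fork requested at an index beyond the end of the current range: " + forkIndex);
    }
    return forkIndex;
  }
}

A reader that accepts the fork would then truncate its range to the returned index and answer with a ForkResultWithPosition for that boundary, as InMemoryReader does above.]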
return null; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java index 4794fe3efb2ec..f212d42e469f1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java @@ -78,16 +78,11 @@ public abstract class WorkProgressUpdater { protected long progressReportIntervalMs; /** - * The stop position to report to the service in the next progress update, - * or {@code null} if there is nothing to report. - * In cases that there is no split request from service, or worker failed to - * split in response to the last received split request, the task stop - * position implicitly stays the same as it was before that last request - * (as a result of a prior split request), and on the next reportProgress - * we'll send the {@code null} as a stop position update, which is a no-op - * for the service. + * The {@link Reader.ForkResult} to report to the service in the next progress update, + * or {@code null} if there is nothing to report (if no fork happened since the last progress + * update). */ - protected Reader.Position stopPositionToService; + protected Reader.ForkResult forkResultToReport; public WorkProgressUpdater(WorkExecutor worker) { this.worker = worker; @@ -124,10 +119,10 @@ public void stopReportingProgress() throws Exception { executor.shutdownNow(); } - // We send a final progress report in case there was an unreported stop position update. - if (stopPositionToService != null) { - LOG.info("Sending final progress update with unreported stop position: {} " - + "for work item: {}", stopPositionToService, workString()); + // We send a final progress report in case there was an unreported fork. + if (forkResultToReport != null) { + LOG.info("Sending final progress update with unreported fork: {} " + + "for work item: {}", forkResultToReport, workString()); reportProgressHelper(); // This call can fail with an exception } @@ -215,8 +210,8 @@ protected long leaseRemainingTime(long leaseExpirationTimestamp) { } // Visible for testing. 
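[Editor's note: the reporting path for a fork is spread across several files above (WorkProgressUpdater caches the result, DataflowWorker.buildStatus converts it to a stop position). The following is a rough sketch of that round trip using only helpers introduced in this patch; the class and method names (ForkReportingExample, forkAndRecord) are illustrative, and the sketch assumes it sits in the same package as SourceTranslationUtils.

import com.google.api.services.dataflow.model.ApproximateProgress;
import com.google.api.services.dataflow.model.Position;
import com.google.api.services.dataflow.model.WorkItemStatus;

import com.google.cloud.dataflow.sdk.util.common.worker.Reader;
import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor;

/** Illustrative only: request a fork at the service-suggested stop point and report it. */
final class ForkReportingExample {
  private ForkReportingExample() {}

  static Reader.ForkResult forkAndRecord(
      WorkExecutor worker, ApproximateProgress suggestedStopPoint, WorkItemStatus status)
      throws Exception {
    // Translate the service's suggestion into a framework-level fork request.
    Reader.ForkResult forkResult =
        worker.requestFork(SourceTranslationUtils.toForkRequest(suggestedStopPoint));
    if (forkResult instanceof Reader.ForkResultWithPosition) {
      // A position-based fork is reported back to the service as the new stop position.
      Position accepted = SourceTranslationUtils.toCloudPosition(
          ((Reader.ForkResultWithPosition) forkResult).getAcceptedPosition());
      status.setStopPosition(accepted);
    }
    return forkResult;
  }
}]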
- public Reader.Position getStopPosition() { - return stopPositionToService; + public Reader.ForkResult getForkResultToReport() { + return forkResultToReport; } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java index 91d0003eea20c..34dc51bb3c041 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/BasicSerializableSourceFormatTest.java @@ -102,10 +102,9 @@ public boolean producesSortedKeys(PipelineOptions options) throws Exception { } @Override - public Reader createBasicReader( - PipelineOptions options, Coder coder, + public Reader createBasicReader(PipelineOptions options, Coder coder, @Nullable ExecutionContext executionContext) throws IOException { - return new RangeReader(from, to); + return new RangeReader(this); } @Override @@ -117,12 +116,10 @@ public Coder getDefaultOutputCoder() { } private class RangeReader implements Reader { - private int to; private int current; - public RangeReader(int from, int to) { - this.to = to; - this.current = from - 1; + public RangeReader(Read source) { + this.current = source.from - 1; } @Override @@ -149,8 +146,8 @@ public void testSplitAndReadShardsBack() throws Exception { DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class); options.setNumWorkers(5); - com.google.api.services.dataflow.model.Source source = translateIOToCloudSource( - TestIO.fromRange(10, 20), options); + com.google.api.services.dataflow.model.Source source = + translateIOToCloudSource(TestIO.fromRange(10, 20), options); List> elems = CloudSourceUtils.readElemsFromSource(options, source); assertEquals(10, elems.size()); for (int i = 0; i < 10; ++i) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index 081e0b8c14ed9..0745e40faef03 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -16,10 +16,13 @@ package com.google.cloud.dataflow.sdk.runners.worker; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionAtIndex; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; import static com.google.cloud.dataflow.sdk.util.CloudCounterUtils.extractCounter; 
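[Editor's note: the static imports in this test pull helpers from ReaderTestUtils, whose source is not included in the hunks shown here. Inferring from their call sites, and from the makeRecordIndexPosition/makeRecordIndexProgress helpers they replace later in this file, a plausible sketch of those helpers follows; the class name ReaderTestUtilsSketch and its placement alongside SourceTranslationUtils in com.google.cloud.dataflow.sdk.runners.worker are assumptions.

import com.google.api.services.dataflow.model.ApproximateProgress;
import com.google.api.services.dataflow.model.Position;

import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

/** Plausible sketch of the ReaderTestUtils helpers referenced by these tests. */
final class ReaderTestUtilsSketch {
  private ReaderTestUtilsSketch() {}

  static Position positionAtIndex(Long index) {
    Position position = new Position();
    position.setRecordIndex(index);
    return position;
  }

  static ApproximateProgress approximateProgressAtPosition(Position position) {
    return new ApproximateProgress().setPosition(position);
  }

  static ApproximateProgress approximateProgressAtIndex(Long index) {
    return approximateProgressAtPosition(positionAtIndex(index));
  }

  static Reader.ForkRequest forkRequestAtPosition(Position position) {
    return SourceTranslationUtils.toForkRequest(approximateProgressAtPosition(position));
  }

  static Reader.ForkRequest forkRequestAtIndex(Long index) {
    return forkRequestAtPosition(positionAtIndex(index));
  }

  static Position positionFromForkResult(Reader.ForkResult forkResult) {
    return SourceTranslationUtils.toCloudPosition(
        ((Reader.ForkResultWithPosition) forkResult).getAcceptedPosition());
  }
}]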
import static com.google.cloud.dataflow.sdk.util.CloudMetricUtils.extractCloudMetric; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; @@ -93,13 +96,14 @@ public Reader.Progress getWorkerProgress() { } @Override - public Reader.Position proposeStopPosition(Reader.Progress suggestedStopPoint) { + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) { @Nullable - ApproximateProgress progress = sourceProgressToCloudProgress(suggestedStopPoint); + ApproximateProgress progress = forkRequestToApproximateProgress(forkRequest); if (progress == null) { return null; } - return cloudPositionToReaderPosition(progress.getPosition()); + return new Reader.ForkResultWithPosition( + cloudPositionToReaderPosition(progress.getPosition())); } public void setWorkerProgress(ApproximateProgress progress) { @@ -125,6 +129,7 @@ public void setWorkerProgress(ApproximateProgress progress) { @Rule public final ExpectedException thrown = ExpectedException.none(); + @Mock private DataflowWorker.WorkUnitClient workUnitClient; private CounterSet counters; @@ -173,60 +178,60 @@ public void workProgressUpdaterUpdates() throws Exception { .thenReturn(generateServiceState(nowMillis + 2000, 1000, null)); setUpCounters(2); setUpMetrics(3); - setUpProgress(makeRecordIndexProgress(1L)); + setUpProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after leaseRemainingTime / 2. verify(workUnitClient, timeout(600)) - .reportWorkItemStatus( - argThat(new ExpectedDataflowProgress().withCounters(2).withMetrics(3).withProgress( - makeRecordIndexProgress(1L)))); + .reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withCounters(2).withMetrics(3).withProgress( + approximateProgressAtIndex(1L)))); progressUpdater.stopReportingProgress(); } // Verifies that ReportWorkItemStatusRequest contains correct progress report - // and actual stop position report. + // and actual fork result. @Test(timeout = 5000) public void workProgressUpdaterAdaptsProgressInterval() throws Exception { // Mock that the next reportProgress call will return a response that asks // us to truncate the task at index 3, and the next two will not ask us to // truncate at all. when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, makeRecordIndexPosition(3L))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, positionAtIndex(3L))) .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)) .thenReturn(generateServiceState(nowMillis + 4000, 3000, null)); setUpCounters(3); setUpMetrics(2); - setUpProgress(makeRecordIndexProgress(1L)); + setUpProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after // leaseRemainingTime (1000) / 2 = 500. verify(workUnitClient, timeout(600)) - .reportWorkItemStatus( - argThat(new ExpectedDataflowProgress().withCounters(3).withMetrics(2).withProgress( - makeRecordIndexProgress(1L)))); + .reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( + approximateProgressAtIndex(1L)))); setUpCounters(5); setUpMetrics(6); - setUpProgress(makeRecordIndexProgress(2L)); + setUpProgress(approximateProgressAtIndex(2L)); // The second update should be sent after one second (2000 / 2). 
verify(workUnitClient, timeout(1100)) .reportWorkItemStatus(argThat( - new ExpectedDataflowProgress() + new ExpectedDataflowWorkItemStatus() .withCounters(5) .withMetrics(6) - .withProgress(makeRecordIndexProgress(2L)) - .withStopPosition(makeRecordIndexPosition(3L)))); + .withProgress(approximateProgressAtIndex(2L)) + .withForkAtPosition(positionAtIndex(3L)))); - // After the request is sent, reset stop position cache to null. - assertNull(progressUpdater.getStopPosition()); + // After the request is sent, reset cached fork result to null. + assertNull(progressUpdater.getForkResultToReport()); - setUpProgress(makeRecordIndexProgress(3L)); + setUpProgress(approximateProgressAtIndex(3L)); // The third update should be sent after one and half seconds (3000 / 2). verify(workUnitClient, timeout(1600)) - .reportWorkItemStatus( - argThat(new ExpectedDataflowProgress().withProgress(makeRecordIndexProgress(3L)))); + .reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)))); progressUpdater.stopReportingProgress(); } @@ -235,40 +240,41 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { @Test(timeout = 3000) public void workProgressUpdaterLastUpdate() throws Exception { when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, makeRecordIndexPosition(2L))) + .thenReturn(generateServiceState(nowMillis + 2000, 1000, positionAtIndex(2L))) .thenReturn(generateServiceState(nowMillis + 3000, 2000, null)); - setUpProgress(makeRecordIndexProgress(1L)); + setUpProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after leaseRemainingTime / 2 = 500 msec. Thread.sleep(600); verify(workUnitClient, timeout(200)) - .reportWorkItemStatus( - argThat(new ExpectedDataflowProgress().withProgress(makeRecordIndexProgress(1L)))); + .reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(1L)))); - // The first update should include the new actual stop position. + // The first update should include the new fork result.. // Verify that the progressUpdater has recorded it. - assertEquals( - makeRecordIndexPosition(2L), - sourcePositionToCloudPosition(progressUpdater.getStopPosition())); + Reader.ForkResultWithPosition forkResult = + (Reader.ForkResultWithPosition) progressUpdater.getForkResultToReport(); + assertEquals(positionAtIndex(2L), toCloudPosition(forkResult.getAcceptedPosition())); - setUpProgress(makeRecordIndexProgress(2L)); + setUpProgress(approximateProgressAtIndex(2L)); // The second update should be sent after one second (2000 / 2). - Thread.sleep(200); // not enough time for an update so the latest stop position is not - // acknowledged. - // Check that the progressUpdater still has a pending stop position to send - assertEquals( - makeRecordIndexPosition(2L), - sourcePositionToCloudPosition(progressUpdater.getStopPosition())); - - progressUpdater.stopReportingProgress(); // should send the last update - // check that the progressUpdater is done with reporting its latest stop position - assertNull(progressUpdater.getStopPosition()); - - // Verify that the last update contained the latest stop position + + // Not enough time for an update so the latest fork result is not acknowledged. 
+ Thread.sleep(200); + + // Check that the progressUpdater still has a pending fork result to send + forkResult = (Reader.ForkResultWithPosition) progressUpdater.getForkResultToReport(); + assertEquals(positionAtIndex(2L), toCloudPosition(forkResult.getAcceptedPosition())); + + progressUpdater.stopReportingProgress(); // Should send the last update. + // Check that the progressUpdater is done with reporting its latest fork result. + assertNull(progressUpdater.getForkResultToReport()); + + // Verify that the last update contained the latest fork result. verify(workUnitClient, timeout(1000)) - .reportWorkItemStatus( - argThat(new ExpectedDataflowProgress().withStopPosition(makeRecordIndexPosition(2L)))); + .reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withForkAtPosition(positionAtIndex(2L)))); } private void setUpCounters(int n) { @@ -309,17 +315,6 @@ private void setUpProgress(ApproximateProgress progress) { worker.setWorkerProgress(progress); } - private com.google.api.services.dataflow.model.Position makeRecordIndexPosition(Long index) { - com.google.api.services.dataflow.model.Position position = - new com.google.api.services.dataflow.model.Position(); - position.setRecordIndex(index); - return position; - } - - private ApproximateProgress makeRecordIndexProgress(Long index) { - return new ApproximateProgress().setPosition(makeRecordIndexPosition(index)); - } - private WorkItemServiceState generateServiceState(long leaseExpirationTimestamp, int progressReportIntervalMs, Position suggestedStopPosition) throws IOException { WorkItemServiceState responseState = new WorkItemServiceState(); @@ -329,39 +324,43 @@ private WorkItemServiceState generateServiceState(long leaseExpirationTimestamp, toCloudDuration(Duration.millis(progressReportIntervalMs))); if (suggestedStopPosition != null) { - responseState.setSuggestedStopPosition(suggestedStopPosition); + responseState.setSuggestedStopPoint(approximateProgressAtPosition(suggestedStopPosition)); } return responseState; } - private static final class ExpectedDataflowProgress extends ArgumentMatcher { + private static final class ExpectedDataflowWorkItemStatus + extends ArgumentMatcher { @Nullable Integer counterCount; + @Nullable Integer metricCount; + @Nullable ApproximateProgress expectedProgress; + @Nullable - Position expectedStopPosition; + Position expectedForkPosition; - public ExpectedDataflowProgress withCounters(Integer counterCount) { + public ExpectedDataflowWorkItemStatus withCounters(Integer counterCount) { this.counterCount = counterCount; return this; } - public ExpectedDataflowProgress withMetrics(Integer metricCount) { + public ExpectedDataflowWorkItemStatus withMetrics(Integer metricCount) { this.metricCount = metricCount; return this; } - public ExpectedDataflowProgress withProgress(ApproximateProgress expectedProgress) { + public ExpectedDataflowWorkItemStatus withProgress(ApproximateProgress expectedProgress) { this.expectedProgress = expectedProgress; return this; } - public ExpectedDataflowProgress withStopPosition(Position expectedStopPosition) { - this.expectedStopPosition = expectedStopPosition; + public ExpectedDataflowWorkItemStatus withForkAtPosition(Position expectedForkPosition) { + this.expectedForkPosition = expectedForkPosition; return this; } @@ -381,12 +380,12 @@ public void describeTo(Description description) { if (this.expectedProgress != null) { values.add("progress " + this.expectedProgress); } - if (this.expectedStopPosition != null) { - values.add("stop position " + 
this.expectedStopPosition); + if (this.expectedForkPosition != null) { + values.add("fork position " + this.expectedForkPosition); } else { - values.add("no stop position present"); + values.add("no fork position present"); } - description.appendValueList("Dataflow progress with ", ", ", ".", values); + description.appendValueList("Dataflow WorkItemStatus with ", ", ", ".", values); } @Override @@ -431,10 +430,10 @@ private boolean matchProgress(WorkItemStatus status) { private boolean matchStopPosition(WorkItemStatus status) { Position actualStopPosition = status.getStopPosition(); - if (expectedStopPosition == null) { + if (expectedForkPosition == null) { return actualStopPosition == null; } - return expectedStopPosition.equals(actualStopPosition); + return expectedForkPosition.equals(actualStopPosition); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java index ff3abd3dbe426..c023123c4a6d6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java @@ -17,9 +17,18 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Base64.encodeBase64URLSafeString; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromForkResult; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toForkRequest; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.api.services.dataflow.model.Position; @@ -41,8 +50,8 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Lists; +import org.hamcrest.Matchers; import org.joda.time.Instant; -import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -56,6 +65,8 @@ import java.util.List; import java.util.NoSuchElementException; +import javax.annotation.Nullable; + /** * Tests for GroupingShuffleReader. 
*/ @@ -84,57 +95,42 @@ private enum ValuesToRead { } private void runTestReadFromShuffle( - List>> input, - ValuesToRead valuesToRead) - throws Exception { - Coder>> sinkElemCoder = - WindowedValue.getFullCoder( - KvCoder.of(BigEndianIntegerCoder.of(), - StringUtf8Coder.of()), - IntervalWindow.getCoder()); + List>> input, ValuesToRead valuesToRead) throws Exception { + Coder>> sinkElemCoder = WindowedValue.getFullCoder( + KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of()), IntervalWindow.getCoder()); Coder>>> sourceElemCoder = WindowedValue.getFullCoder( - KvCoder.of( - BigEndianIntegerCoder.of(), - IterableCoder.of(StringUtf8Coder.of())), + KvCoder.of(BigEndianIntegerCoder.of(), IterableCoder.of(StringUtf8Coder.of())), IntervalWindow.getCoder()); // Write to shuffle with GROUP_KEYS ShuffleSink. ShuffleSink> shuffleSink = new ShuffleSink<>( - PipelineOptionsFactory.create(), - null, ShuffleSink.ShuffleKind.GROUP_KEYS, - sinkElemCoder); + PipelineOptionsFactory.create(), null, ShuffleSink.ShuffleKind.GROUP_KEYS, sinkElemCoder); TestShuffleWriter shuffleWriter = new TestShuffleWriter(); int kvCount = 0; List actualSizes = new ArrayList<>(); try (Sink.SinkWriter>> shuffleSinkWriter = - shuffleSink.writer(shuffleWriter)) { + shuffleSink.writer(shuffleWriter)) { for (KV> kvs : input) { Integer key = kvs.getKey(); for (String value : kvs.getValue()) { ++kvCount; actualSizes.add(shuffleSinkWriter.add( - WindowedValue.of(KV.of(key, value), - timestamp, - Lists.newArrayList(window)))); + WindowedValue.of(KV.of(key, value), timestamp, Lists.newArrayList(window)))); } } } List records = shuffleWriter.getRecords(); - Assert.assertEquals(kvCount, records.size()); - Assert.assertEquals(shuffleWriter.getSizes(), actualSizes); + assertEquals(kvCount, records.size()); + assertEquals(shuffleWriter.getSizes(), actualSizes); // Read from shuffle with GroupingShuffleReader. BatchModeExecutionContext context = new BatchModeExecutionContext(); - GroupingShuffleReader groupingShuffleReader = - new GroupingShuffleReader<>( - PipelineOptionsFactory.create(), - null, null, null, - sourceElemCoder, - context); + GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( + PipelineOptionsFactory.create(), null, null, null, sourceElemCoder, context); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(groupingShuffleReader); @@ -147,17 +143,17 @@ private void runTestReadFromShuffle( List>> actual = new ArrayList<>(); try (Reader.ReaderIterator>>> iter = - groupingShuffleReader.iterator(shuffleReader)) { + groupingShuffleReader.iterator(shuffleReader)) { Iterable prevValuesIterable = null; Iterator prevValuesIterator = null; while (iter.hasNext()) { - Assert.assertTrue(iter.hasNext()); - Assert.assertTrue(iter.hasNext()); + assertTrue(iter.hasNext()); + assertTrue(iter.hasNext()); WindowedValue>> windowedValue = iter.next(); // Verify value is in an empty windows. 
- Assert.assertEquals(Long.MIN_VALUE, windowedValue.getTimestamp().getMillis()); - Assert.assertEquals(0, windowedValue.getWindows().size()); + assertEquals(Long.MIN_VALUE, windowedValue.getTimestamp().getMillis()); + assertEquals(0, windowedValue.getWindows().size()); KV> elem = windowedValue.getValue(); Integer key = elem.getKey(); @@ -175,21 +171,21 @@ private void runTestReadFromShuffle( if (valuesToRead.ordinal() >= ValuesToRead.READ_ONE_VALUE.ordinal()) { while (valuesIterator.hasNext()) { - Assert.assertTrue(valuesIterator.hasNext()); - Assert.assertTrue(valuesIterator.hasNext()); - Assert.assertEquals("BatchModeExecutionContext key", key, context.getKey()); + assertTrue(valuesIterator.hasNext()); + assertTrue(valuesIterator.hasNext()); + assertEquals("BatchModeExecutionContext key", key, context.getKey()); values.add(valuesIterator.next()); if (valuesToRead == ValuesToRead.READ_ONE_VALUE) { break; } } if (valuesToRead == ValuesToRead.READ_ALL_VALUES) { - Assert.assertFalse(valuesIterator.hasNext()); - Assert.assertFalse(valuesIterator.hasNext()); + assertFalse(valuesIterator.hasNext()); + assertFalse(valuesIterator.hasNext()); try { valuesIterator.next(); - Assert.fail("Expected NoSuchElementException"); + fail("Expected NoSuchElementException"); } catch (NoSuchElementException exn) { // As expected. } @@ -203,11 +199,11 @@ private void runTestReadFromShuffle( actual.add(KV.of(key, values)); } - Assert.assertFalse(iter.hasNext()); - Assert.assertFalse(iter.hasNext()); + assertFalse(iter.hasNext()); + assertFalse(iter.hasNext()); try { iter.next(); - Assert.fail("Expected NoSuchElementException"); + fail("Expected NoSuchElementException"); } catch (NoSuchElementException exn) { // As expected. } @@ -227,8 +223,8 @@ private void runTestReadFromShuffle( } expected.add(KV.of(key, values)); } - Assert.assertEquals(expected, actual); - Assert.assertEquals(expectedSizes, observer.getActualSizes()); + assertEquals(expected, actual); + assertEquals(expectedSizes, observer.getActualSizes()); } @Test @@ -261,7 +257,7 @@ public void testReadNonEmptyShuffleDataSkippingValues() throws Exception { runTestReadFromShuffle(KVS, ValuesToRead.SKIP_VALUES); } - static byte[] fabricatePosition(int shard, byte[] key) throws Exception { + static byte[] fabricatePosition(int shard, @Nullable byte[] key) throws Exception { ByteArrayOutputStream os = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(os); dos.writeInt(shard); @@ -272,7 +268,7 @@ static byte[] fabricatePosition(int shard, byte[] key) throws Exception { } @Test - public void testReadFromEmptyShuffleDataAndUpdateStopPosition() throws Exception { + public void testReadFromEmptyShuffleDataAndRequestFork() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( PipelineOptionsFactory.create(), null, null, null, @@ -283,31 +279,34 @@ public void testReadFromEmptyShuffleDataAndUpdateStopPosition() throws Exception TestShuffleReader shuffleReader = new TestShuffleReader(); try (Reader.ReaderIterator>>> iter = groupingShuffleReader.iterator(shuffleReader)) { - // Can update the stop position, the source range spans all interval - Position proposedStopPosition = new Position(); + // Can fork, the source range spans the entire interval. 
+ Position proposedForkPosition = new Position(); String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); - proposedStopPosition.setShufflePosition(stop); + proposedForkPosition.setShufflePosition(stop); - Assert.assertEquals( - stop, - sourcePositionToCloudPosition( - iter.updateStopPosition(cloudProgressToReaderProgress( - createApproximateProgress(proposedStopPosition)))).getShufflePosition()); + Reader.ForkResult forkResult = + iter.requestFork(toForkRequest(createApproximateProgress(proposedForkPosition))); + Reader.Position acceptedForkPosition = + ((Reader.ForkResultWithPosition) forkResult).getAcceptedPosition(); + assertEquals(stop, toCloudPosition(acceptedForkPosition).getShufflePosition()); - // Cannot update stop position to a position >= the current stop position + // Cannot fork at a position >= the current stop position stop = encodeBase64URLSafeString(fabricatePosition(1, null)); - proposedStopPosition.setShufflePosition(stop); + proposedForkPosition.setShufflePosition(stop); - Assert.assertEquals( - null, - iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + try { + iter.requestFork(toForkRequest(createApproximateProgress(proposedForkPosition))); + fail("IllegalArgumentException expected"); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), Matchers.containsString( + "Fork requested at a shuffle position beyond the end of the current range")); + } } } @Test - public void testReadFromShuffleDataAndFailToUpdateStopPosition() throws Exception { + public void testReadFromShuffleDataAndFailToFork() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); final int kFirstShard = 0; @@ -331,16 +330,14 @@ public void testReadFromShuffleDataAndFailToUpdateStopPosition() throws Exceptio try (Reader.ReaderIterator>>> iter = groupingShuffleReader.iterator(shuffleReader)) { - Position proposedStopPosition = new Position(); - proposedStopPosition.setShufflePosition( - encodeBase64URLSafeString(fabricatePosition(kNumRecords + 1, null))); - - // Cannot update the stop position since the value provided is - // past the current stop position. - Assert.assertEquals( - null, - iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + // Cannot fork since the value provided is past the current stop position. + try { + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kNumRecords + 1, null))); + fail("IllegalArgumentException expected"); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), Matchers.containsString( + "Fork requested at a shuffle position beyond the end of the current range")); + } int i = 0; for (; iter.hasNext(); ++i) { @@ -348,38 +345,31 @@ public void testReadFromShuffleDataAndFailToUpdateStopPosition() throws Exceptio if (i == 0) { // First record byte[] key = CoderUtils.encodeToByteArray(BigEndianIntegerCoder.of(), i); - proposedStopPosition.setShufflePosition( - encodeBase64URLSafeString(fabricatePosition(kFirstShard, key))); - // Cannot update stop position since it is identical with - // the position of the record that was just returned. 
- Assert.assertEquals( - null, - iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); - - proposedStopPosition.setShufflePosition( - encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); - // Cannot update stop position since it comes before current position - Assert.assertEquals( - null, - iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + // Cannot fork since the fork position is identical with the position of the record + // that was just returned. + assertNull( + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kFirstShard, key)))); + + // Cannot fork since the requested fork position comes before current position + assertNull( + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kFirstShard, null)))); } } - Assert.assertEquals(kNumRecords, i); - - proposedStopPosition.setShufflePosition( - encodeBase64URLSafeString(fabricatePosition(kFirstShard, null))); - // Cannot update stop position since all input was consumed. - Assert.assertEquals( - null, - iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + assertEquals(kNumRecords, i); + + // Cannot fork since all input was consumed. + assertNull( + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kFirstShard, null)))); } } + private Position makeShufflePosition(int shard, byte[] key) throws Exception { + return new Position().setShufflePosition( + encodeBase64URLSafeString(fabricatePosition(shard, key))); + } + @Test - public void testReadFromShuffleAndUpdateStopPosition() throws Exception { + public void testReadFromShuffleAndFork() throws Exception { BatchModeExecutionContext context = new BatchModeExecutionContext(); GroupingShuffleReader groupingShuffleReader = new GroupingShuffleReader<>( PipelineOptionsFactory.create(), null, null, null, @@ -414,46 +404,45 @@ public void testReadFromShuffleAndUpdateStopPosition() throws Exception { int i = 0; try (Reader.ReaderIterator>>> iter = groupingShuffleReader.iterator(shuffleReader)) { - Position proposedStopPosition = new Position(); - - Assert.assertNull(iter.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); - - // Stop at the shard boundary - String stop = encodeBase64URLSafeString(fabricatePosition(kSecondShard, null)); - proposedStopPosition.setShufflePosition(stop); + assertNull(iter.requestFork(forkRequestAtPosition(new Position()))); - Assert.assertEquals( - stop, - sourcePositionToCloudPosition( - iter.updateStopPosition(cloudProgressToReaderProgress( - createApproximateProgress(proposedStopPosition)))).getShufflePosition()); + // Fork at the shard boundary + Reader.ForkResult forkResult = + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kSecondShard, null))); + assertEquals( + encodeBase64URLSafeString(fabricatePosition(kSecondShard, null)), + positionFromForkResult(forkResult).getShufflePosition()); while (iter.hasNext()) { - Assert.assertTrue(iter.hasNext()); - Assert.assertTrue(iter.hasNext()); + // iter.hasNext() is supposed to be side-effect-free and give the same result if called + // repeatedly. Test that this is indeed the case. 
+ assertTrue(iter.hasNext()); + assertTrue(iter.hasNext()); KV> elem = iter.next().getValue(); int key = elem.getKey(); - Assert.assertEquals(key, i); + assertEquals(key, i); Iterable valuesIterable = elem.getValue(); Iterator valuesIterator = valuesIterable.iterator(); int j = 0; while (valuesIterator.hasNext()) { - Assert.assertTrue(valuesIterator.hasNext()); - Assert.assertTrue(valuesIterator.hasNext()); + assertTrue(valuesIterator.hasNext()); + assertTrue(valuesIterator.hasNext()); int value = valuesIterator.next(); - Assert.assertEquals(value, i); + assertEquals(value, i); ++j; } - Assert.assertEquals(j, 1); + assertFalse(valuesIterator.hasNext()); + assertFalse(valuesIterator.hasNext()); + assertEquals(j, 1); ++i; } + assertFalse(iter.hasNext()); } - Assert.assertEquals(i, kNumRecords); + assertEquals(i, kNumRecords); } @Test @@ -484,34 +473,32 @@ public void testGetApproximateProgress() throws Exception { groupingShuffleReader.iterator(shuffleReader)) { Integer i = 0; while (readerIterator.hasNext()) { - Assert.assertTrue(readerIterator.hasNext()); - ApproximateProgress progress = sourceProgressToCloudProgress(readerIterator.getProgress()); - Assert.assertNotNull(progress.getPosition().getShufflePosition()); + assertTrue(readerIterator.hasNext()); + ApproximateProgress progress = readerProgressToCloudProgress(readerIterator.getProgress()); + assertNotNull(progress.getPosition().getShufflePosition()); // Compare returned position with the expected position. - Assert.assertEquals( + assertEquals( ByteArrayShufflePosition.of(positionsList.get(i)).encodeBase64(), progress.getPosition().getShufflePosition()); WindowedValue>> elem = readerIterator.next(); - Assert.assertEquals(i, elem.getValue().getKey()); + assertEquals(i, elem.getValue().getKey()); i++; } - Assert.assertFalse(readerIterator.hasNext()); + assertFalse(readerIterator.hasNext()); - // Cannot update stop position since all input was consumed. - Position proposedStopPosition = new Position(); + // Cannot fork since all input was consumed. 
+ Position proposedForkPosition = new Position(); String stop = encodeBase64URLSafeString(fabricatePosition(0, null)); - proposedStopPosition.setShufflePosition(stop); - Assert.assertEquals( - null, - readerIterator.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + proposedForkPosition.setShufflePosition(stop); + assertNull( + readerIterator.requestFork( + toForkRequest(createApproximateProgress(proposedForkPosition)))); } } - private ApproximateProgress createApproximateProgress( - com.google.api.services.dataflow.model.Position position) { + private ApproximateProgress createApproximateProgress(Position position) { return new ApproximateProgress().setPosition(position); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java index 0068e5e275172..bd6f02226a60e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java @@ -16,20 +16,28 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromForkResult; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toForkRequest; import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import com.google.api.services.dataflow.model.ApproximateProgress; -import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; -import org.junit.Assert; +import org.hamcrest.Matchers; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -62,14 +70,13 @@ void runTestReadInMemory(List elements, Long startIndex, Long endIndex, List actualElements = new ArrayList<>(); try (Reader.ReaderIterator iterator = inMemoryReader.iterator()) { for (long i = inMemoryReader.startIndex; iterator.hasNext(); i++) { - Assert.assertEquals( - new ApproximateProgress().setPosition(makeIndexPosition(i)), - sourceProgressToCloudProgress(iterator.getProgress())); + 
assertEquals( + approximateProgressAtIndex(i), readerProgressToCloudProgress(iterator.getProgress())); actualElements.add(iterator.next()); } } - Assert.assertEquals(expectedElements, actualElements); - Assert.assertEquals(expectedSizes, observer.getActualSizes()); + assertEquals(expectedElements, actualElements); + assertEquals(expectedSizes, observer.getActualSizes()); } @Test @@ -129,7 +136,7 @@ public void testReadNoElementsFromStartToEndEmptyRange() throws Exception { } @Test - public void testUpdatePosition() throws Exception { + public void testFork() throws Exception { List elements = Arrays.asList(33, 44, 55, 66, 77, 88); final long start = 1L; final long stop = 3L; @@ -139,52 +146,44 @@ public void testUpdatePosition() throws Exception { InMemoryReader inMemoryReader = new InMemoryReader<>(encodedElements(elements, coder), start, end, coder); - // Illegal proposed stop position. + // Illegal proposed fork position. try (Reader.ReaderIterator iterator = inMemoryReader.iterator()) { - Assert.assertNull( - iterator.updateStopPosition(cloudProgressToReaderProgress(new ApproximateProgress()))); - Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(makeIndexPosition(null))))); + assertNull(iterator.requestFork(toForkRequest(new ApproximateProgress()))); + assertNull(iterator.requestFork(forkRequestAtIndex(null))); } // Successful update. try (InMemoryReader.InMemoryReaderIterator iterator = (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { - Assert.assertEquals( - makeIndexPosition(stop), - sourcePositionToCloudPosition(iterator.updateStopPosition(cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(makeIndexPosition(stop)))))); - Assert.assertEquals(stop, iterator.endPosition); - Assert.assertEquals(44, iterator.next().intValue()); - Assert.assertEquals(55, iterator.next().intValue()); - Assert.assertFalse(iterator.hasNext()); + Reader.ForkResult forkResult = iterator.requestFork(forkRequestAtIndex(stop)); + assertEquals(positionAtIndex(stop), positionFromForkResult(forkResult)); + assertEquals(stop, iterator.endPosition); + assertEquals(44, iterator.next().intValue()); + assertEquals(55, iterator.next().intValue()); + assertFalse(iterator.hasNext()); } - // Proposed stop position is before the current position, no update. + // Proposed fork position is before the current position, no update. try (InMemoryReader.InMemoryReaderIterator iterator = (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { - Assert.assertEquals(44, iterator.next().intValue()); - Assert.assertEquals(55, iterator.next().intValue()); - Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(makeIndexPosition(stop))))); - Assert.assertEquals((int) end, iterator.endPosition); - Assert.assertTrue(iterator.hasNext()); + assertEquals(44, iterator.next().intValue()); + assertEquals(55, iterator.next().intValue()); + assertNull(iterator.requestFork(forkRequestAtIndex(stop))); + assertEquals((int) end, iterator.endPosition); + assertTrue(iterator.hasNext()); } - // Proposed stop position is after the current stop (end) position, no update. + // Proposed fork position is after the current stop (end) position, no update. 
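    // In the fork model this case is reported as an error rather than silently ignored:
    // the request below names index end + 1, which lies outside the iterator's range
    // [start, end), so requestFork(forkRequestAtIndex(end + 1)) is expected to throw
    // IllegalArgumentException.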
try (InMemoryReader.InMemoryReaderIterator iterator = (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { - Assert.assertNull(iterator.updateStopPosition(cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(makeIndexPosition(end + 1))))); - Assert.assertEquals((int) end, iterator.endPosition); - } - } - - private Position makeIndexPosition(Long index) { - Position position = new Position(); - if (index != null) { - position.setRecordIndex(index); + try { + iterator.requestFork(forkRequestAtIndex(end + 1)); + fail("IllegalArgumentException expected"); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), Matchers.containsString( + "Fork requested at an index beyond the end of the current range")); + } + assertEquals((int) end, iterator.endPosition); } - return position; } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java new file mode 100644 index 0000000000000..96f8b8b432cc5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.runners.worker; + +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toForkRequest; + +import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; +import com.google.cloud.dataflow.sdk.util.common.worker.Reader; + +import javax.annotation.Nullable; + +/** + * Helpers for testing {@code Reader} and related classes, especially + * {@link Reader.ReaderIterator#getProgress} and {@link Reader.ReaderIterator#requestFork}. 
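 *
 * <p>Typical usage in a reader test (illustrative sketch only):
 * <pre>{@code
 * Reader.ForkResult fork = iterator.requestFork(forkRequestAtIndex(3L));
 * assertEquals(positionAtIndex(3L), positionFromForkResult(fork));
 * assertEquals(positionAtIndex(1L), positionFromProgress(iterator.getProgress()));
 * }</pre>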
+ */ +public class ReaderTestUtils { + public static Position positionAtIndex(@Nullable Long index) { + return new Position().setRecordIndex(index); + } + + public static Position positionAtByteOffset(@Nullable Long byteOffset) { + return new Position().setByteOffset(byteOffset); + } + + public static ApproximateProgress approximateProgressAtPosition(@Nullable Position position) { + return new ApproximateProgress().setPosition(position); + } + + public static ApproximateProgress approximateProgressAtIndex(@Nullable Long index) { + return approximateProgressAtPosition(positionAtIndex(index)); + } + + public static ApproximateProgress approximateProgressAtByteOffset(@Nullable Long byteOffset) { + return approximateProgressAtPosition(positionAtByteOffset(byteOffset)); + } + + public static Reader.ForkRequest forkRequestAtPosition(@Nullable Position position) { + return toForkRequest(approximateProgressAtPosition(position)); + } + + public static Reader.ForkRequest forkRequestAtIndex(@Nullable Long index) { + return toForkRequest(approximateProgressAtIndex(index)); + } + + public static Reader.ForkRequest forkRequestAtByteOffset(@Nullable Long byteOffset) { + return toForkRequest(approximateProgressAtByteOffset(byteOffset)); + } + + public static Position positionFromForkResult(Reader.ForkResult forkResult) { + return toCloudPosition(((Reader.ForkResultWithPosition) forkResult).getAcceptedPosition()); + } + + public static Position positionFromProgress(Reader.Progress progress) { + return readerProgressToCloudProgress(progress).getPosition(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index 95981a7eecccf..52c9c1cfd2866 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -16,13 +16,22 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtByteOffset; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromForkResult; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; @@ -33,7 +42,7 @@ import 
com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.junit.Assert; +import org.hamcrest.Matchers; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -61,6 +70,7 @@ public class TextReaderTest { static { long sumLen = 0L; + for (String s : fileContent) { sumLen += s.length(); } @@ -83,11 +93,10 @@ private File initTestFile() throws IOException { @Test public void testReadEmptyFile() throws Exception { - TextReader textReader = - new TextReader<>(tmpFolder.newFile().getPath(), true, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFolder.newFile().getPath(), true, null, + null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertFalse(iterator.hasNext()); + assertFalse(iterator.hasNext()); } } @@ -136,66 +145,62 @@ public void testStartPosition() throws Exception { File tmpFile = initTestFile(); { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, 11L, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertEquals("Second line\r\n", iterator.next()); - Assert.assertEquals("Third line", iterator.next()); - Assert.assertFalse(iterator.hasNext()); + assertEquals("Second line\r\n", iterator.next()); + assertEquals("Third line", iterator.next()); + assertFalse(iterator.hasNext()); // The first '1' in the array represents the reading of '\n' between first and // second line, to confirm that we are reading from the beginning of a record. - Assert.assertEquals(Arrays.asList(1, 13, 10), observer.getActualSizes()); + assertEquals(Arrays.asList(1, 13, 10), observer.getActualSizes()); } } { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, 20L, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertEquals("Third line", iterator.next()); - Assert.assertFalse(iterator.hasNext()); + assertEquals("Third line", iterator.next()); + assertFalse(iterator.hasNext()); // The first '5' in the array represents the reading of a portion of the second // line, which had to be read to find the beginning of the third line. 
- Assert.assertEquals(Arrays.asList(5, 10), observer.getActualSizes()); + assertEquals(Arrays.asList(5, 10), observer.getActualSizes()); } } { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), true, 0L, 20L, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertEquals("First line", iterator.next()); - Assert.assertEquals("Second line", iterator.next()); - Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); + assertEquals("First line", iterator.next()); + assertEquals("Second line", iterator.next()); + assertFalse(iterator.hasNext()); + assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); } } { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), true, 1L, 20L, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertEquals("Second line", iterator.next()); - Assert.assertFalse(iterator.hasNext()); + assertEquals("Second line", iterator.next()); + assertFalse(iterator.hasNext()); // The first '11' in the array represents the reading of the entire first // line, which had to be read to find the beginning of the second line. - Assert.assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); + assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); } } } @@ -214,34 +219,32 @@ public void testUtf8Handling() throws Exception { // 3L is after the first line if counting codepoints, but within // the first line if counting chars. So correct behavior is to return // just one line, since offsets are in chars, not codepoints. - TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), true, 0L, 3L, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertArrayEquals("€".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); - Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals(Arrays.asList(4), observer.getActualSizes()); + assertArrayEquals("€".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); + assertFalse(iterator.hasNext()); + assertEquals(Arrays.asList(4), observer.getActualSizes()); } } { // Starting location is mid-way into a codepoint. // Ensures we don't fail when skipping over an incomplete codepoint. 
- TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), true, 2L, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator iterator = textReader.iterator()) { - Assert.assertArrayEquals("¢".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); - Assert.assertFalse(iterator.hasNext()); + assertArrayEquals("¢".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); + assertFalse(iterator.hasNext()); // The first '3' in the array represents the reading of a portion of the first // line, which had to be read to find the beginning of the second line. - Assert.assertEquals(Arrays.asList(3, 3), observer.getActualSizes()); + assertEquals(Arrays.asList(3, 3), observer.getActualSizes()); } } } @@ -258,9 +261,8 @@ private void testNewlineHandling(String separator, boolean stripNewlines) throws } writer.close(); - TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -272,16 +274,16 @@ private void testNewlineHandling(String separator, boolean stripNewlines) throws } if (stripNewlines) { - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } else { List unstripped = new LinkedList<>(); for (String s : expected) { unstripped.add(s + separator); } - Assert.assertEquals(unstripped, actual); + assertEquals(unstripped, actual); } - Assert.assertEquals(expectedSizes, observer.getActualSizes()); + assertEquals(expectedSizes, observer.getActualSizes()); } private void testStringPayload(String[] lines, String separator, boolean stripNewlines) @@ -296,19 +298,15 @@ private void testStringPayload(String[] lines, String separator, boolean stripNe } writer.close(); - TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); - ExecutorTestUtils.TestReaderObserver observer = - new ExecutorTestUtils.TestReaderObserver(textReader); - + TextReader textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); List actual = new ArrayList<>(); try (Reader.ReaderIterator iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } @Test @@ -327,9 +325,8 @@ public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() throws Excep writer.close(); Long fileSize = tmpFile.length(); - TextReader textReader = - new TextReader<>(tmpFile.getPath(), stripNewlines, null, fileSize, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, + fileSize, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); List actual = new ArrayList<>(); Reader.ReaderIterator iterator = textReader.iterator(); @@ -337,7 +334,7 @@ public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() throws Excep 
actual.add(iterator.next()); iterator = iterator.copy(); } - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } @Test @@ -354,9 +351,8 @@ public void testNonStringCoders() throws Exception { } writer.close(); - TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, null, null, TextualIntegerCoder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), true, null, null, + TextualIntegerCoder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); @@ -367,30 +363,29 @@ public void testNonStringCoders() throws Exception { } } - Assert.assertEquals(expected, actual); - Assert.assertEquals(expectedSizes, observer.getActualSizes()); + assertEquals(expected, actual); + assertEquals(expectedSizes, observer.getActualSizes()); } @Test public void testGetApproximatePosition() throws Exception { File tmpFile = initTestFile(); - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, 0L, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator iterator = textReader.iterator()) { - ApproximateProgress progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(0L, progress.getPosition().getByteOffset().longValue()); + ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); + assertEquals(0L, progress.getPosition().getByteOffset().longValue()); iterator.next(); - progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(11L, progress.getPosition().getByteOffset().longValue()); + progress = readerProgressToCloudProgress(iterator.getProgress()); + assertEquals(11L, progress.getPosition().getByteOffset().longValue()); iterator.next(); - progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(24L, progress.getPosition().getByteOffset().longValue()); + progress = readerProgressToCloudProgress(iterator.getProgress()); + assertEquals(24L, progress.getPosition().getByteOffset().longValue()); iterator.next(); - progress = sourceProgressToCloudProgress(iterator.getProgress()); - Assert.assertEquals(34L, progress.getPosition().getByteOffset().longValue()); - Assert.assertFalse(iterator.hasNext()); + progress = readerProgressToCloudProgress(iterator.getProgress()); + assertEquals(34L, progress.getPosition().getByteOffset().longValue()); + assertFalse(iterator.hasNext()); } } @@ -400,49 +395,36 @@ public void testUpdateStopPosition() throws Exception { final long stop = 14L; // in the middle of the second line File tmpFile = initTestFile(); - com.google.api.services.dataflow.model.Position proposedStopPosition = - new com.google.api.services.dataflow.model.Position(); - // Illegal proposed stop position, no update. 
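    // An empty cloud Position carries neither a byte offset nor a record index, so the fork
    // request built from it below is unusable; the iterator signals this by returning null
    // (contrast with the out-of-range case later in this test, which is expected to throw
    // IllegalArgumentException).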
{ - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); - ExecutorTestUtils.TestReaderObserver observer = - new ExecutorTestUtils.TestReaderObserver(textReader); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, null, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); + assertNull(iterator.requestFork(forkRequestAtPosition(new Position()))); } } - proposedStopPosition.setByteOffset(stop); - // Successful update. { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, null, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { - Assert.assertNull(iterator.getEndOffset()); - Assert.assertEquals( - stop, - sourcePositionToCloudPosition( - iterator.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))) - .getByteOffset() - .longValue()); - Assert.assertEquals(stop, iterator.getEndOffset().longValue()); - Assert.assertEquals(fileContent[0], iterator.next()); - Assert.assertEquals(fileContent[1], iterator.next()); - Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals( + assertNull(iterator.getEndOffset()); + assertEquals( + Long.valueOf(stop), + positionFromForkResult(iterator.requestFork(forkRequestAtByteOffset(stop))) + .getByteOffset()); + assertEquals(stop, iterator.getEndOffset().longValue()); + assertEquals(fileContent[0], iterator.next()); + assertEquals(fileContent[1], iterator.next()); + assertFalse(iterator.hasNext()); + assertEquals( Arrays.asList(fileContent[0].length(), fileContent[1].length()), observer.getActualSizes()); } @@ -450,25 +432,23 @@ public void testUpdateStopPosition() throws Exception { // Proposed stop position is before the current position, no update. 
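    // Records up to the current position have already been returned and cannot be taken
    // back, so a fork request that falls at or before that position is declined with null
    // and the iterator keeps its original range.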
{ - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, null, null, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { - Assert.assertEquals(fileContent[0], iterator.next()); - Assert.assertEquals(fileContent[1], iterator.next()); - Assert.assertThat( - sourceProgressToCloudProgress(iterator.getProgress()).getPosition().getByteOffset(), + assertEquals(fileContent[0], iterator.next()); + assertEquals(fileContent[1], iterator.next()); + assertThat( + readerProgressToCloudProgress(iterator.getProgress()).getPosition().getByteOffset(), greaterThan(stop)); - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); - Assert.assertNull(iterator.getEndOffset()); - Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(fileContent[2], iterator.next()); - Assert.assertEquals( + assertNull(iterator.requestFork(forkRequestAtByteOffset(stop))); + assertNull(iterator.getEndOffset()); + assertTrue(iterator.hasNext()); + assertEquals(fileContent[2], iterator.next()); + assertEquals( Arrays.asList( fileContent[0].length(), fileContent[1].length(), fileContent[2].length()), observer.getActualSizes()); @@ -477,20 +457,25 @@ public void testUpdateStopPosition() throws Exception { // Proposed stop position is after the current stop (end) position, no update. { - TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, null, end, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, null, end, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { - Assert.assertEquals(fileContent[0], iterator.next()); - Assert.assertNull(iterator.updateStopPosition( - cloudProgressToReaderProgress(createApproximateProgress(proposedStopPosition)))); - Assert.assertEquals(end, iterator.getEndOffset().longValue()); - Assert.assertFalse(iterator.hasNext()); - Assert.assertEquals(Arrays.asList(fileContent[0].length()), observer.getActualSizes()); + assertEquals(fileContent[0], iterator.next()); + try { + iterator.requestFork(forkRequestAtByteOffset(stop)); + fail("IllegalArgumentException expected"); + } catch (IllegalArgumentException e) { + assertThat(e.getMessage(), Matchers.containsString( + "Fork requested at an offset beyond the end of the current range")); + } + + assertEquals(end, iterator.getEndOffset().longValue()); + assertFalse(iterator.hasNext()); + assertEquals(Arrays.asList(fileContent[0].length()), observer.getActualSizes()); } } } @@ -523,56 +508,48 @@ private void stopPositionTestInternal( StringBuilder accumulatedRead = new StringBuilder(); // Read from source without split attempts. 
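    // The invariant exercised by this helper: for any stopOffset within [startOffset, endOffset),
    //
    //   read(startOffset, endOffset) == read(startOffset, stopOffset) + read(stopOffset, endOffset)
    //
    // i.e. splitting the range at stopOffset must neither drop nor duplicate any input,
    // which is what the final assertEquals below verifies.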
- TextReader textReader = - new TextReader<>(tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + TextReader textReader = new TextReader<>(tmpFile.getPath(), false, startOffset, + endOffset, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { - accumulatedRead.append((String) iterator.next()); + accumulatedRead.append(iterator.next()); } readWithoutSplit = accumulatedRead.toString(); } // Read the first half of the split. - textReader = - new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + textReader = new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { - accumulatedRead.append((String) iterator.next()); + accumulatedRead.append(iterator.next()); } readWithSplit1 = accumulatedRead.toString(); } // Read the second half of the split. - textReader = - new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, StringUtf8Coder.of(), - TextIO.CompressionType.UNCOMPRESSED); + textReader = new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { while (iterator.hasNext()) { - accumulatedRead.append((String) iterator.next()); + accumulatedRead.append(iterator.next()); } readWithSplit2 = accumulatedRead.toString(); } - Assert.assertEquals(readWithoutSplit, readWithSplit1 + readWithSplit2); - } - - private ApproximateProgress createApproximateProgress( - com.google.api.services.dataflow.model.Position position) { - return new ApproximateProgress().setPosition(position); + assertEquals(readWithoutSplit, readWithSplit1 + readWithSplit2); } - private OutputStream getOutputStreamForCompressionType(OutputStream stream, - CompressionType compressionType) throws IOException { + private OutputStream getOutputStreamForCompressionType( + OutputStream stream, CompressionType compressionType) throws IOException { switch (compressionType) { case GZIP: return new GZIPOutputStream(stream); @@ -582,13 +559,13 @@ private OutputStream getOutputStreamForCompressionType(OutputStream stream, case AUTO: return stream; default: - Assert.fail("Unrecognized stream type"); + fail("Unrecognized stream type"); } return stream; } - private File createFileWithCompressionType(String[] lines, String filename, - CompressionType compressionType) throws IOException { + private File createFileWithCompressionType( + String[] lines, String filename, CompressionType compressionType) throws IOException { File tmpFile = tmpFolder.newFile(filename); PrintStream writer = new PrintStream( getOutputStreamForCompressionType(new FileOutputStream(tmpFile), compressionType)); @@ -601,7 +578,7 @@ private File createFileWithCompressionType(String[] lines, String filename, private void testCompressionTypeHelper(String[] lines, String filename, CompressionType outputCompressionType, CompressionType inputCompressionType) - throws IOException { + throws IOException { File tmpFile = createFileWithCompressionType(lines, filename, 
outputCompressionType); List expected = new ArrayList<>(); @@ -609,9 +586,8 @@ private void testCompressionTypeHelper(String[] lines, String filename, expected.add(line); } - TextReader textReader = - new TextReader<>(tmpFile.getPath(), true, null, null, StringUtf8Coder.of(), - inputCompressionType); + TextReader textReader = new TextReader<>( + tmpFile.getPath(), true, null, null, StringUtf8Coder.of(), inputCompressionType); List actual = new ArrayList<>(); try (Reader.ReaderIterator iterator = textReader.iterator()) { @@ -619,7 +595,7 @@ private void testCompressionTypeHelper(String[] lines, String filename, actual.add(iterator.next()); } } - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); tmpFile.delete(); } @@ -629,22 +605,20 @@ public void testCompressionTypeOneFile() throws IOException { // test AUTO compression type with different extensions testCompressionTypeHelper(contents, "test.gz", CompressionType.GZIP, CompressionType.AUTO); testCompressionTypeHelper(contents, "test.bz2", CompressionType.BZIP2, CompressionType.AUTO); - testCompressionTypeHelper(contents, "test.txt", CompressionType.UNCOMPRESSED, - CompressionType.AUTO); - testCompressionTypeHelper(contents, "test", CompressionType.UNCOMPRESSED, - CompressionType.AUTO); + testCompressionTypeHelper( + contents, "test.txt", CompressionType.UNCOMPRESSED, CompressionType.AUTO); + testCompressionTypeHelper(contents, "test", CompressionType.UNCOMPRESSED, CompressionType.AUTO); // test GZIP, BZIP2, and UNCOMPRESSED testCompressionTypeHelper(contents, "test.txt", CompressionType.GZIP, CompressionType.GZIP); testCompressionTypeHelper(contents, "test.txt", CompressionType.BZIP2, CompressionType.BZIP2); - testCompressionTypeHelper(contents, "test.gz", CompressionType.UNCOMPRESSED, - CompressionType.UNCOMPRESSED); + testCompressionTypeHelper( + contents, "test.gz", CompressionType.UNCOMPRESSED, CompressionType.UNCOMPRESSED); } @Test public void testCompressionTypeFileGlob() throws IOException { String[][] contents = { - {"Miserable pigeon", "Vulnerable sparrow", "Brazen crow"}, - {"Timid osprey", "Lazy vulture"}, + {"Miserable pigeon", "Vulnerable sparrow", "Brazen crow"}, {"Timid osprey", "Lazy vulture"}, {"Erratic finch", "Impressible parakeet"}, }; File[] files = { @@ -671,7 +645,7 @@ public void testCompressionTypeFileGlob() throws IOException { actual.add(iterator.next()); } } - Assert.assertThat(actual, containsInAnyOrder(expected.toArray())); + assertThat(actual, containsInAnyOrder(expected.toArray())); for (File file : files) { file.delete(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java index 07944893ffebd..f13d0e02606c5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java @@ -16,10 +16,14 @@ package com.google.cloud.dataflow.sdk.util.common.worker; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromForkResult; +import static 
com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; import com.google.api.services.dataflow.model.ApproximateProgress; @@ -96,10 +100,10 @@ public Reader.Progress getProgress() { } @Override - public Reader.Position proposeStopPosition(Reader.Progress proposedStopPosition) { + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) { // Fakes the return with the same position as proposed. - return cloudPositionToReaderPosition( - sourceProgressToCloudProgress(proposedStopPosition).getPosition()); + return new Reader.ForkResultWithPosition(cloudPositionToReaderPosition( + forkRequestToApproximateProgress(forkRequest).getPosition())); } public void setProgress(ApproximateProgress progress) { @@ -194,7 +198,7 @@ public void testGetReadOperation() throws Exception { new MapTaskExecutor(new ArrayList(), counterSet, stateSampler); try { - ReadOperation readOperation = executor.getReadOperation(); + executor.getReadOperation(); Assert.fail("Expected IllegalStateException."); } catch (IllegalStateException e) { // Exception expected @@ -208,7 +212,7 @@ public void testGetReadOperation() throws Exception { executor = new MapTaskExecutor(operations, counterSet, stateSampler); try { - ReadOperation readOperation = executor.getReadOperation(); + executor.getReadOperation(); Assert.fail("Expected IllegalStateException."); } catch (IllegalStateException e) { // Exception expected @@ -235,22 +239,11 @@ public void testGetProgressAndRequestSplit() throws Exception { MapTaskExecutor executor = new MapTaskExecutor(Arrays.asList(new Operation[] {operation}), counterSet, stateSampler); - operation.setProgress(new ApproximateProgress().setPosition(makePosition(1L))); + operation.setProgress(approximateProgressAtIndex(1L)); + Assert.assertEquals(positionAtIndex(1L), positionFromProgress(executor.getWorkerProgress())); Assert.assertEquals( - makePosition(1L), - sourceProgressToCloudProgress(executor.getWorkerProgress()).getPosition()); - Assert.assertEquals( - makePosition(1L), - sourcePositionToCloudPosition(executor.proposeStopPosition(cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(makePosition(1L)))))); + positionAtIndex(1L), positionFromForkResult(executor.requestFork(forkRequestAtIndex(1L)))); executor.close(); } - - private com.google.api.services.dataflow.model.Position makePosition(long index) { - com.google.api.services.dataflow.model.Position position = - new com.google.api.services.dataflow.model.Position(); - position.setRecordIndex(index); - return position; - } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index 8d754bd0d4368..9a778bdef25c6 100644 --- 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -16,14 +16,15 @@ package com.google.cloud.dataflow.sdk.util.common.worker; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.forkRequestAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionAtIndex; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourcePositionToCloudPosition; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.sourceProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.forkRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.everyItem; -import static org.hamcrest.collection.IsIterableContainingInOrder.contains; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.api.services.dataflow.model.Position; @@ -39,103 +40,14 @@ import org.junit.runners.JUnit4; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.NoSuchElementException; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Exchanger; /** * Tests for ReadOperation. */ @RunWith(JUnit4.class) public class ReadOperationTest { - private static final long ITERATIONS = 3L; - - /** - * The test Reader for testing updating stop position and progress report. - * The number of read iterations is controlled by ITERATIONS. - */ - static class TestTextReader extends Reader { - @Override - public ReaderIterator iterator() { - return new TestTextReaderIterator(); - } - - class TestTextReaderIterator extends AbstractReaderIterator { - long offset = 0L; - List proposedPositions = new ArrayList<>(); - - @Override - public boolean hasNext() { - return offset < ITERATIONS; - } - - @Override - public String next() { - if (hasNext()) { - offset++; - return "hi"; - } else { - throw new AssertionError("No next Element."); - } - } - - @Override - public Progress getProgress() { - com.google.api.services.dataflow.model.Position currentPosition = - new com.google.api.services.dataflow.model.Position(); - currentPosition.setByteOffset(offset); - - ApproximateProgress progress = new ApproximateProgress(); - progress.setPosition(currentPosition); - - return cloudProgressToReaderProgress(progress); - } - - @Override - public Position updateStopPosition(Progress proposedStopPosition) { - proposedPositions.add(sourceProgressToCloudProgress(proposedStopPosition).getPosition()); - // Actually no update happens, returns null. - return null; - } - } - } - - /** - * The OutputReceiver for testing updating stop position and progress report. 
- * The offset of the Reader (iterator) will be advanced each time this - * Receiver processes a record. - */ - static class TestTextReceiver extends OutputReceiver { - ReadOperation readOperation = null; - com.google.api.services.dataflow.model.Position proposedStopPosition = null; - List progresses = new ArrayList<>(); - - public TestTextReceiver(CounterSet counterSet, String counterPrefix) { - super("test_receiver_out", counterPrefix, counterSet.getAddCounterMutator()); - } - - public void setReadOperation(ReadOperation readOp) { - this.readOperation = readOp; - } - - public void setProposedStopPosition(com.google.api.services.dataflow.model.Position position) { - this.proposedStopPosition = position; - } - - @Override - public void process(Object outputElem) throws Exception { - // Calls getProgress() and proposeStopPosition() in each iteration. - progresses.add(sourceProgressToCloudProgress(readOperation.getProgress())); - // We expect that call to proposeStopPosition is a no-op that does not - // update the stop position for every iteration. We will verify it is - // delegated to ReaderIterator after ReadOperation finishes. - Assert.assertNull(readOperation.proposeStopPosition( - cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); - } - } - @Test @SuppressWarnings("unchecked") public void testRunReadOperation() throws Exception { @@ -178,129 +90,180 @@ public void testRunReadOperation() throws Exception { } @Test - public void testGetProgressAndProposeStopPosition() throws Exception { - TestTextReader testTextReader = new TestTextReader(); + public void testGetProgress() throws Exception { + MockReaderIterator iterator = new MockReaderIterator(0, 5); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); - TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); - ReadOperation readOperation = new ReadOperation( - testTextReader, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); - readOperation.setProgressUpdatePeriodMs(0); - receiver.setReadOperation(readOperation); - - Position proposedStopPosition = makePosition(3L); - receiver.setProposedStopPosition(proposedStopPosition); - - Assert.assertNull(readOperation.getProgress()); - Assert.assertNull(readOperation.proposeStopPosition( - cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); - - readOperation.start(); - - TestTextReader.TestTextReaderIterator testIterator = - (TestTextReader.TestTextReaderIterator) readOperation.readerIterator; - - Assert.assertEquals( - sourceProgressToCloudProgress(testIterator.getProgress()), - sourceProgressToCloudProgress(readOperation.getProgress())); - Assert.assertEquals( - sourcePositionToCloudPosition(testIterator.updateStopPosition( - cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))), - sourcePositionToCloudPosition(readOperation.proposeStopPosition( - cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition))))); - - // Verifies progress report and stop position updates. 
- Assert.assertEquals(testIterator.proposedPositions.size(), ITERATIONS + 2); - Assert.assertThat(testIterator.proposedPositions, everyItem(equalTo(makePosition(3L)))); - Assert.assertThat( - receiver.progresses, - contains( - makeApproximateProgress(1L), makeApproximateProgress(2L), makeApproximateProgress(3L))); - - readOperation.finish(); + final ReadOperation readOperation = new ReadOperation(new MockReader(iterator), + new OutputReceiver("out", "test-", counterSet.getAddCounterMutator()), counterPrefix, + counterSet.getAddCounterMutator(), + new StateSampler(counterPrefix, counterSet.getAddCounterMutator())); + // Update progress not continuously, but so that it's never more than 1 record stale. + readOperation.setProgressUpdatePeriodMs(150); - Assert.assertNull(readOperation.proposeStopPosition( - cloudProgressToReaderProgress(makeApproximateProgress(proposedStopPosition)))); + Thread thread = runReadLoopInThread(readOperation); + for (int i = 0; i < 5; ++i) { + Thread.sleep(300); // Wait for the operation to start and block. + // Ensure that getProgress() doesn't block while the next() method is blocked. + ApproximateProgress progress = readerProgressToCloudProgress(readOperation.getProgress()); + long observedIndex = progress.getPosition().getRecordIndex().longValue(); + Assert.assertTrue("Actual: " + observedIndex, i == observedIndex || i == observedIndex + 1); + iterator.offerNext(i); + } + thread.join(); } @Test - public void testGetProgressDoesNotBlock() throws Exception { - final BlockingQueue queue = new LinkedBlockingQueue<>(); - final Reader.ReaderIterator iterator = new Reader.AbstractReaderIterator() { - private int itemsReturned = 0; + public void testFork() throws Exception { + MockReaderIterator iterator = new MockReaderIterator(0, 10); + CounterSet counterSet = new CounterSet(); + MockOutputReceiver receiver = new MockOutputReceiver(counterSet.getAddCounterMutator()); + ReadOperation readOperation = new ReadOperation(new MockReader(iterator), receiver, "test-", + counterSet.getAddCounterMutator(), + new StateSampler("test-", counterSet.getAddCounterMutator())); + // Update progress on every iteration of the read loop. + readOperation.setProgressUpdatePeriodMs(0); - @Override - public boolean hasNext() throws IOException { - return itemsReturned < 5; - } + // An unstarted ReadOperation refuses fork requests. + Assert.assertNull( + readOperation.requestFork(forkRequestAtIndex(7L))); + + Thread thread = runReadLoopInThread(readOperation); + iterator.offerNext(0); // Await first next() and return 0 from it. + // Read loop is now blocked in process() (not next()). + Reader.ForkResultWithPosition fork = (Reader.ForkResultWithPosition) readOperation.requestFork( + forkRequestAtIndex(7L)); + Assert.assertNotNull(fork); + Assert.assertEquals(positionAtIndex(7L), toCloudPosition(fork.getAcceptedPosition())); + receiver.unblockProcess(); + iterator.offerNext(1); + receiver.unblockProcess(); + iterator.offerNext(2); + + // Should accept a fork at an earlier position than previously requested. + // Should reject a fork at a later position than previously requested. + // Note that here we're testing our own MockReaderIterator class, so it's kind of pointless, + // but we're also testing that ReadOperation correctly relays the request to the iterator. 
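    // Sketch of the lock-step protocol this test relies on (see MockReaderIterator and
    // MockOutputReceiver below): both sides rendezvous on Exchangers, so
    //
    //   iterator.offerNext(n);        // releases the read loop blocked in next(), yielding n
    //   readOperation.requestFork(p); // issued while the loop is parked inside process()
    //   receiver.unblockProcess();    // releases process() so the loop can advance
    //
    // which pins down exactly which record was current when each fork request was made.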
+ fork = (Reader.ForkResultWithPosition) readOperation.requestFork(forkRequestAtIndex(5L)); + Assert.assertNotNull(fork); + Assert.assertEquals(positionAtIndex(5L), toCloudPosition(fork.getAcceptedPosition())); + fork = (Reader.ForkResultWithPosition) readOperation.requestFork(forkRequestAtIndex(5L)); + Assert.assertNull(fork); + receiver.unblockProcess(); + + iterator.offerNext(3); + receiver.unblockProcess(); + iterator.offerNext(4); + receiver.unblockProcess(); + + // Should return false from hasNext() and exit read loop now. + + thread.join(); + + // Operation is now finished. Check that it refuses a fork request. + Assert.assertNull(readOperation.requestFork(forkRequestAtIndex(5L))); + } + private Thread runReadLoopInThread(final ReadOperation readOperation) { + Thread thread = new Thread() { @Override - public Integer next() throws IOException { - ++itemsReturned; + public void run() { try { - return queue.take(); - } catch (InterruptedException e) { - throw new NoSuchElementException("interrupted"); + readOperation.start(); + readOperation.finish(); + } catch (Exception e) { + e.printStackTrace(); } } - - @Override - public Reader.Progress getProgress() { - return cloudProgressToReaderProgress(new ApproximateProgress().setPosition( - new Position().setRecordIndex((long) itemsReturned))); - } }; + thread.start(); + return thread; + } - Reader reader = new Reader() { - @Override - public ReaderIterator iterator() throws IOException { - return iterator; + private static class MockReaderIterator extends Reader.AbstractReaderIterator { + private int to; + private Exchanger exchanger = new Exchanger<>(); + private int current; + + public MockReaderIterator(int from, int to) { + this.current = from; + this.to = to; + } + + @Override + public boolean hasNext() throws IOException { + return current < to; + } + + @Override + public Integer next() throws IOException { + ++current; + try { + return exchanger.exchange(current); + } catch (InterruptedException e) { + throw new NoSuchElementException("interrupted"); } - }; + } - CounterSet counterSet = new CounterSet(); - String counterPrefix = "test-"; - StateSampler stateSampler = new StateSampler(counterPrefix, counterSet.getAddCounterMutator()); - TestTextReceiver receiver = new TestTextReceiver(counterSet, counterPrefix); - final ReadOperation readOperation = new ReadOperation( - reader, receiver, counterPrefix, counterSet.getAddCounterMutator(), stateSampler); - // Update progress not continuously, but so that it's never more than 1 record stale. - readOperation.setProgressUpdatePeriodMs(150); - receiver.setReadOperation(readOperation); + @Override + public Reader.Progress getProgress() { + return cloudProgressToReaderProgress( + new ApproximateProgress().setPosition(new Position().setRecordIndex((long) current))); + } - new Thread() { - @Override - public void run() { - try { - readOperation.start(); - readOperation.finish(); - } catch (Exception e) { - e.printStackTrace(); - } + @Override + public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) { + ApproximateProgress progress = forkRequestToApproximateProgress(forkRequest); + int index = progress.getPosition().getRecordIndex().intValue(); + if (index >= to) { + return null; + } else { + this.to = index; + return new Reader.ForkResultWithPosition( + cloudPositionToReaderPosition(progress.getPosition())); } - }.start(); + } - for (int i = 0; i < 5; ++i) { - Thread.sleep(100); // Wait for the operation to start and block. - // Ensure that getProgress() doesn't block. 
- ApproximateProgress progress = sourceProgressToCloudProgress(readOperation.getProgress()); - long observedIndex = progress.getPosition().getRecordIndex().longValue(); - Assert.assertTrue("Actual: " + observedIndex, i == observedIndex || i == observedIndex + 1); - queue.offer(i); + public int offerNext(int next) { + try { + return exchanger.exchange(next); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } } - private static Position makePosition(long offset) { - return new Position().setByteOffset(offset); - } + private static class MockReader extends Reader { + private ReaderIterator iterator; + + private MockReader(ReaderIterator iterator) { + this.iterator = iterator; + } - private static ApproximateProgress makeApproximateProgress(long offset) { - return makeApproximateProgress(makePosition(offset)); + @Override + public ReaderIterator iterator() throws IOException { + return iterator; + } } - private static ApproximateProgress makeApproximateProgress( - com.google.api.services.dataflow.model.Position position) { - return new ApproximateProgress().setPosition(position); + private static class MockOutputReceiver extends OutputReceiver { + private Exchanger exchanger = new Exchanger<>(); + + MockOutputReceiver(CounterSet.AddCounterMutator mutator) { + super("out", "test-", mutator); + } + + @Override + public void process(Object elem) throws Exception { + exchanger.exchange(null); + } + + public void unblockProcess() { + try { + exchanger.exchange(null); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } } } From c06125d29229309b8e2d2070d7fa140c9a2e14f5 Mon Sep 17 00:00:00 2001 From: mattlang Date: Mon, 9 Feb 2015 07:31:11 -0800 Subject: [PATCH 0141/1541] Fix NullPointerException thrown by getOnly when a value is not present for a given tag. 
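For illustration (a sketch of the failure mode, with keys taken from the new test below):
after a CoGroupByKey over inputs with key sets {1, 2} and {2, 3}, the result row for key 1
has no entry at all under the second input's tag, so the per-tag lookup can yield null
rather than an empty list:

    List<V> unions = valueMap.get(index);            // may be null, not just empty
    if (unions == null || unions.isEmpty()) { ... }  // treat an absent tag like an empty one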
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85888387 --- .../sdk/transforms/join/CoGbkResult.java | 3 +- .../sdk/transforms/join/CoGroupByKeyTest.java | 94 +++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java index cbacb5fec1385..56fc3fc84bc0a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java @@ -225,6 +225,7 @@ public CoGbkResult decode( return new CoGbkResult(schema, map); } + @Override public boolean equals(Object other) { if (!super.equals(other)) { return false; @@ -317,7 +318,7 @@ private V innerGetOnly( + " is not in the schema"); } List unions = valueMap.get(index); - if (unions.isEmpty()) { + if (unions == null || unions.isEmpty()) { if (useDefault) { return defaultValue; } else { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java index 6369d06aee522..13a74b419fda1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java @@ -17,8 +17,10 @@ package com.google.cloud.dataflow.sdk.transforms.join; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; +import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; @@ -75,6 +77,98 @@ public void processElement(ProcessContext c) { })); } + /** + * Returns a PCollection> containing the result + * of a CoGbk over 2 PCollection>, where each PCollection + * has no duplicate keys and the key sets of each PCollection are + * intersecting but neither is a subset of the other. 
+ */ + private PCollection> buildGetOnlyGbk( + Pipeline p, + TupleTag tag1, + TupleTag tag2) { + List> list1 = + Arrays.asList( + KV.of(1, "collection1-1"), + KV.of(2, "collection1-2")); + List> list2 = + Arrays.asList( + KV.of(2, "collection2-2"), + KV.of(3, "collection2-3")); + PCollection> collection1 = createInput(p, list1); + PCollection> collection2 = createInput(p, list2); + PCollection> coGbkResults = + KeyedPCollectionTuple.of(tag1, collection1) + .and(tag2, collection2) + .apply(CoGroupByKey.create()); + return coGbkResults; + } + + @Test + public void testCoGroupByKeyGetOnly() { + TupleTag tag1 = new TupleTag<>(); + TupleTag tag2 = new TupleTag<>(); + + DirectPipeline p = DirectPipeline.createForTest(); + + PCollection> coGbkResults = + buildGetOnlyGbk(p, tag1, tag2); + + EvaluationResults results = p.run(); + + List> finalResult = + results.getPCollection(coGbkResults); + + HashMap> collection1Matchers = + new HashMap>() { + { + put(1, equalTo("collection1-1")); + put(2, equalTo("collection1-2")); + } + }; + + HashMap> collection2Matchers = + new HashMap>() { + { + put(2, equalTo("collection2-2")); + put(3, equalTo("collection2-3")); + } + }; + + for (KV result : finalResult) { + int key = result.getKey(); + CoGbkResult row = result.getValue(); + checkGetOnlyForKey(key, collection1Matchers, row, tag1, "default"); + checkGetOnlyForKey(key, collection2Matchers, row, tag2, "default"); + } + } + + /** + * Check that a singleton value for a key in a CoGbkResult matches the + * expected value in a map. If no value exists for the key, check that + * a default value is given (if supplied) and that an + * {@link IllegalArgumentException} is thrown if no default is supplied. + */ + private void checkGetOnlyForKey( + K key, + HashMap> matchers, + CoGbkResult row, + TupleTag tag, + V defaultValue) { + if (matchers.containsKey(key)) { + assertThat(row.getOnly(tag), matchers.get(key)); + } else { + assertThat(row.getOnly(tag, defaultValue), equalTo(defaultValue)); + try { + row.getOnly(tag); + fail(); + } catch (IllegalArgumentException e) { + // if no value exists, an IllegalArgumentException should be thrown + } + + } + } + /** * Returns a PCollection> containing the * results of the CoGbk over 3 PCollection>, each of From 914fc7c984e88e94918c4331fd9fb87e2fbfcb4b Mon Sep 17 00:00:00 2001 From: kirpichov Date: Mon, 9 Feb 2015 10:44:43 -0800 Subject: [PATCH 0142/1541] Don't log the whole classpath at INFO, but log how many files will be staged and how to see what they are. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85904037 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 86781cd60eff3..fe6079faad55f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -110,7 +110,10 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { dataflowOptions.setFilesToStage(detectClassPathResourcesToStage( DataflowPipelineRunner.class.getClassLoader())); LOG.info("PipelineOptions.filesToStage was not specified. 
" - + "Defaulting to files from the classpath: {}", dataflowOptions.getFilesToStage()); + + "Defaulting to files from the classpath: will stage {} files. " + + "Enable logging at DEBUG level to see which files will be staged.", + dataflowOptions.getFilesToStage().size()); + LOG.debug("Classpath elements: {}", dataflowOptions.getFilesToStage()); } // Verify jobName according to service requirements. From df106aafebe4dfeacfb05c654eb07c2d43a6f55b Mon Sep 17 00:00:00 2001 From: bchambers Date: Mon, 9 Feb 2015 11:00:29 -0800 Subject: [PATCH 0143/1541] Update AvroCoder to be serializable, and verify that in the tests. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85905701 --- .../cloud/dataflow/sdk/coders/AvroCoder.java | 29 +++++++++++++++++++ .../dataflow/sdk/coders/AvroCoderTest.java | 19 ++++++++++++ 2 files changed, 48 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java index db0000cd25b60..75216120ac0fb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -40,6 +40,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.Serializable; import java.util.List; /** @@ -82,6 +83,7 @@ */ @SuppressWarnings("serial") public class AvroCoder extends StandardCoder { + /** * Returns an {@code AvroCoder} instance for the provided element type. * @param the element type @@ -134,6 +136,12 @@ protected AvroCoder(Class type, Schema schema) { this.writer = createDatumWriter(); } + private Object writeReplace() { + // When serialized by Java, instances of AvroCoder should be replaced by + // a SerializedAvroCoderProxy. + return new SerializedAvroCoderProxy<>(type, schema.toString()); + } + @Override public void encode(T value, OutputStream outStream, Context context) throws IOException { @@ -199,4 +207,25 @@ public DatumWriter createDatumWriter() { public Schema getSchema() { return schema; } + + /** + * Proxy to use in place of serializing the AvroCoder. This allows the fields + * to remain final. + */ + private static class SerializedAvroCoderProxy implements Serializable { + private final Class type; + private final String schemaStr; + + public SerializedAvroCoderProxy(Class type, String schemaStr) { + this.type = type; + this.schemaStr = schemaStr; + } + + private Object readResolve() { + // When deserialized, instances of this object should be replaced by + // constructing an AvroCoder. 
+ Schema.Parser parser = new Schema.Parser(); + return new AvroCoder(type, parser.parse(schemaStr)); + } + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index e3da71bcdff08..234fb046b2737 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -24,6 +24,7 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.values.PCollection; import org.apache.avro.Schema; @@ -37,6 +38,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.Serializable; /** * Tests for AvroCoder. @@ -180,4 +182,21 @@ public void testDefaultCoder() throws Exception { .containsInAnyOrder("hello", "world"); p.run(); } + + @Test + public void testAvroCoderJavaSerializable() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + + // Cast the coder to serializable to test that it is serializable using + // Java serialization. + SerializableUtils.ensureSerializable((Serializable) coder); + } + + @Test + public void testAvroCoderJsonSerializable() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + + // Check that the coder is serializable using the regular JSON approach. + SerializableUtils.ensureSerializable(coder); + } } From 02d4fb4357c9af517edbe0d3a66425dab67cff45 Mon Sep 17 00:00:00 2001 From: peihe Date: Mon, 9 Feb 2015 11:27:47 -0800 Subject: [PATCH 0144/1541] Testing: Run CoGroupByKeyTest on service, and add a windowing test case. 
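The expected output strings in the windowing test added below ("3:3", "7:7", "11:11") can be read off from the fixed-window arithmetic alone; a small stand-alone illustration follows (plain Java, not SDK code; the emission-at-maximum-timestamp behaviour is inferred from the test's own expectations). With a 4 ms window, a timestamp t lands in [t - t % 4, t - t % 4 + 4), whose maximum timestamp is 1 ms before the window end.

    public class FixedWindowArithmeticSketch {
      public static void main(String[] args) {
        long sizeMillis = 4; // matches FixedWindows.of(new Duration(4)) in the test below
        long[] timestamps = {0, 2, 4, 6, 8, 10};
        for (long t : timestamps) {
          long start = t - (t % sizeMillis);
          long maxTimestamp = start + sizeMillis - 1;
          System.out.println("t=" + t + " -> window [" + start + ", " + (start + sizeMillis)
              + "), maxTimestamp=" + maxTimestamp);
        }
      }
    }

Timestamps 0-3 therefore share maxTimestamp 3, timestamps 4-7 share 7, and timestamps 8-11 share 11, which is where the paired values in the test's assertions come from.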
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85908565 --- .../sdk/transforms/join/CoGroupByKeyTest.java | 151 +++++++++++++++++- 1 file changed, 145 insertions(+), 6 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java index 13a74b419fda1..69524fb623755 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; +import com.google.api.client.util.Preconditions; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; @@ -34,12 +35,18 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.DoFnTester; import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.collect.Iterables; import org.hamcrest.Matcher; +import org.joda.time.Duration; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -62,8 +69,21 @@ public class CoGroupByKeyTest implements Serializable { */ private PCollection> createInput( Pipeline p, List> list) { - return p - .apply(Create.of(list)) + return createInput(p, list, new ArrayList()); + } + + /** + * Converts the given list with timestamps into a PCollection. + */ + private PCollection> createInput( + Pipeline p, List> list, List timestamps) { + PCollection> input; + if (timestamps.isEmpty()) { + input = p.apply(Create.of(list)); + } else { + input = p.apply(Create.timestamped(list, timestamps)); + } + return input // Create doesn't infer coders for parameterized types. .setCoder( KvCoder.of(BigEndianIntegerCoder.of(), StringUtf8Coder.of())) @@ -227,6 +247,59 @@ private PCollection> buildPurchasesCoGbk( return coGbkResults; } + /** + * Returns a PCollection> containing the + * results of the CoGbk over 2 PCollection>, each of + * which correlates a customer id to clicks, purchases, respectively. 
+ */ + private PCollection> buildPurchasesCoGbkWithWindowing( + Pipeline p, + TupleTag clicksTag, + TupleTag purchasesTag) { + List> idToClick = + Arrays.asList( + KV.of(1, "Click t0"), + KV.of(2, "Click t2"), + KV.of(1, "Click t4"), + KV.of(1, "Click t6"), + KV.of(2, "Click t8")); + + List> idToPurchases = + Arrays.asList( + KV.of(1, "Boat t1"), + KV.of(1, "Shoesi t2"), + KV.of(1, "Pens t3"), + KV.of(2, "House t4"), + KV.of(2, "Suit t5"), + KV.of(1, "Car t6"), + KV.of(1, "Book t7"), + KV.of(2, "House t8"), + KV.of(2, "Shoes t9"), + KV.of(2, "House t10")); + + PCollection> clicksTable = + createInput( + p, + idToClick, + Arrays.asList(0L, 2L, 4L, 6L, 8L)) + .apply(Window.>into( + FixedWindows.>of(new Duration(4)))); + + PCollection> purchasesTable = + createInput( + p, + idToPurchases, + Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L)) + .apply(Window.>into( + FixedWindows.>of(new Duration(4)))); + + PCollection> coGbkResults = + KeyedPCollectionTuple.of(clicksTag, clicksTable) + .and(purchasesTag, purchasesTable) + .apply(CoGroupByKey.create()); + return coGbkResults; + } + @Test public void testCoGroupByKey() { TupleTag namesTag = new TupleTag<>(); @@ -307,6 +380,41 @@ private void checkValuesMatch( } } + /** + * A DoFn used in testCoGroupByKeyWithWindowing(), to test processing the + * results of a CoGroupByKey. + */ + private static class ClickOfPurchaseFn extends + DoFn, KV> { + private final TupleTag clicksTag; + + private final TupleTag purchasesTag; + + private ClickOfPurchaseFn( + TupleTag clicksTag, + TupleTag purchasesTag) { + this.clicksTag = clicksTag; + this.purchasesTag = purchasesTag; + } + + @Override + public void processElement(ProcessContext c) { + Preconditions.checkState(c.windows().size() == 1); + BoundedWindow w = c.windows().iterator().next(); + KV e = c.element(); + CoGbkResult row = e.getValue(); + Iterable clicks = row.getAll(clicksTag); + Iterable purchases = row.getAll(purchasesTag); + for (String click : clicks) { + for (String purchase : purchases) { + c.output(KV.of(click + ":" + purchase, + c.timestamp().getMillis() + ":" + w.maxTimestamp().getMillis())); + } + } + } + } + + /** * A DoFn used in testCoGroupByKeyHandleResults(), to test processing the * results of a CoGroupByKey. @@ -353,10 +461,7 @@ public void processElement(ProcessContext c) { Iterable purchases = row.getAll(purchasesTag); - int purchaseCount = 0; - for (String purchase : purchases) { - purchaseCount++; - } + int purchaseCount = Iterables.size(purchases); for (String address : addressList) { c.output(KV.of(address, purchaseCount)); @@ -416,6 +521,7 @@ public void testConsumingDoFn() { */ @SuppressWarnings("unchecked") @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testCoGroupByKeyHandleResults() { TupleTag namesTag = new TupleTag<>(); TupleTag addressesTag = new TupleTag<>(); @@ -440,4 +546,37 @@ public void testCoGroupByKeyHandleResults() { KV.of("383 Jackson Street", 1)); p.run(); } + + /** + * Tests the pipeline end-to-end with FixedWindows. 
+ */ + @SuppressWarnings("unchecked") + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testCoGroupByKeyWithWindowing() { + TupleTag clicksTag = new TupleTag<>(); + TupleTag purchasesTag = new TupleTag<>(); + + Pipeline p = TestPipeline.create(); + + PCollection> coGbkResults = + buildPurchasesCoGbkWithWindowing(p, clicksTag, purchasesTag); + + PCollection> + clickOfPurchase = coGbkResults.apply(ParDo.of( + new ClickOfPurchaseFn(clicksTag, purchasesTag))); + DataflowAssert.that(clickOfPurchase) + .containsInAnyOrder( + KV.of("Click t0:Boat t1", "3:3"), + KV.of("Click t0:Shoesi t2", "3:3"), + KV.of("Click t0:Pens t3", "3:3"), + KV.of("Click t4:Car t6", "7:7"), + KV.of("Click t4:Book t7", "7:7"), + KV.of("Click t6:Car t6", "7:7"), + KV.of("Click t6:Book t7", "7:7"), + KV.of("Click t8:House t8", "11:11"), + KV.of("Click t8:Shoes t9", "11:11"), + KV.of("Click t8:House t10", "11:11")); + p.run(); + } } From e76eff5e0cd3e00321b73a71aace8879e7c30143 Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 9 Feb 2015 11:35:52 -0800 Subject: [PATCH 0145/1541] Dataflow SDK: Change email address to send Travis build status notifications. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85909441 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4815f28e12944..dc499659491a4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ language: java notifications: email: recipients: - - dataflow-sdk+travis@google.com + - dataflow-sdk-build-notifications+travis@google.com on_success: change on_failure: always From a392b2c79a642f04a2c2e0a1eb4bc0c72810dc73 Mon Sep 17 00:00:00 2001 From: millsd Date: Mon, 9 Feb 2015 12:29:54 -0800 Subject: [PATCH 0146/1541] Uses slf4j logging in the streaming worker to enable Cloud Logging. Removes the use of MDC because it doesn't actually provide thread-local formatting parameters (http://bugzilla.slf4j.org/show_bug.cgi?id=325, https://github.com/qos-ch/slf4j/pull/99). [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85914693 --- .../runners/worker/DataflowWorkerHarness.java | 7 +- .../worker/StreamingDataflowWorker.java | 86 ++++++++++--------- .../DataflowWorkerLoggingFormatter.java | 57 ++++++++++-- .../worker/DataflowWorkerHarnessTest.java | 12 +-- .../worker/StreamingDataflowWorkerTest.java | 7 +- .../DataflowWorkerLoggingFormatterTest.java | 38 ++++---- ...a => RestoreDataflowLoggingFormatter.java} | 26 +++--- ... 
RestoreDataflowLoggingFormatterTest.java} | 25 +++--- 8 files changed, 154 insertions(+), 104 deletions(-) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/{RestoreMappedDiagnosticContext.java => RestoreDataflowLoggingFormatter.java} (51%) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/{RestoreMappedDiagnosticContextTest.java => RestoreDataflowLoggingFormatterTest.java} (59%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index d069b63d67a6a..4b0879e22c99d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -44,7 +44,6 @@ import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.slf4j.MDC; import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; @@ -142,8 +141,8 @@ public Boolean call() throws Exception { } static DataflowWorker create(DataflowWorkerHarnessOptions options) { - MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_JOB_ID, options.getJobId()); - MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORKER_ID, options.getWorkerId()); + DataflowWorkerLoggingFormatter.setJobId(options.getJobId()); + DataflowWorkerLoggingFormatter.setWorkerId(options.getWorkerId()); options.setAppName(APPLICATION_NAME); // Configure standard IO factories. @@ -223,7 +222,7 @@ public WorkItem getWorkItem() throws IOException { return null; } - MDC.put(DataflowWorkerLoggingFormatter.MDC_DATAFLOW_WORK_ID, Long.toString(work.getId())); + DataflowWorkerLoggingFormatter.setWorkId(Long.toString(work.getId())); // Looks like the work's a'ight. 
return work; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 9234d31c7e7fa..48d7ebd30623b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -18,8 +18,10 @@ import com.google.api.services.dataflow.model.MapTask; import com.google.api.services.dataflow.model.MetricUpdate; -import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingInitializer; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub; import com.google.cloud.dataflow.sdk.util.BoundedQueueExecutor; @@ -37,6 +39,9 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.handler.AbstractHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; @@ -51,8 +56,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import java.util.logging.Level; -import java.util.logging.Logger; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -61,7 +64,7 @@ * Implements a Streaming Dataflow worker. */ public class StreamingDataflowWorker { - private static final Logger LOG = Logger.getLogger(StreamingDataflowWorker.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(StreamingDataflowWorker.class); static final int MAX_THREAD_POOL_SIZE = 100; static final long THREAD_EXPIRATION_TIME_SEC = 60; static final int MAX_THREAD_POOL_QUEUE_SIZE = 100; @@ -87,7 +90,8 @@ static MapTask parseMapTask(String input) throws IOException { } public static void main(String[] args) throws Exception { - LOG.setLevel(Level.INFO); + new DataflowWorkerLoggingInitializer().initialize(); + String hostport = System.getProperty("windmill.hostport"); if (hostport == null) { throw new Exception("-Dwindmill.hostport must be set to the location of the windmill server"); @@ -124,7 +128,7 @@ public static void main(String[] args) throws Exception { private Thread commitThread; private AtomicBoolean running; private StateFetcher stateFetcher; - private DataflowPipelineOptions options; + private DataflowWorkerHarnessOptions options; private long clientId; private Server statusServer; private AtomicReference lastException; @@ -132,11 +136,12 @@ public static void main(String[] args) throws Exception { /** Regular constructor. 
*/ public StreamingDataflowWorker( List mapTasks, WindmillServerStub server) { - initialize(mapTasks, server); - options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); options.setAppName("StreamingWorkerHarness"); options.setStreaming(true); + initialize(mapTasks, server); + if (System.getProperties().containsKey("path_validator_class")) { try { options.setPathValidatorClass((Class) Class.forName( @@ -157,9 +162,9 @@ public StreamingDataflowWorker( /** The constructor that takes PipelineOptions. Should be used only by unit tests. */ StreamingDataflowWorker( - List mapTasks, WindmillServerStub server, DataflowPipelineOptions options) { - initialize(mapTasks, server); + List mapTasks, WindmillServerStub server, DataflowWorkerHarnessOptions options) { this.options = options; + initialize(mapTasks, server); } public void start() { @@ -202,8 +207,7 @@ public void stop() { } commitThread.join(); } catch (Exception e) { - LOG.warning("Exception while shutting down: " + e); - e.printStackTrace(); + LOG.warn("Exception while shutting down: ", e); } } @@ -219,8 +223,7 @@ private void initialize(List mapTasks, WindmillServerStub server) { private final Thread.UncaughtExceptionHandler handler = new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread thread, Throwable e) { - LOG.severe("Uncaught exception: " + e); - e.printStackTrace(); + LOG.error("Uncaught exception: ", e); System.exit(1); } }; @@ -239,6 +242,9 @@ public Thread newThread(Runnable r) { this.stateFetcher = new StateFetcher(server); this.clientId = new Random().nextLong(); this.lastException = new AtomicReference<>(); + + DataflowWorkerLoggingFormatter.setJobId(options.getJobId()); + DataflowWorkerLoggingFormatter.setWorkerId(options.getWorkerId()); } public void runStatusServer(int statusPort) { @@ -246,17 +252,17 @@ public void runStatusServer(int statusPort) { statusServer.setHandler(new StatusHandler()); try { statusServer.start(); - LOG.info("Status server started on port " + statusPort); + LOG.info("Status server started on port {}", statusPort); statusServer.join(); } catch (Exception e) { - LOG.warning("Status server failed to start: " + e); + LOG.warn("Status server failed to start: ", e); } } private void addComputation(MapTask mapTask) { String computation = mapTask.getSystemName(); if (!instructionMap.containsKey(computation)) { - LOG.info("Adding config for " + computation + ": " + mapTask); + LOG.info("Adding config for {}: {}", computation, mapTask); outputMap.put(computation, new ConcurrentLinkedQueue()); instructionMap.put(computation, mapTask); mapTaskExecutors.put( @@ -284,8 +290,8 @@ private void dispatchLoop() { // Also force a GC to try to get under the memory threshold if possible. while (rt.freeMemory() < rt.totalMemory() * PUSHBACK_THRESHOLD) { if (lastPushbackLog < (lastPushbackLog = System.currentTimeMillis()) - 60 * 1000) { - LOG.warning("In pushback, not accepting new work. Free Memory: " - + rt.freeMemory() + "MB / " + rt.totalMemory() + "MB"); + LOG.warn("In pushback, not accepting new work. 
Free Memory: {}MB / {}MB", + rt.freeMemory(), rt.totalMemory()); System.gc(); } sleep(10); @@ -320,12 +326,12 @@ public void run() { private void process( final String computation, final Windmill.WorkItem work) { - LOG.log(Level.FINE, "Starting processing for " + computation + ":\n{0}", work); + LOG.debug("Starting processing for {}:\n{}", computation, work); MapTask mapTask = instructionMap.get(computation); if (mapTask == null) { - LOG.info("Received work for unknown computation: " + computation - + ". Known computations are " + instructionMap.keySet()); + LOG.info("Received work for unknown computation: {}. Known computations are {}", + computation, instructionMap.keySet()); return; } @@ -338,6 +344,8 @@ private void process( MapTaskExecutor worker = null; try { + DataflowWorkerLoggingFormatter.setWorkId( + work.getKey().toStringUtf8() + "-" + Long.toString(work.getWorkToken())); WorkerAndContext workerAndContext = mapTaskExecutors.get(computation).poll(); if (workerAndContext == null) { context = new StreamingModeExecutionContext(computation, stateFetcher); @@ -359,28 +367,31 @@ private void process( mapTaskExecutors.get(computation).offer(new WorkerAndContext(worker, context)); worker = null; context = null; + + Windmill.WorkItemCommitRequest output = outputBuilder.build(); + outputMap.get(computation).add(output); + LOG.debug("Processing done for work token: {}", work.getWorkToken()); } catch (Throwable t) { if (worker != null) { try { worker.close(); } catch (Exception e) { - LOG.warning("Failed to close worker: " + e.getMessage()); - e.printStackTrace(); + LOG.warn("Failed to close worker: ", e); } } t = t instanceof UserCodeException ? t.getCause() : t; if (t instanceof KeyTokenInvalidException) { - LOG.fine("Execution of work for " + computation + " for key " + work.getKey().toStringUtf8() + LOG.debug("Execution of work for " + computation + + " for key " + work.getKey().toStringUtf8() + " failed due to token expiration, will not retry locally."); } else { - LOG.warning("Execution of work for " + computation + " for key " - + work.getKey().toStringUtf8() + " failed, retrying." - + "\nError: " + t.getMessage()); - t.printStackTrace(); + LOG.error("Execution of work for {} for key {} failed, retrying.", + computation, work.getKey().toStringUtf8()); + LOG.error("\nError: ", t); lastException.set(t); - LOG.fine("Failed work: " + work); + LOG.debug("Failed work: {}", work); reportFailure(computation, work, t); // Try again, but go to the end of the queue to avoid a tight loop. 
sleep(60000); @@ -390,12 +401,9 @@ public void run() { } }); } - return; + } finally { + DataflowWorkerLoggingFormatter.setWorkId(null); } - - Windmill.WorkItemCommitRequest output = outputBuilder.build(); - outputMap.get(computation).add(output); - LOG.fine("Processing done for work token: " + work.getWorkToken()); } private void commitLoop() { @@ -423,7 +431,7 @@ private void commitLoop() { } if (commitRequestBuilder.getRequestsCount() > 0) { Windmill.CommitWorkRequest commitRequest = commitRequestBuilder.build(); - LOG.log(Level.FINE, "Commit: {0}", commitRequest); + LOG.debug("Commit: {}", commitRequest); commitWork(commitRequest); } if (remainingCommitBytes > 0) { @@ -451,8 +459,8 @@ private void getConfig(String computation) { try { addComputation(parseMapTask(serializedMapTask)); } catch (IOException e) { - LOG.warning("Parsing MapTask failed: " + serializedMapTask); - e.printStackTrace(); + LOG.warn("Parsing MapTask failed: {}", serializedMapTask); + LOG.warn("Error: ", e); } } } @@ -472,7 +480,7 @@ private void buildCounters(CounterSet counterSet, } else if (cloudKind.equals(Counter.AggregationKind.MIN.name())) { kind = Windmill.Counter.Kind.MIN; } else { - LOG.log(Level.FINE, "Unhandled counter type: " + metricUpdate.getKind()); + LOG.debug("Unhandled counter type: {}", metricUpdate.getKind()); return; } Windmill.Counter.Builder counterBuilder = builder.addCounterUpdatesBuilder(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java index 911b3fe2e5e68..b6bb0ce960173 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -22,7 +22,6 @@ import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; -import org.slf4j.MDC; import java.io.PrintWriter; import java.io.StringWriter; @@ -39,9 +38,53 @@ public class DataflowWorkerLoggingFormatter extends Formatter { private static final DateTimeFormatter DATE_FORMATTER = ISODateTimeFormat.dateTime().withZoneUTC(); - public static final String MDC_DATAFLOW_JOB_ID = "dataflow.jobId"; - public static final String MDC_DATAFLOW_WORKER_ID = "dataflow.workerId"; - public static final String MDC_DATAFLOW_WORK_ID = "dataflow.workId"; + + private static final InheritableThreadLocal jobId = new InheritableThreadLocal<>(); + private static final InheritableThreadLocal workerId = new InheritableThreadLocal<>(); + private static final InheritableThreadLocal workId = new InheritableThreadLocal<>(); + + /** + * Sets the Job ID of the current thread, which will be inherited by child threads. + */ + public static void setJobId(String newJobId) { + jobId.set(newJobId); + } + + /** + * Sets the Worker ID of the current thread, which will be inherited by child threads. + */ + public static void setWorkerId(String newWorkerId) { + workerId.set(newWorkerId); + } + + /** + * Sets the Work ID of the current thread, which will be inherited by child threads. + */ + public static void setWorkId(String newWorkId) { + workId.set(newWorkId); + } + + /** + * Gets the Job ID of the current thread. + */ + public static String getJobId() { + return jobId.get(); + } + + /** + * Gets the Worker ID of the current thread. 
+ */ + public static String getWorkerId() { + return workerId.get(); + } + + /** + * Gets the Work ID of the current thread. + */ + public static String getWorkId() { + return workId.get(); + } + @Override public String format(LogRecord record) { @@ -49,9 +92,9 @@ public String format(LogRecord record) { return DATE_FORMATTER.print(record.getMillis()) + " " + MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), record.getLevel().getName()) - + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_JOB_ID), "unknown") - + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORKER_ID), "unknown") - + " " + MoreObjects.firstNonNull(MDC.get(MDC_DATAFLOW_WORK_ID), "unknown") + + " " + MoreObjects.firstNonNull(jobId.get(), "unknown") + + " " + MoreObjects.firstNonNull(workerId.get(), "unknown") + + " " + MoreObjects.firstNonNull(workId.get(), "unknown") + " " + record.getThreadID() + " " + record.getLoggerName() + " " + record.getMessage() + System.lineSeparator() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java index e167aab7cf1ba..6dc9424e9848c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java @@ -35,7 +35,8 @@ import com.google.api.services.dataflow.model.WorkItem; import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.testing.RestoreMappedDiagnosticContext; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; +import com.google.cloud.dataflow.sdk.testing.RestoreDataflowLoggingFormatter; import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; import com.google.cloud.dataflow.sdk.util.TestCredential; import com.google.cloud.dataflow.sdk.util.Transport; @@ -52,7 +53,6 @@ import org.junit.runners.JUnit4; import org.mockito.Mock; import org.mockito.MockitoAnnotations; -import org.slf4j.MDC; import java.io.IOException; @@ -60,7 +60,7 @@ @RunWith(JUnit4.class) public class DataflowWorkerHarnessTest { @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); - @Rule public TestRule restoreMDC = new RestoreMappedDiagnosticContext(); + @Rule public TestRule restoreLogging = new RestoreDataflowLoggingFormatter(); @Rule public ExpectedException expectedException = ExpectedException.none(); @Mock private MockHttpTransport transport; @Mock private MockLowLevelHttpRequest request; @@ -108,8 +108,8 @@ public void testCreationOfWorkerHarness() throws Exception { DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); options.setGcpCredential(new TestCredential()); assertNotNull(DataflowWorkerHarness.create(options)); - assertEquals("jobId", MDC.get("dataflow.jobId")); - assertEquals("workerId", MDC.get("dataflow.workerId")); + assertEquals("jobId", DataflowWorkerLoggingFormatter.getJobId()); + assertEquals("workerId", DataflowWorkerLoggingFormatter.getWorkerId()); } @Test @@ -138,7 +138,7 @@ public void testCloudServiceCall() throws Exception { actualRequest.getWorkerCapabilities()); assertEquals(ImmutableList.of("map_task", "seq_map_task", "remote_source_task"), actualRequest.getWorkItemTypes()); - assertEquals("1234", MDC.get("dataflow.workId")); + 
assertEquals("1234", DataflowWorkerLoggingFormatter.getWorkId()); } @Test diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java index 9c5c5cbd24f0c..536ba0b7fa133 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java @@ -33,7 +33,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.ListCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; -import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; import com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServerStub; @@ -317,8 +317,9 @@ private Windmill.WorkItemCommitRequest makeExpectedOutput(int index, long timest Arrays.asList(DEFAULT_WINDOW))); } - private DataflowPipelineOptions createTestingPipelineOptions() { - DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); + private DataflowWorkerHarnessOptions createTestingPipelineOptions() { + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); options.setAppName("StreamingWorkerHarnessTest"); options.setStreaming(true); return options; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java index 4fd2b5f4688d4..2bf711c84bbca 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java @@ -18,15 +18,13 @@ import static org.junit.Assert.assertEquals; -import com.google.cloud.dataflow.sdk.testing.RestoreMappedDiagnosticContext; -import com.google.common.collect.ImmutableMap; +import com.google.cloud.dataflow.sdk.testing.RestoreDataflowLoggingFormatter; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.slf4j.MDC; import java.util.logging.Level; import java.util.logging.LogRecord; @@ -34,7 +32,7 @@ /** Unit tests for {@link DataflowWorkerLoggingFormatter}. 
*/ @RunWith(JUnit4.class) public class DataflowWorkerLoggingFormatterTest { - @Rule public TestRule restoreMDC = new RestoreMappedDiagnosticContext(); + @Rule public TestRule restoreMDC = new RestoreDataflowLoggingFormatter(); @Test public void testWithUnsetValuesInMDC() { @@ -47,10 +45,10 @@ public void testWithUnsetValuesInMDC() { @Test public void testWithMessage() { - MDC.setContextMap(ImmutableMap.of( - "dataflow.jobId", "testJobId", - "dataflow.workerId", "testWorkerId", - "dataflow.workId", "testWorkId")); + DataflowWorkerLoggingFormatter.setJobId("testJobId"); + DataflowWorkerLoggingFormatter.setWorkerId("testWorkerId"); + DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); + assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " + "test.message" + System.lineSeparator(), @@ -60,10 +58,10 @@ public void testWithMessage() { @Test public void testWithMessageAndException() { - MDC.setContextMap(ImmutableMap.of( - "dataflow.jobId", "testJobId", - "dataflow.workerId", "testWorkerId", - "dataflow.workId", "testWorkId")); + DataflowWorkerLoggingFormatter.setJobId("testJobId"); + DataflowWorkerLoggingFormatter.setWorkerId("testWorkerId"); + DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); + assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " + "test.message" + System.lineSeparator() @@ -77,10 +75,10 @@ public void testWithMessageAndException() { @Test public void testWithException() { - MDC.setContextMap(ImmutableMap.of( - "dataflow.jobId", "testJobId", - "dataflow.workerId", "testWorkerId", - "dataflow.workId", "testWorkId")); + DataflowWorkerLoggingFormatter.setJobId("testJobId"); + DataflowWorkerLoggingFormatter.setWorkerId("testWorkerId"); + DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); + assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + System.lineSeparator() @@ -94,10 +92,10 @@ public void testWithException() { @Test public void testWithoutExceptionOrMessage() { - MDC.setContextMap(ImmutableMap.of( - "dataflow.jobId", "testJobId", - "dataflow.workerId", "testWorkerId", - "dataflow.workId", "testWorkId")); + DataflowWorkerLoggingFormatter.setJobId("testJobId"); + DataflowWorkerLoggingFormatter.setWorkerId("testWorkerId"); + DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); + assertEquals( "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + System.lineSeparator(), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatter.java similarity index 51% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatter.java index f0bdb9e217044..6f5309e332e18 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContext.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatter.java @@ -16,32 +16,32 @@ package com.google.cloud.dataflow.sdk.testing; -import com.google.common.base.MoreObjects; -import com.google.common.collect.ImmutableMap; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; import org.junit.rules.ExternalResource; -import org.slf4j.MDC; - -import java.util.Map; /** - * Saves and restores the current MDC 
for tests. + * Saves and restores the current thread-local logging parameters for tests. */ -public class RestoreMappedDiagnosticContext extends ExternalResource { - private Map previousValue; +public class RestoreDataflowLoggingFormatter extends ExternalResource { + private String previousJobId; + private String previousWorkerId; + private String previousWorkId; - public RestoreMappedDiagnosticContext() { + public RestoreDataflowLoggingFormatter() { } @Override protected void before() throws Throwable { - previousValue = MoreObjects.firstNonNull( - MDC.getCopyOfContextMap(), - ImmutableMap.of()); + previousJobId = DataflowWorkerLoggingFormatter.getJobId(); + previousWorkerId = DataflowWorkerLoggingFormatter.getWorkerId(); + previousWorkId = DataflowWorkerLoggingFormatter.getWorkId(); } @Override protected void after() { - MDC.setContextMap(previousValue); + DataflowWorkerLoggingFormatter.setJobId(previousJobId); + DataflowWorkerLoggingFormatter.setWorkerId(previousWorkerId); + DataflowWorkerLoggingFormatter.setWorkId(previousWorkId); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatterTest.java similarity index 59% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatterTest.java index c88f275f4bf65..46ff22912ef42 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreMappedDiagnosticContextTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/RestoreDataflowLoggingFormatterTest.java @@ -19,33 +19,34 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; + import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.slf4j.MDC; -/** Tests for {@link RestoreMappedDiagnosticContext}. */ +/** Tests for {@link RestoreDataflowLoggingFormatter}. */ @RunWith(JUnit4.class) -public class RestoreMappedDiagnosticContextTest { - @Rule public TestRule restoreMappedDiagnosticContext = new RestoreMappedDiagnosticContext(); +public class RestoreDataflowLoggingFormatterTest { + @Rule public TestRule restoreDataflowLoggingFormatter = new RestoreDataflowLoggingFormatter(); /* * Since these tests can run out of order, both test A and B verify that they * could insert their property and that the other does not exist. 
*/ @Test - public void testThatMDCIsClearedA() { - MDC.put("TestA", "TestA"); - assertNotNull(MDC.get("TestA")); - assertNull(MDC.get("TestB")); + public void testLoggingParamsClearedA() { + DataflowWorkerLoggingFormatter.setJobId("job"); + assertNotNull(DataflowWorkerLoggingFormatter.getJobId()); + assertNull(DataflowWorkerLoggingFormatter.getWorkerId()); } @Test - public void testThatMDCIsClearedB() { - MDC.put("TestB", "TestB"); - assertNotNull(MDC.get("TestB")); - assertNull(MDC.get("TestA")); + public void testLoggingParamsClearedB() { + DataflowWorkerLoggingFormatter.setWorkerId("worker"); + assertNotNull(DataflowWorkerLoggingFormatter.getWorkerId()); + assertNull(DataflowWorkerLoggingFormatter.getJobId()); } } From ea18500b103ac76c7bc83ac6533dac689bf84a39 Mon Sep 17 00:00:00 2001 From: earhart Date: Mon, 9 Feb 2015 13:21:33 -0800 Subject: [PATCH 0147/1541] Add the report index to work item status messages sent to the Dataflow service. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85919759 --- .../worker/DataflowWorkProgressUpdater.java | 16 +++++++++- .../sdk/runners/worker/DataflowWorker.java | 17 +++++----- .../DataflowWorkProgressUpdaterTest.java | 31 ++++++++++++++++--- 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java index 66c1fd1a2c862..ca0b1667dae15 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -54,12 +54,16 @@ public class DataflowWorkProgressUpdater extends WorkProgressUpdater { /** Options specifying information about the pipeline run by the worker.*/ private final DataflowWorkerHarnessOptions options; + /** The index to use for the next report sent for the updater's work item. */ + private long nextReportIndex; + public DataflowWorkProgressUpdater(WorkItem workItem, WorkExecutor worker, DataflowWorker.WorkUnitClient workUnitClient, DataflowWorkerHarnessOptions options) { super(worker); this.workItem = workItem; this.workUnitClient = workUnitClient; this.options = options; + this.nextReportIndex = 1; } @Override @@ -76,13 +80,15 @@ protected long getWorkUnitLeaseExpirationTimestamp() { protected void reportProgressHelper() throws Exception { WorkItemStatus status = buildStatus(workItem, false/*completed*/, worker.getOutputCounters(), worker.getOutputMetrics(), options, worker.getWorkerProgress(), forkResultToReport, - null/*sourceOperationResponse*/, null/*errors*/); + null/*sourceOperationResponse*/, null/*errors*/, + getNextReportIndex()); status.setRequestedLeaseDuration(toCloudDuration(Duration.millis(requestedLeaseDurationMs))); WorkItemServiceState result = workUnitClient.reportWorkItemStatus(status); if (result != null) { // Resets state after a successful progress report. 
forkResultToReport = null; + nextReportIndex++; progressReportIntervalMs = nextProgressReportInterval( fromCloudDuration(workItem.getReportStatusInterval()).getMillis(), @@ -105,4 +111,12 @@ private long getLeaseExpirationTimestamp(WorkItem workItem) { private long getLeaseExpirationTimestamp(WorkItemServiceState workItemServiceState) { return fromCloudTime(workItemServiceState.getLeaseExpireTime()).getMillis(); } + + /** + * Returns the index to use for the next work item report for the work + * progress updater's work item. + */ + long getNextReportIndex() { + return nextReportIndex; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 3a46be9c173cd..f1fe0d2f6ec01 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -38,7 +38,6 @@ import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.cloud.dataflow.sdk.util.common.worker.SourceFormat; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; -import com.google.cloud.dataflow.sdk.util.common.worker.WorkProgressUpdater; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,7 +120,7 @@ private boolean doWork(WorkItem workItem) throws IOException { throw new RuntimeException("unknown kind of work item: " + workItem.toString()); } - WorkProgressUpdater progressUpdater = + DataflowWorkProgressUpdater progressUpdater = new DataflowWorkProgressUpdater(workItem, worker, workUnitClient, options); progressUpdater.startReportingProgress(); @@ -156,7 +155,8 @@ private boolean doWork(WorkItem workItem) throws IOException { ((SourceOperationExecutor) worker).getResponse()) : null; reportStatus( - options, "Success", workItem, counters, metrics, operationResponse, null/*errors*/); + options, "Success", workItem, counters, metrics, operationResponse, null/*errors*/, + progressUpdater.getNextReportIndex()); return true; @@ -190,7 +190,7 @@ private void handleWorkError(WorkItem workItem, WorkExecutor worker, Throwable e reportStatus(options, "Failure", workItem, worker == null ? null : worker.getOutputCounters(), worker == null ? null : worker.getOutputMetrics(), null/*sourceOperationResponse*/, - error == null ? null : Collections.singletonList(error)); + error == null ? 
null : Collections.singletonList(error), 0); } /** @@ -219,11 +219,12 @@ private static String buildCloudStackTrace(Throwable t) { private void reportStatus(DataflowWorkerHarnessOptions options, String status, WorkItem workItem, @Nullable CounterSet counters, @Nullable Collection> metrics, - @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors, + long finalReportIndex) throws IOException { LOG.info("{} processing work item {}", status, uniqueId(workItem)); WorkItemStatus workItemStatus = buildStatus(workItem, true/*completed*/, counters, metrics, - options, null, null, operationResponse, errors); + options, null, null, operationResponse, errors, finalReportIndex); workUnitClient.reportWorkItemStatus(workItemStatus); } @@ -231,10 +232,12 @@ static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, @Nullable CounterSet counters, @Nullable Collection> metrics, DataflowWorkerHarnessOptions options, @Nullable Reader.Progress progress, @Nullable Reader.ForkResult forkResult, - @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors) { + @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors, + long finalReportIndex) { WorkItemStatus status = new WorkItemStatus(); status.setWorkItemId(Long.toString(workItem.getId())); status.setCompleted(completed); + status.setReportIndex(finalReportIndex); List counterUpdates = null; List metricUpdates = null; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index 0745e40faef03..3146c0e72c379 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -209,7 +209,7 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { verify(workUnitClient, timeout(600)) .reportWorkItemStatus(argThat( new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( - approximateProgressAtIndex(1L)))); + approximateProgressAtIndex(1L)).withReportIndex(1L))); setUpCounters(5); setUpMetrics(6); @@ -221,7 +221,8 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { .withCounters(5) .withMetrics(6) .withProgress(approximateProgressAtIndex(2L)) - .withForkAtPosition(positionAtIndex(3L)))); + .withForkAtPosition(positionAtIndex(3L)) + .withReportIndex(2L))); // After the request is sent, reset cached fork result to null. assertNull(progressUpdater.getForkResultToReport()); @@ -231,9 +232,12 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { // The third update should be sent after one and half seconds (3000 / 2). verify(workUnitClient, timeout(1600)) .reportWorkItemStatus(argThat( - new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)))); + new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)) + .withReportIndex(3L))); progressUpdater.stopReportingProgress(); + + assertEquals(4L, progressUpdater.getNextReportIndex()); } // Verifies that a last update is sent when there is an unacknowledged split request. 
@@ -344,6 +348,9 @@ private static final class ExpectedDataflowWorkItemStatus @Nullable Position expectedForkPosition; + @Nullable + Long expectedReportIndex; + public ExpectedDataflowWorkItemStatus withCounters(Integer counterCount) { this.counterCount = counterCount; return this; @@ -364,6 +371,11 @@ public ExpectedDataflowWorkItemStatus withForkAtPosition(Position expectedForkPo return this; } + public ExpectedDataflowWorkItemStatus withReportIndex(Long reportIndex) { + this.expectedReportIndex = reportIndex; + return this; + } + @Override public void describeTo(Description description) { List values = new ArrayList<>(); @@ -385,13 +397,17 @@ public void describeTo(Description description) { } else { values.add("no fork position present"); } + if (this.expectedReportIndex != null) { + values.add("reportIndex " + this.expectedReportIndex); + } description.appendValueList("Dataflow WorkItemStatus with ", ", ", ".", values); } @Override public boolean matches(Object status) { WorkItemStatus st = (WorkItemStatus) status; - return matchCountersAndMetrics(st) && matchProgress(st) && matchStopPosition(st); + return matchCountersAndMetrics(st) && matchProgress(st) && matchStopPosition(st) + && matchReportIndex(st); } private boolean matchCountersAndMetrics(WorkItemStatus status) { @@ -435,5 +451,12 @@ private boolean matchStopPosition(WorkItemStatus status) { } return expectedForkPosition.equals(actualStopPosition); } + + private boolean matchReportIndex(WorkItemStatus status) { + if (expectedReportIndex == null) { + return true; + } + return expectedReportIndex.equals(status.getReportIndex()); + } } } From fcd3a5291da5d204e2bf530e62afdcd744308233 Mon Sep 17 00:00:00 2001 From: samuelw Date: Mon, 9 Feb 2015 18:56:47 -0800 Subject: [PATCH 0148/1541] Fixed logic inversion and added test of exception handling retries. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85949368 --- .../worker/StreamingDataflowWorker.java | 38 +++++++++++-------- .../worker/StreamingDataflowWorkerTest.java | 24 ++++++++++-- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 48d7ebd30623b..c9167b13da975 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -392,14 +392,19 @@ private void process( LOG.error("\nError: ", t); lastException.set(t); LOG.debug("Failed work: {}", work); - reportFailure(computation, work, t); - // Try again, but go to the end of the queue to avoid a tight loop. - sleep(60000); - executor.forceExecute(new Runnable() { - public void run() { - process(computation, work); - } - }); + if (reportFailure(computation, work, t)) { + // Try again, after some delay and at the end of the queue to avoid a tight loop. + sleep(10000); + executor.forceExecute(new Runnable() { + public void run() { + process(computation, work); + } + }); + } else { + // If we failed to report the error, the item is invalid and should + // not be retried internally. It will be retried at the higher level. 
+ LOG.debug("Aborting processing due to exception reporting failure"); + } } } finally { DataflowWorkerLoggingFormatter.setWorkId(null); @@ -537,13 +542,16 @@ private Windmill.Exception buildExceptionReport(Throwable t) { return builder.build(); } - private void reportFailure(String computation, Windmill.WorkItem work, Throwable t) { - windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() - .setComputationId(computation) - .setKey(work.getKey()) - .setWorkToken(work.getWorkToken()) - .addExceptions(buildExceptionReport(t)) - .build()); + // Returns true if reporting the exception is successful and the work should be retried. + private boolean reportFailure(String computation, Windmill.WorkItem work, Throwable t) { + Windmill.ReportStatsResponse response = + windmillServer.reportStats(Windmill.ReportStatsRequest.newBuilder() + .setComputationId(computation) + .setKey(work.getKey()) + .setWorkToken(work.getWorkToken()) + .addExceptions(buildExceptionReport(t)) + .build()); + return !response.getFailed(); } private static class WorkerAndContext { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java index 536ba0b7fa133..4005140fcd1d9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java @@ -75,6 +75,7 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; /** Unit tests for {@link StreamingDataflowWorker}. 
*/ @RunWith(JUnit4.class) @@ -88,12 +89,14 @@ private static class FakeWindmillServer extends WindmillServerStub { private Map commitsReceived; private LinkedBlockingQueue exceptions; private int commitsRequested = 0; + private AtomicInteger expectedExceptionCount; public FakeWindmillServer() { workToOffer = new ConcurrentLinkedQueue(); dataToOffer = new ConcurrentLinkedQueue(); commitsReceived = new ConcurrentHashMap(); exceptions = new LinkedBlockingQueue<>(); + expectedExceptionCount = new AtomicInteger(); } public void addWorkToOffer(Windmill.GetWorkResponse work) { @@ -142,16 +145,22 @@ public Windmill.ReportStatsResponse reportStats(Windmill.ReportStatsRequest requ for (Windmill.Exception exception : request.getExceptionsList()) { try { exceptions.put(exception); - } catch (InterruptedException e) {} + } catch (InterruptedException expected) {} + } + + if (expectedExceptionCount.getAndDecrement() > 0) { + return Windmill.ReportStatsResponse.newBuilder().build(); + } else { + return Windmill.ReportStatsResponse.newBuilder() + .setFailed(true).build(); } - return Windmill.ReportStatsResponse.newBuilder().build(); } public Map waitForAndGetCommits(int numCommits) { while (commitsReceived.size() < commitsRequested + numCommits) { try { Thread.sleep(1000); - } catch (InterruptedException e) {} + } catch (InterruptedException expected) {} } commitsRequested += numCommits; @@ -159,6 +168,10 @@ public Map waitForAndGetCommits(int numCom return commitsReceived; } + public void setExpectedExceptionCount(int i) { + expectedExceptionCount.getAndAdd(i); + } + public Windmill.Exception getException() throws InterruptedException { return exceptions.take(); } @@ -614,6 +627,7 @@ public void processElement(ProcessContext c) throws Exception { makeSinkInstruction(StringUtf8Coder.of(), 1)); FakeWindmillServer server = new FakeWindmillServer(); + server.setExpectedExceptionCount(1); server.addWorkToOffer(buildInput( "work {" + " computation_id: \"computation\"" + @@ -649,6 +663,10 @@ public void processElement(ProcessContext c) throws Exception { Assert.assertThat(exception.getCause().getStackFrames(1), JUnitMatchers.containsString("processElement")); Assert.assertFalse(exception.getCause().hasCause()); + + // The server should retry the work since reporting the exception succeeded. + // Make next retry should fail because we only expected 1 exception. + exception = server.getException(); } private static class TestTimerFn From 8378460a9f932906a11f54f9ecae2e73df182472 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 10 Feb 2015 09:19:08 -0800 Subject: [PATCH 0149/1541] Setup worker harness to only log to file to prevent duplication of log records sent to Cloud Logging and Monitoring. 
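A minimal sketch of the resulting java.util.logging setup, with a placeholder log path and formatter instead of the SDK's own formatter and file location: the root logger ends up with a single FileHandler and no ConsoleHandler, so each record is written exactly once.

    import java.io.IOException;
    import java.util.logging.FileHandler;
    import java.util.logging.Handler;
    import java.util.logging.Level;
    import java.util.logging.LogManager;
    import java.util.logging.Logger;
    import java.util.logging.SimpleFormatter;

    class FileOnlyLoggingSketch {
      static void initialize() throws IOException {
        LogManager logManager = LogManager.getLogManager();
        logManager.reset();  // drop any default console handlers

        FileHandler fileHandler = new FileHandler("/tmp/worker.log", true);  // placeholder path
        fileHandler.setFormatter(new SimpleFormatter());                     // placeholder formatter
        fileHandler.setLevel(Level.INFO);

        Logger rootLogger = logManager.getLogger("");
        for (Handler handler : rootLogger.getHandlers()) {
          rootLogger.removeHandler(handler);
        }
        rootLogger.setLevel(Level.INFO);
        rootLogger.addHandler(fileHandler);  // file only; no ConsoleHandler is registered
      }
    }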
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85994616 --- .../DataflowWorkerLoggingInitializer.java | 6 ------ .../DataflowWorkerLoggingInitializerTest.java | 18 ++++-------------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java index e89d6d73b94c2..a513dc75ee359 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.IOException; -import java.util.logging.ConsoleHandler; import java.util.logging.FileHandler; import java.util.logging.Formatter; import java.util.logging.Handler; @@ -69,10 +68,6 @@ void initialize(LogManager logManager) { fileHandler.setFormatter(formatter); fileHandler.setLevel(logLevel); - ConsoleHandler consoleHandler = new ConsoleHandler(); - consoleHandler.setFormatter(formatter); - consoleHandler.setLevel(logLevel); - // Reset the global log manager, get the root logger and remove the default log handlers. logManager.reset(); Logger rootLogger = logManager.getLogger(ROOT_LOGGER_NAME); @@ -81,7 +76,6 @@ void initialize(LogManager logManager) { } rootLogger.setLevel(logLevel); - rootLogger.addHandler(consoleHandler); rootLogger.addHandler(fileHandler); } catch (SecurityException | IOException e) { throw new ExceptionInInitializerError(e); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java index 68a6b58a2d9bc..50cc1e2d3ed18 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java @@ -17,7 +17,6 @@ package com.google.cloud.dataflow.sdk.runners.worker.logging; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; @@ -35,7 +34,6 @@ import org.mockito.MockitoAnnotations; import java.util.List; -import java.util.logging.ConsoleHandler; import java.util.logging.FileHandler; import java.util.logging.Handler; import java.util.logging.Level; @@ -68,12 +66,11 @@ public void testWithDefaults() { verify(mockRootLogger).getHandlers(); verify(mockRootLogger).removeHandler(mockHandler); verify(mockRootLogger).setLevel(Level.INFO); - verify(mockRootLogger, times(2)).addHandler(argument.capture()); + verify(mockRootLogger).addHandler(argument.capture()); verifyNoMoreInteractions(mockLogManager, mockRootLogger); List handlers = argument.getAllValues(); - assertTrue(isConsoleHandler(handlers.get(0), Level.INFO)); - assertTrue(isFileHandler(handlers.get(1), Level.INFO)); + assertTrue(isFileHandler(handlers.get(0), Level.INFO)); } @Test @@ -87,18 +84,11 @@ public void testWithOverrides() { verify(mockRootLogger).getHandlers(); verify(mockRootLogger).removeHandler(mockHandler); verify(mockRootLogger).setLevel(Level.WARNING); - verify(mockRootLogger, 
times(2)).addHandler(argument.capture()); + verify(mockRootLogger).addHandler(argument.capture()); verifyNoMoreInteractions(mockLogManager, mockRootLogger); List handlers = argument.getAllValues(); - assertTrue(isConsoleHandler(handlers.get(0), Level.WARNING)); - assertTrue(isFileHandler(handlers.get(1), Level.WARNING)); - } - - private boolean isConsoleHandler(Handler handler, Level level) { - return handler instanceof ConsoleHandler - && level.equals(handler.getLevel()) - && handler.getFormatter() instanceof DataflowWorkerLoggingFormatter; + assertTrue(isFileHandler(handlers.get(0), Level.WARNING)); } private boolean isFileHandler(Handler handler, Level level) { From 1294029bd8b2311f4a2dacda8d74aa024f0ba0fb Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 10 Feb 2015 10:07:25 -0800 Subject: [PATCH 0150/1541] Remove logic for getting credentials from gcloud binary since official application default credentials can now pass along credentials from the currently logged in user. See https://developers.google.com/accounts/docs/application-default-credentials for more details. Users will need to update to the latest version of gcloud by running 'gcloud components update'. See https://cloud.google.com/sdk/gcloud/ for more details. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=85998709 --- .../dataflow/sdk/options/GcpOptions.java | 5 - .../cloud/dataflow/sdk/util/Credentials.java | 63 +++------- .../dataflow/sdk/util/GCloudCredential.java | 113 ------------------ 3 files changed, 18 insertions(+), 163 deletions(-) delete mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 0d824405b9075..571dba49cd0e2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -96,11 +96,6 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { String getServiceAccountName(); void setServiceAccountName(String value); - @Description("The path to the gcloud binary. " - + " Default is to search the system path.") - String getGCloudPath(); - void setGCloudPath(String value); - /** * Directory for storing dataflow credentials. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index e37275cdb936f..c416eae3d844e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -28,7 +28,6 @@ import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; import com.google.api.client.util.Preconditions; -import com.google.api.client.util.Strings; import com.google.api.client.util.store.FileDataStoreFactory; import com.google.cloud.dataflow.sdk.options.GcpOptions; @@ -50,14 +49,15 @@ public class Credentials { private static final Logger LOG = LoggerFactory.getLogger(Credentials.class); - /** OAuth 2.0 scopes used by a local worker (not on GCE). - * The scope cloud-platform provides access to all Cloud Platform resources. - * cloud-platform isn't sufficient yet for talking to datastore so we request - * those resources separately. 
- * - * Note that trusted scope relationships don't apply to OAuth tokens, so for - * services we access directly (GCS) as opposed to through the backend - * (BigQuery, GCE), we need to explicitly request that scope. + /** + * OAuth 2.0 scopes used by a local worker (not on GCE). + * The scope cloud-platform provides access to all Cloud Platform resources. + * cloud-platform isn't sufficient yet for talking to datastore so we request + * those resources separately. + *

    + * Note that trusted scope relationships don't apply to OAuth tokens, so for + * services we access directly (GCS) as opposed to through the backend + * (BigQuery, GCE), we need to explicitly request that scope. */ private static final List SCOPES = Arrays.asList( "https://www.googleapis.com/auth/cloud-platform", @@ -74,8 +74,8 @@ public String getRedirectUri() { /** * Initializes OAuth2 credentials. - * - * This can use 4 different mechanisms for obtaining a credential: + *

    + * This can use 3 different mechanisms for obtaining a credential: *

      *
    1. * It can fetch the @@ -83,11 +83,6 @@ public String getRedirectUri() { * application default credentials. *
    2. *
    3. - * It can run the gcloud tool in a subprocess to obtain a credential. - * This is the preferred mechanism. The property "gcloud_path" can be - * used to specify where we search for gcloud data. - *
    4. - *
    5. * The user can specify a client secrets file and go through the OAuth2 * webflow. The credential will then be cached in the user's home * directory for reuse. Provide the property "secrets_file" to use this @@ -101,8 +96,8 @@ public String getRedirectUri() { *
    * The default mechanism is to use the * - * application default credentials falling back to gcloud. The other options can be - * used by providing the corresponding properties. + * application default credentials. The other options can be used by providing the + * corresponding properties. */ public static Credential getCredential(GcpOptions options) throws IOException, GeneralSecurityException { @@ -124,11 +119,12 @@ public static Credential getCredential(GcpOptions options) try { return GoogleCredential.getApplicationDefault().createScoped(SCOPES); } catch (IOException e) { - LOG.debug("Failed to get application default credentials, falling back to gcloud."); + throw new RuntimeException("Unable to get application default credentials. Please see " + + "https://developers.google.com/accounts/docs/application-default-credentials " + + "for details on how to specify credentials. This version of the SDK is " + + "dependent on the gcloud core component version 2015.02.05 or newer to " + + "be able to get credentials from the currently authorized user via gcloud auth.", e); } - - String gcloudPath = options.getGCloudPath(); - return getCredentialFromGCloud(gcloudPath); } /** @@ -149,29 +145,6 @@ private static Credential getCredentialFromFile( return credential; } - /** - * Loads OAuth2 credential from GCloud utility. - */ - private static Credential getCredentialFromGCloud(String gcloudPath) - throws IOException, GeneralSecurityException { - GCloudCredential credential; - HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport(); - if (Strings.isNullOrEmpty(gcloudPath)) { - credential = new GCloudCredential(transport); - } else { - credential = new GCloudCredential(gcloudPath, transport); - } - - try { - credential.refreshToken(); - } catch (IOException e) { - throw new RuntimeException("Could not obtain credential using gcloud", e); - } - - LOG.info("Got user credential from GCloud"); - return credential; - } - /** * Loads OAuth2 credential from client secrets, which may require an * interactive authorization prompt. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java deleted file mode 100644 index a3a3fd2eb5bf1..0000000000000 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2014 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.google.cloud.dataflow.sdk.util; - -import com.google.api.client.auth.oauth2.BearerToken; -import com.google.api.client.auth.oauth2.Credential; -import com.google.api.client.auth.oauth2.TokenResponse; -import com.google.api.client.http.HttpTransport; -import com.google.api.client.util.IOUtils; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; - -/** - * A credential object which uses the GCloud command line tool to get - * an access token. 
- */ -public class GCloudCredential extends Credential { - private static final String DEFAULT_GCLOUD_BINARY = "gcloud"; - private final String binary; - - public GCloudCredential(HttpTransport transport) { - this(DEFAULT_GCLOUD_BINARY, transport); - } - - /** - * Path to the GCloud binary. - */ - public GCloudCredential(String binary, HttpTransport transport) { - super(new Builder(BearerToken.authorizationHeaderAccessMethod()) - .setTransport(transport)); - - this.binary = binary; - } - - private String readStream(InputStream stream) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - IOUtils.copy(stream, baos); - return baos.toString("UTF-8"); - } - - @Override - protected TokenResponse executeRefreshToken() throws IOException { - TokenResponse response = new TokenResponse(); - - ProcessBuilder builder = new ProcessBuilder(); - // ProcessBuilder will search the path automatically for the binary - // GCLOUD_BINARY. - builder.command(Arrays.asList(binary, "auth", "print-access-token")); - Process process = builder.start(); - - try { - process.waitFor(); - } catch (InterruptedException e) { - throw new RuntimeException( - "Could not obtain an access token using gcloud; timed out waiting " + - "for gcloud."); - } - - if (process.exitValue() != 0) { - String output; - try { - output = readStream(process.getErrorStream()); - } catch (IOException e) { - throw new RuntimeException( - "Could not obtain an access token using gcloud."); - } - - throw new RuntimeException( - "Could not obtain an access token using gcloud. Result of " + - "invoking gcloud was:\n" + output); - } - - String output; - try { - output = readStream(process.getInputStream()); - } catch (IOException e) { - throw new RuntimeException( - "Could not obtain an access token using gcloud. We encountered an " + - "an error trying to read stdout.", e); - } - String[] lines = output.split("\n"); - - if (lines.length != 1) { - throw new RuntimeException( - "Could not obtain an access token using gcloud. Result of " + - "invoking gcloud was:\n" + output); - } - - // Access token should be good for 5 minutes. - Long expiresInSeconds = 5L * 60; - response.setExpiresInSeconds(expiresInSeconds); - response.setAccessToken(output.trim()); - - return response; - } -} From 0f05e56cb04600b3851ffee5c93b22dd01fe7e86 Mon Sep 17 00:00:00 2001 From: relax Date: Tue, 10 Feb 2015 15:47:40 -0800 Subject: [PATCH 0151/1541] Allows users of PubsubIO to specify which pubsub labels are used to propagate record timestamps and record ids. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86030744 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 193 +++++++++++++++++- .../runners/dataflow/PubsubIOTranslator.java | 15 ++ .../dataflow/sdk/util/PropertyNames.java | 3 + 3 files changed, 202 insertions(+), 9 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 4a379920249e7..2414feeb34f9f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -153,6 +153,10 @@ public static Bound named(String name) { *
  9. Must end with a letter or a number.
  10. *
  11. Cannot begin with 'goog' prefix.
  12. * + * + * Dataflow will start reading data published on this topic from the time the pipeline is + * started. Any data published on the topic before the pipeline is started will not be read + * by Dataflow. */ public static Bound topic(String topic) { return new Bound().topic(topic); } @@ -180,6 +184,44 @@ public static Bound subscription(String subscription) { return new Bound().subscription(subscription); } + /** + * Creates and returns a PubsubIO.Read PTransform where record timestamps are expected + * to be provided using the PubSub labeling API. The {@code } parameter + * specifies the label name. The label value sent to PubSub is a numerical value representing + * the number of milliseconds since the Unix epoch. For example, if using the joda time classes, + * org.joda.time.Instant.getMillis() returns the correct value for this label. + * + *

    If {@code } is not provided, the system will generate record timestamps + * the first time it sees each record. All windowing will be done relative to these timestamps. + * Windows are closed based on an estimate of when this source has finished producing data for + * a timestamp range, which means that late data can arrive after a window has been closed. The + * {#dropLateData} field allows you to control what to do with late data. + */ + public static Bound timestampLabel(String timestampLabel) { + return new Bound().timestampLabel(timestampLabel); + } + + /** + * If true, then late-arriving data from this source will be dropped. + */ + public static Bound dropLateData(boolean dropLateData) { + return new Bound().dropLateData(dropLateData); + } + + /** + * Creates and returns a PubSubIO.Read PTransform where unique record identifiers are + * expected to be provided using the PubSub labeling API. The {@code } parameter + * specifies the label name. The label value sent to PubSub can be any string value that + * uniquely identifies this record. + * + *

If idLabel is not provided, Dataflow cannot guarantee that no duplicate data will be + * delivered on the PubSub stream. In this case, deduplication of the stream will be + * strictly best effort. + */ + public static Bound idLabel(String idLabel) { + return new Bound().idLabel(idLabel); + } + /** * A PTransform that reads from a PubSub source and returns * an unbounded PCollection containing the items from the stream. @@ -191,10 +233,22 @@ public static class Bound String topic; /** The Pubsub subscription to read from. */ String subscription; + /** The Pubsub label to read timestamps from. */ + String timestampLabel; + Boolean dropLateData; + /** This is set for backwards compatibility with old services. If dropLateData is not + * explicitly called, then we won't forward that parameter to the service. */ + Boolean dropLateDataExplicit; + /** The Pubsub label to read ids from. */ + String idLabel; + + Bound() { + this.dropLateData = true; + this.dropLateDataExplicit = false; + } - Bound() {} - - Bound(String name, String subscription, String topic) { + Bound(String name, String subscription, String topic, String timestampLabel, + boolean dropLateData, boolean dropLateDataExplicit, String idLabel) { super(name); if (subscription != null) { Validator.validateSubscriptionName(subscription); @@ -204,18 +258,63 @@ public static class Bound } this.subscription = subscription; this.topic = topic; + this.timestampLabel = timestampLabel; + this.dropLateData = dropLateData; + this.dropLateDataExplicit = dropLateDataExplicit; + this.idLabel = idLabel; } + /** + * Returns a new PubsubIO.Read PTransform that's like this one but with the given + * step name. Does not modify the object. + */ public Bound named(String name) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, + dropLateDataExplicit, idLabel); } + /** + * Returns a new PubsubIO.Read PTransform that's like this one but reading from the + * given subscription. Does not modify the object. + */ public Bound subscription(String subscription) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, + dropLateDataExplicit, idLabel); } + /** + * Returns a new PubsubIO.Read PTransform that's like this one but reading from the + * given topic. Does not modify the object. + */ public Bound topic(String topic) { - return new Bound(name, subscription, topic); + return new Bound(name, subscription, topic, timestampLabel, dropLateData, + dropLateDataExplicit, idLabel); + } + + /** + * Returns a new PubsubIO.Read PTransform that's like this one but reading timestamps + * from the given PubSub label. Does not modify the object. + */ + public Bound timestampLabel(String timestampLabel) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, + dropLateDataExplicit, idLabel); + } + + /** + * Returns a new PubsubIO.Read PTransform that's like this one but with the specified + * setting for dropLateData. Does not modify the object. + */ + public Bound dropLateData(boolean dropLateData) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, true, idLabel); + } + + /** + * Returns a new PubsubIO.Read PTransform that's like this one but reading unique ids + * from the given PubSub label. Does not modify the object.
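A hypothetical usage sketch of the new read-side options; the topic path and label names below are placeholders, not values taken from the SDK:

    import com.google.cloud.dataflow.sdk.Pipeline;
    import com.google.cloud.dataflow.sdk.io.PubsubIO;
    import com.google.cloud.dataflow.sdk.values.PCollection;

    class ReadWithLabelsSketch {
      static PCollection<String> readEvents(Pipeline p) {
        return p.apply(PubsubIO.Read
            .topic("/topics/my-project/my-topic")  // placeholder topic
            .timestampLabel("ts")                  // event time, millis since the epoch
            .idLabel("unique_id")                  // lets the service deduplicate redeliveries
            .dropLateData(true));                  // drop records whose window has already closed
      }
    }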
+ */ + public Bound idLabel(String idLabel) { + return new Bound(name, subscription, topic, timestampLabel, dropLateData, + dropLateDataExplicit, idLabel); } @Override @@ -250,6 +349,22 @@ public String getSubscription() { return subscription; } + public String getTimestampLabel() { + return timestampLabel; + } + + public boolean getDropLateData() { + return dropLateData; + } + + public boolean getDropLateDataExplicit() { + return dropLateDataExplicit; + } + + public String getIdLabel() { + return idLabel; + } + static { // TODO: Figure out how to make this work under // DirectPipelineRunner. @@ -278,6 +393,30 @@ public static Bound topic(String topic) { return new Bound().topic(topic); } + /** + * If specified, Dataflow will add a Pubsub label to each output record specifying the logical + * timestamp of the record. {@code } determines the label name. The label value + * is a numerical value representing the number of milliseconds since the Unix epoch. For + * example, if using the joda time classes, the org.joda.time.Instant(long) constructor can be + * used to parse this value. If the output from this sink is being read by another Dataflow + * source, then PubsubIO.Read.timestampLabel can be used to ensure that the other source reads + * these timestamps from the appropriate label. + */ + public static Bound timestampLabel(String timestampLabel) { + return new Bound().timestampLabel(timestampLabel); + } + + /** + * If specified, Dataflow will add a Pubsub label to each output record containing a unique + * identifier for that record. {@code } determines the label name. The label value + * is an opaque string value. This is useful if the output from this sink is being read + * by another Dataflow source, in which case PubsubIO.Read.idLabel can be used to ensure that + * the other source reads these ids from the appropriate label. + */ + public static Bound idLabel(String idLabel) { + return new Bound().idLabel(idLabel); + } + /** * A PTransform that writes an unbounded {@code PCollection} * to a PubSub stream. @@ -287,23 +426,51 @@ public static class Bound extends PTransform, PDone> { /** The Pubsub topic to publish to. */ String topic; + String timestampLabel; + String idLabel; Bound() {} - Bound(String name, String topic) { + Bound(String name, String topic, String timestampLabel, String idLabel) { super(name); if (topic != null) { Validator.validateTopicName(topic); this.topic = topic; } + this.timestampLabel = timestampLabel; + this.idLabel = idLabel; } + /** + * Returns a new PubsubIO.Write PTransform that's like this one but with the given step + * name. Does not modify the object. + */ public Bound named(String name) { - return new Bound(name, topic); + return new Bound(name, topic, timestampLabel, idLabel); } + /** + * Returns a new PubsubIO.Write PTransform that's like this one but writing to the given + * topic. Does not modify the object. + */ public Bound topic(String topic) { - return new Bound(name, topic); + return new Bound(name, topic, timestampLabel, idLabel); + } + + /** + * Returns a new PubsubIO.Write PTransform that's like this one but publishing timestamps + * to the given PubSub label. Does not modify the object. + */ + public Bound timestampLabel(String timestampLabel) { + return new Bound(name, topic, timestampLabel, idLabel); + } + + /** + * Returns a new PubsubIO.Write PTransform that's like this one but publishing record ids + * to the given PubSub label. Does not modify the object.
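And a matching write-side sketch; a downstream PubsubIO.Read configured with the same timestampLabel and idLabel could then recover the timestamps and deduplicate (topic and label names remain placeholders):

    import com.google.cloud.dataflow.sdk.io.PubsubIO;
    import com.google.cloud.dataflow.sdk.values.PCollection;

    class WriteWithLabelsSketch {
      static void writeEvents(PCollection<String> messages) {
        messages.apply(PubsubIO.Write
            .topic("/topics/my-project/my-output-topic")  // placeholder topic
            .timestampLabel("ts")       // publish each record's event time as a label
            .idLabel("unique_id"));     // publish a unique id for downstream deduplication
      }
    }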
+ */ + public Bound idLabel(String idLabel) { + return new Bound(name, topic, timestampLabel, idLabel); } @Override @@ -327,6 +494,14 @@ public String getTopic() { return topic; } + public String getTimestampLabel() { + return timestampLabel; + } + + public String getIdLabel() { + return idLabel; + } + static { // TODO: Figure out how to make this work under // DirectPipelineRunner. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java index 706397bddd37b..8b297d6f31e37 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java @@ -57,6 +57,15 @@ private void translateReadHelper( if (transform.getSubscription() != null) { context.addInput(PropertyNames.PUBSUB_SUBSCRIPTION, transform.getSubscription()); } + if (transform.getTimestampLabel() != null) { + context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); + } + if (transform.getDropLateDataExplicit()) { + context.addInput(PropertyNames.PUBSUB_DROP_LATE_DATA, transform.getDropLateData()); + } + if (transform.getIdLabel() != null) { + context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); + } context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); // TODO: Orderedness? } @@ -83,6 +92,12 @@ private void translateWriteHelper( context.addStep(transform, "ParallelWrite"); context.addInput(PropertyNames.FORMAT, "pubsub"); context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic()); + if (transform.getTimestampLabel() != null) { + context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel()); + } + if (transform.getIdLabel() != null) { + context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel()); + } context.addEncodingInput( WindowedValue.getValueOnlyCoder(transform.getInput().getCoder())); context.addInput(PropertyNames.PARALLEL_INPUT, transform.getInput()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java index 0afe5ae411901..a22f7893a334c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java @@ -68,7 +68,10 @@ public class PropertyNames { public static final String OUTPUT_NAME = "output_name"; public static final String PARALLEL_INPUT = "parallel_input"; public static final String PHASE = "phase"; + public static final String PUBSUB_DROP_LATE_DATA = "pubsub_drop_late_data"; + public static final String PUBSUB_ID_LABEL = "pubsub_id_label"; public static final String PUBSUB_SUBSCRIPTION = "pubsub_subscription"; + public static final String PUBSUB_TIMESTAMP_LABEL = "pubsub_timestamp_label"; public static final String PUBSUB_TOPIC = "pubsub_topic"; public static final String SCALAR_FIELD_NAME = "value"; public static final String SERIALIZED_FN = "serialized_fn"; From 5a11734d638b53662c036ead93094c5a7fdd9994 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 11 Feb 2015 14:54:15 -0800 Subject: [PATCH 0152/1541] Re-add support for getting credentials from gcloud for older versions of gcloud. 
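A compact sketch of the credential lookup order this patch restores, with hypothetical helper names (the real logic lives in Credentials.getCredential and GCloudCredential): try application default credentials first, and only shell out to the gcloud binary when that fails.

    import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
    import java.io.IOException;
    import java.util.Collections;
    import java.util.Scanner;

    class CredentialFallbackSketch {
      // Preferred path first, gcloud fallback second, mirroring the restored behavior.
      static String accessToken() throws IOException, InterruptedException {
        try {
          GoogleCredential credential = GoogleCredential.getApplicationDefault()
              .createScoped(Collections.singletonList(
                  "https://www.googleapis.com/auth/cloud-platform"));  // illustrative scope list
          credential.refreshToken();
          return credential.getAccessToken();
        } catch (IOException e) {
          return gcloudAccessToken();  // older gcloud installs without application default creds
        }
      }

      // Roughly what the re-added GCloudCredential helper does: shell out for a short-lived token.
      static String gcloudAccessToken() throws IOException, InterruptedException {
        Process process = new ProcessBuilder("gcloud", "auth", "print-access-token").start();
        process.waitFor();
        try (Scanner scanner = new Scanner(process.getInputStream(), "UTF-8")) {
          return scanner.hasNextLine() ? scanner.nextLine().trim() : "";
        }
      }
    }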
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86122002 --- .../dataflow/sdk/options/GcpOptions.java | 5 + .../cloud/dataflow/sdk/util/Credentials.java | 63 +++++++--- .../dataflow/sdk/util/GCloudCredential.java | 113 ++++++++++++++++++ 3 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index 571dba49cd0e2..0d824405b9075 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -96,6 +96,11 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { String getServiceAccountName(); void setServiceAccountName(String value); + @Description("The path to the gcloud binary. " + + " Default is to search the system path.") + String getGCloudPath(); + void setGCloudPath(String value); + /** * Directory for storing dataflow credentials. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index c416eae3d844e..e37275cdb936f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -28,6 +28,7 @@ import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; import com.google.api.client.util.Preconditions; +import com.google.api.client.util.Strings; import com.google.api.client.util.store.FileDataStoreFactory; import com.google.cloud.dataflow.sdk.options.GcpOptions; @@ -49,15 +50,14 @@ public class Credentials { private static final Logger LOG = LoggerFactory.getLogger(Credentials.class); - /** - * OAuth 2.0 scopes used by a local worker (not on GCE). - * The scope cloud-platform provides access to all Cloud Platform resources. - * cloud-platform isn't sufficient yet for talking to datastore so we request - * those resources separately. - *

    - * Note that trusted scope relationships don't apply to OAuth tokens, so for - * services we access directly (GCS) as opposed to through the backend - * (BigQuery, GCE), we need to explicitly request that scope. + /** OAuth 2.0 scopes used by a local worker (not on GCE). + * The scope cloud-platform provides access to all Cloud Platform resources. + * cloud-platform isn't sufficient yet for talking to datastore so we request + * those resources separately. + * + * Note that trusted scope relationships don't apply to OAuth tokens, so for + * services we access directly (GCS) as opposed to through the backend + * (BigQuery, GCE), we need to explicitly request that scope. */ private static final List SCOPES = Arrays.asList( "https://www.googleapis.com/auth/cloud-platform", @@ -74,8 +74,8 @@ public String getRedirectUri() { /** * Initializes OAuth2 credentials. - *

    - * This can use 3 different mechanisms for obtaining a credential: + * + * This can use 4 different mechanisms for obtaining a credential: *

      *
    1. * It can fetch the @@ -83,6 +83,11 @@ public String getRedirectUri() { * application default credentials. *
    2. *
    3. + * It can run the gcloud tool in a subprocess to obtain a credential. + * This is the preferred mechanism. The property "gcloud_path" can be + * used to specify where we search for gcloud data. + *
    4. + *
    5. * The user can specify a client secrets file and go through the OAuth2 * webflow. The credential will then be cached in the user's home * directory for reuse. Provide the property "secrets_file" to use this @@ -96,8 +101,8 @@ public String getRedirectUri() { *
    * The default mechanism is to use the * - * application default credentials. The other options can be used by providing the - * corresponding properties. + * application default credentials falling back to gcloud. The other options can be + * used by providing the corresponding properties. */ public static Credential getCredential(GcpOptions options) throws IOException, GeneralSecurityException { @@ -119,12 +124,11 @@ public static Credential getCredential(GcpOptions options) try { return GoogleCredential.getApplicationDefault().createScoped(SCOPES); } catch (IOException e) { - throw new RuntimeException("Unable to get application default credentials. Please see " - + "https://developers.google.com/accounts/docs/application-default-credentials " - + "for details on how to specify credentials. This version of the SDK is " - + "dependent on the gcloud core component version 2015.02.05 or newer to " - + "be able to get credentials from the currently authorized user via gcloud auth.", e); + LOG.debug("Failed to get application default credentials, falling back to gcloud."); } + + String gcloudPath = options.getGCloudPath(); + return getCredentialFromGCloud(gcloudPath); } /** @@ -145,6 +149,29 @@ private static Credential getCredentialFromFile( return credential; } + /** + * Loads OAuth2 credential from GCloud utility. + */ + private static Credential getCredentialFromGCloud(String gcloudPath) + throws IOException, GeneralSecurityException { + GCloudCredential credential; + HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport(); + if (Strings.isNullOrEmpty(gcloudPath)) { + credential = new GCloudCredential(transport); + } else { + credential = new GCloudCredential(gcloudPath, transport); + } + + try { + credential.refreshToken(); + } catch (IOException e) { + throw new RuntimeException("Could not obtain credential using gcloud", e); + } + + LOG.info("Got user credential from GCloud"); + return credential; + } + /** * Loads OAuth2 credential from client secrets, which may require an * interactive authorization prompt. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java new file mode 100644 index 0000000000000..a3a3fd2eb5bf1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GCloudCredential.java @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.api.client.auth.oauth2.BearerToken; +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.auth.oauth2.TokenResponse; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.util.IOUtils; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +/** + * A credential object which uses the GCloud command line tool to get + * an access token. 
+ */ +public class GCloudCredential extends Credential { + private static final String DEFAULT_GCLOUD_BINARY = "gcloud"; + private final String binary; + + public GCloudCredential(HttpTransport transport) { + this(DEFAULT_GCLOUD_BINARY, transport); + } + + /** + * Path to the GCloud binary. + */ + public GCloudCredential(String binary, HttpTransport transport) { + super(new Builder(BearerToken.authorizationHeaderAccessMethod()) + .setTransport(transport)); + + this.binary = binary; + } + + private String readStream(InputStream stream) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + IOUtils.copy(stream, baos); + return baos.toString("UTF-8"); + } + + @Override + protected TokenResponse executeRefreshToken() throws IOException { + TokenResponse response = new TokenResponse(); + + ProcessBuilder builder = new ProcessBuilder(); + // ProcessBuilder will search the path automatically for the binary + // GCLOUD_BINARY. + builder.command(Arrays.asList(binary, "auth", "print-access-token")); + Process process = builder.start(); + + try { + process.waitFor(); + } catch (InterruptedException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud; timed out waiting " + + "for gcloud."); + } + + if (process.exitValue() != 0) { + String output; + try { + output = readStream(process.getErrorStream()); + } catch (IOException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud."); + } + + throw new RuntimeException( + "Could not obtain an access token using gcloud. Result of " + + "invoking gcloud was:\n" + output); + } + + String output; + try { + output = readStream(process.getInputStream()); + } catch (IOException e) { + throw new RuntimeException( + "Could not obtain an access token using gcloud. We encountered an " + + "an error trying to read stdout.", e); + } + String[] lines = output.split("\n"); + + if (lines.length != 1) { + throw new RuntimeException( + "Could not obtain an access token using gcloud. Result of " + + "invoking gcloud was:\n" + output); + } + + // Access token should be good for 5 minutes. + Long expiresInSeconds = 5L * 60; + response.setExpiresInSeconds(expiresInSeconds); + response.setAccessToken(output.trim()); + + return response; + } +} From eea6c20cfb30a7388ecea4f89cc4001ac96bcfec Mon Sep 17 00:00:00 2001 From: robertwb Date: Wed, 11 Feb 2015 15:16:33 -0800 Subject: [PATCH 0153/1541] Internal support for demultiplexing reiterables. No externally visible change. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86124146 --- .../sdk/util/common/TaggedReiteratorList.java | 152 +++++++++++++ .../util/common/TaggedReiteratorListTest.java | 209 ++++++++++++++++++ 2 files changed, 361 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorList.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorListTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorList.java new file mode 100644 index 0000000000000..c11f5098306bb --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorList.java @@ -0,0 +1,152 @@ +/******************************************************************************* + * Copyright (C) 2015 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Provides a view a of re-iterable of tagged values, with monotonically + * increasing tags, as a list of tagged re-iterables. + * + * This class, and the returned iterators, are not threadsafe. + */ +public class TaggedReiteratorList extends AbstractList> { + + /** + * Interface for extracting the tag and value from an opaque element. + */ + public interface TagExtractor { + public int getTag(T elem); + public Object getValue(T elem); + } + + private final TagExtractor extractor; + + private final List> starts; + + private final int size; + + public TaggedReiteratorList(Reiterator taggedReiterator, + TagExtractor extractor) { + this(taggedReiterator, extractor, -1); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + public TaggedReiteratorList(Reiterator taggedReiterator, + TagExtractor extractor, + int size) { + starts = new ArrayList<>(); + starts.add(new PeekingReiterator((Reiterator) taggedReiterator)); + this.extractor = (TagExtractor) extractor; + this.size = size; + } + + @Override + public Reiterator get(int tag) { + return new SubIterator(tag); + } + + @Override + public int size() { + if (size == -1) { + throw new UnsupportedOperationException(); + } else { + return size; + } + } + + private PeekingReiterator getStart(int tag) { + if (tag >= starts.size()) { + PeekingReiterator start = getStart(tag - 1); + while (start.hasNext() && extractor.getTag(start.peek()) < tag) { + start.next(); + } + starts.add(start); + } + // Use the stored value, store a copy. + return starts.set(tag, starts.get(tag).copy()); + } + + private static final PeekingReiterator EMPTY_TAIL = + new PeekingReiterator( + new Reiterator() { + public boolean hasNext() { return false; } + public Object next() { throw new NoSuchElementException(); } + public void remove() { throw new IllegalArgumentException(); } + public Reiterator copy() { throw new IllegalArgumentException(); } + }); + + private class SubIterator implements Reiterator { + + private final int tag; + private PeekingReiterator iterator; + + private SubIterator(int tag) { + this(tag, null); + } + + private SubIterator(int tag, PeekingReiterator iterator) { + this.tag = tag; + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + if (iterator == null) { + iterator = getStart(tag); + } + if (iterator.hasNext() && extractor.getTag(iterator.peek()) == tag) { + return true; + } else { + if (iterator != EMPTY_TAIL) { + // Set up for the common case that we're iterating over the + // next tag soon. 
+ if (starts.size() > tag + 1) { + starts.set(tag + 1, iterator); + } else { + starts.add(tag + 1, iterator); + } + iterator = EMPTY_TAIL; + } + return false; + } + } + + @Override + public Object next() { + if (hasNext()) { + return extractor.getValue(iterator.next()); + } else { + throw new NoSuchElementException(); + } + } + + @Override + public void remove() { + throw new IllegalArgumentException(); + } + + @Override + public Reiterator copy() { + return new SubIterator( + tag, iterator == null || iterator == EMPTY_TAIL ? iterator : iterator.copy()); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorListTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorListTest.java new file mode 100644 index 0000000000000..a3682c25105c2 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/TaggedReiteratorListTest.java @@ -0,0 +1,209 @@ +/******************************************************************************* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + ******************************************************************************/ + +package com.google.cloud.dataflow.sdk.util.common; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +/** + * Tests for {@link TaggedReiteratorList}. 
+ */ +@RunWith(JUnit4.class) +public class TaggedReiteratorListTest { + + @Test + public void testSingleIterator() { + TaggedReiteratorList iter = create(new String[] {"a", "b", "c"}); + assertEquals(iter.get(0), "a", "b", "c"); + assertEquals(iter.get(0), "a", "b", "c"); + assertEquals(iter.get(1) /*empty*/); + assertEquals(iter.get(0), "a", "b", "c"); + } + + @Test + public void testSequentialAccess() { + TaggedReiteratorList iter = create(3, new String[] {"a", "b", "c"}); + for (int i = 0; i < 2; i++) { + assertEquals(iter.get(0), "a0", "b0", "c0"); + assertEquals(iter.get(1), "a1", "b1", "c1"); + assertEquals(iter.get(2), "a2", "b2", "c2"); + } + for (int i = 0; i < 2; i++) { + assertEquals(iter.get(2), "a2", "b2", "c2"); + assertEquals(iter.get(1), "a1", "b1", "c1"); + assertEquals(iter.get(0), "a0", "b0", "c0"); + } + } + + @Test + public void testRandomAccess() { + TaggedReiteratorList iter = create(6, new String[] {"a", "b"}); + assertEquals(iter.get(3), "a3", "b3"); + assertEquals(iter.get(1), "a1", "b1"); + assertEquals(iter.get(5), "a5", "b5"); + assertEquals(iter.get(0), "a0", "b0"); + assertEquals(iter.get(4), "a4", "b4"); + assertEquals(iter.get(4), "a4", "b4"); + assertEquals(iter.get(1), "a1", "b1"); + } + + @Test + public void testPartialIteration() { + TaggedReiteratorList iter = create(6, new String[] {"a", "b", "c"}); + Iterator get0 = iter.get(0); + Iterator get1 = iter.get(1); + Iterator get3 = iter.get(3); + assertEquals(asList(get0, 1), "a0"); + assertEquals(asList(get1, 2), "a1", "b1"); + assertEquals(asList(get3, 3), "a3", "b3", "c3"); + Iterator get2 = iter.get(2); + Iterator get0Again = iter.get(0); + assertEquals(asList(get0, 1), "b0"); + assertEquals(get2, "a2", "b2", "c2"); + assertEquals(get0Again, "a0", "b0", "c0"); + assertEquals(asList(get0), "c0"); + Iterator get4 = iter.get(4); + assertEquals(get4, "a4", "b4", "c4"); + assertEquals(get4 /*empty*/); + assertEquals(iter.get(4), "a4", "b4", "c4"); + } + + @Test + public void testNextIteration() { + TaggedReiteratorList iter = create(2, new String[] {"a", "b", "c"}); + Reiterator get0 = iter.get(0); + assertEquals(get0, "a0", "b0", "c0"); + Iterator get1 = iter.get(1); + Assert.assertEquals(get1.next(), "a1"); + assertEquals(get0.copy() /*empty*/); + Assert.assertEquals(get1.next(), "b1"); + assertEquals(iter.get(1), "a1", "b1", "c1"); + } + + @Test + public void testEmpties() { + TaggedReiteratorList iter = create(new String[] {}, + new String[] {"a", "b", "c"}, + new String[] {}, + new String[] {}, + new String[] {"d"}); + assertEquals(iter.get(2) /*empty*/); + assertEquals(iter.get(1), "a", "b", "c"); + assertEquals(iter.get(2) /*empty*/); + assertEquals(iter.get(0) /*empty*/); + assertEquals(iter.get(2) /*empty*/); + assertEquals(iter.get(4), "d"); + assertEquals(iter.get(3) /*empty*/); + } + + ///////////////////////////////////////////////////////////////////////////// + // Helpers + + private TaggedReiteratorList create(String[]... values) { + ArrayList taggedValues = new ArrayList<>(); + for (int tag = 0; tag < values.length; tag++) { + for (String value : values[tag]) { + taggedValues.add(new TaggedValue(tag, value)); + } + } + return new TaggedReiteratorList( + new TestReiterator(taggedValues.toArray(new TaggedValue[0])), + new TaggedValueExtractor()); + } + + private TaggedReiteratorList create(int repeat, String... 
values) { + ArrayList taggedValues = new ArrayList<>(); + for (int tag = 0; tag < repeat; tag++) { + for (String value : values) { + taggedValues.add(new TaggedValue(tag, value + tag)); + } + } + return new TaggedReiteratorList( + new TestReiterator(taggedValues.toArray(new TaggedValue[0])), + new TaggedValueExtractor()); + } + + private List asList(Iterator iter) { + return asList(iter, Integer.MAX_VALUE); + } + + private List asList(Iterator iter, int limit) { + List list = new ArrayList<>(); + for (int i = 0; i < limit && iter.hasNext(); i++) { + list.add(iter.next()); + } + return list; + } + + private void assertEquals(Iterator actual, Object... expected) { + assertEquals(asList(actual), expected); + } + + private void assertEquals(List actual, Object... expected) { + Assert.assertEquals(Arrays.asList(expected), actual); + } + + private static class TestReiterator implements Reiterator { + private final TaggedValue[] values; + private int pos = 0; + public TestReiterator(TaggedValue... values) { + this(values, 0); + } + private TestReiterator(TaggedValue[] values, int pos) { + this.values = values; + this.pos = pos; + } + public boolean hasNext() { + return pos < values.length; + } + public TaggedValue next() { + return values[pos++]; + } + public void remove() { + throw new IllegalArgumentException(); + } + public TestReiterator copy() { + return new TestReiterator(values, pos); + } + } + + private static class TaggedValueExtractor + implements TaggedReiteratorList.TagExtractor { + public int getTag(TaggedValue elem) { + return elem.tag; + } + public String getValue(TaggedValue elem) { + return elem.value; + } + } + + private static class TaggedValue { + public final int tag; + public final String value; + public TaggedValue(int tag, String value) { + this.tag = tag; + this.value = value; + } + } +} From d1ec460f2324edbda3288ef73a6c119d12085132 Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 11 Feb 2015 16:04:47 -0800 Subject: [PATCH 0154/1541] Test: add testSessionsCombine to cover GroupAlsoByWindowsDoFn.processElementViaWindowSet(). 
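To make the behavior under test concrete, a hypothetical pipeline fragment that sums values per key inside session windows; the gap duration and class names are placeholders, not the test's values:

    import com.google.cloud.dataflow.sdk.transforms.Combine;
    import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
    import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
    import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
    import com.google.cloud.dataflow.sdk.values.KV;
    import com.google.cloud.dataflow.sdk.values.PCollection;
    import org.joda.time.Duration;

    class SessionSumsSketch {
      // Sums all Integer values that land in the same session window for a key.
      static class SumIntegers implements SerializableFunction<Iterable<Integer>, Integer> {
        @Override
        public Integer apply(Iterable<Integer> values) {
          int sum = 0;
          for (int value : values) {
            sum += value;
          }
          return sum;
        }
      }

      static PCollection<KV<String, Integer>> sumPerSession(PCollection<KV<String, Integer>> input) {
        return input
            // Elements closer together than the gap duration share one session window.
            .apply(Window.<KV<String, Integer>>into(
                Sessions.withGapDuration(Duration.standardMinutes(10))))
            .apply(Combine.perKey(new SumIntegers()));
      }
    }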
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86128434 --- .../dataflow/sdk/transforms/CombineTest.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 403190b6115aa..284ae327eabcf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -37,6 +37,7 @@ import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.util.PropertyNames; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; @@ -190,7 +191,7 @@ private void runTestAccumulatingCombine(KV[] table, @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) - public void testWindowedCombine() { + public void testFixedWindowsCombine() { Pipeline p = TestPipeline.create(); PCollection> input = @@ -215,6 +216,32 @@ public void testWindowedCombine() { p.run(); } + @Test + @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) + public void testSessionsCombine() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.timestamped(Arrays.asList(TABLE), + Arrays.asList(0L, 4L, 7L, 10L, 16L))) + .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())) + .apply(Window.>into(Sessions.withGapDuration(Duration.millis(5)))); + + PCollection sum = input + .apply(Values.create()) + .apply(Combine.globally(new SumInts())); + + PCollection> sumPerKey = input + .apply(Combine.perKey(new SumInts())); + + DataflowAssert.that(sum).containsInAnyOrder(7, 13); + DataflowAssert.that(sumPerKey).containsInAnyOrder( + KV.of("a", 6), + KV.of("b", 1), + KV.of("b", 13)); + p.run(); + } + @Test @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class) public void testWindowedCombineEmpty() { From 2a71b265199a0a9bd61d37c5497678dfa5ba9596 Mon Sep 17 00:00:00 2001 From: mattlang Date: Wed, 11 Feb 2015 17:02:12 -0800 Subject: [PATCH 0155/1541] Do not try to decompress gzip input streams that are already decompressed. 
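The same header check in isolation, as a small self-contained sketch (class and method names are arbitrary): only wrap the stream in GZIPInputStream when the two gzip magic bytes are actually present.

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.PushbackInputStream;
    import java.util.zip.GZIPInputStream;

    class MaybeGzipSketch {
      // Wraps the stream in GZIPInputStream only when the gzip magic bytes (0x1f 0x8b) are present.
      static InputStream maybeDecompress(InputStream in) throws IOException {
        PushbackInputStream pushback = new PushbackInputStream(in, 2);
        byte[] header = new byte[2];
        int read = pushback.read(header);
        if (read > 0) {
          pushback.unread(header, 0, read);  // put the peeked bytes back
        }
        boolean isGzip = read == 2
            && (header[0] & 0xff) == 0x1f
            && (header[1] & 0xff) == 0x8b;   // GZIPInputStream.GZIP_MAGIC == 0x8b1f, little-endian
        return isGzip ? new GZIPInputStream(pushback) : pushback;
      }
    }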
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86133530 --- .../google/cloud/dataflow/sdk/io/TextIO.java | 14 +++++++++- .../cloud/dataflow/sdk/io/TextIOTest.java | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index 487dcc371af31..40c24353c1d7b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -32,11 +32,13 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PDone; import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.common.primitives.Ints; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PushbackInputStream; import java.util.List; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; @@ -647,7 +649,17 @@ public static enum CompressionType implements FileBasedReader.DecompressingStrea GZIP(".gz") { @Override public InputStream createInputStream(InputStream inputStream) throws IOException { - return new GZIPInputStream(inputStream); + // Determine if the input stream is gzipped. The input stream returned from the + // GCS connector may already be decompressed, and no action is required. + PushbackInputStream stream = new PushbackInputStream(inputStream, 2); + byte[] headerBytes = new byte[2]; + int bytesRead = stream.read(headerBytes); + stream.unread(headerBytes, 0, bytesRead); + int header = Ints.fromBytes((byte) 0, (byte) 0, headerBytes[1], headerBytes[0]); + if (header == GZIPInputStream.GZIP_MAGIC) { + return new GZIPInputStream(stream); + } + return stream; } }, /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java index ead2e779eb026..ee54668db71ed 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/TextIOTest.java @@ -423,6 +423,7 @@ public void testCompressedRead() throws Exception { } } + DirectPipeline p = DirectPipeline.createForTest(); TextIO.Read.Bound read = @@ -434,4 +435,29 @@ public void testCompressedRead() throws Exception { assertThat(results.getPCollection(output), containsInAnyOrder(expected.toArray())); tmpFile.delete(); } + + @Test + public void testGZIPReadWhenUncompressed() throws Exception { + String[] lines = {"Meritorious condor", "Obnoxious duck"}; + File tmpFile = tmpFolder.newFile("test"); + String filename = tmpFile.getPath(); + + List expected = new ArrayList<>(); + try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) { + for (String line : lines) { + writer.println(line); + expected.add(line); + } + } + + DirectPipeline p = DirectPipeline.createForTest(); + TextIO.Read.Bound read = + TextIO.Read.from(filename).withCompressionType(CompressionType.GZIP); + PCollection output = p.apply(read); + + EvaluationResults results = p.run(); + + assertThat(results.getPCollection(output), containsInAnyOrder(expected.toArray())); + tmpFile.delete(); + } } From 21c8f75e2f43225db9972622078dd07e92396ad3 Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 12 Feb 2015 13:28:19 -0800 Subject: [PATCH 0156/1541] Fix units in log message in the StreamingDataflowWorker [] ------------- Created 
by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86208755 --- .../dataflow/sdk/runners/worker/StreamingDataflowWorker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index c9167b13da975..f16200038f326 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -291,7 +291,7 @@ private void dispatchLoop() { while (rt.freeMemory() < rt.totalMemory() * PUSHBACK_THRESHOLD) { if (lastPushbackLog < (lastPushbackLog = System.currentTimeMillis()) - 60 * 1000) { LOG.warn("In pushback, not accepting new work. Free Memory: {}MB / {}MB", - rt.freeMemory(), rt.totalMemory()); + rt.freeMemory() / 1e6, rt.totalMemory() / 1e6); System.gc(); } sleep(10); From 544de16ca6b827be61db469d3bf14093c885a72a Mon Sep 17 00:00:00 2001 From: sisk Date: Fri, 13 Feb 2015 05:41:17 -0800 Subject: [PATCH 0157/1541] Removing log lines from workers that aren't relevant to users, also fix up logs that should stay. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86265043 --- .../dataflow/sdk/coders/CoderRegistry.java | 22 +++++++++---------- .../sdk/runners/worker/DataflowWorker.java | 13 +++++++---- .../runners/worker/DataflowWorkerHarness.java | 4 ++-- .../util/common/worker/MapTaskExecutor.java | 8 +++---- .../common/worker/WorkProgressUpdater.java | 10 ++++----- 5 files changed, 31 insertions(+), 26 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java index d2b61293c1dc6..37128918b5d71 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -161,20 +161,20 @@ public void registerCoder(Class clazz, coderClazz.getDeclaredMethod("of", factoryMethodArgTypes); } catch (NoSuchMethodException | SecurityException exn) { throw new IllegalArgumentException( - "cannot register Coder " + coderClazz + ": " + "Cannot register Coder " + coderClazz + ": " + "does not have an accessible method named 'of' with " + numTypeParameters + " arguments of Coder type", exn); } if (!Modifier.isStatic(factoryMethod.getModifiers())) { throw new IllegalArgumentException( - "cannot register Coder " + coderClazz + ": " + "Cannot register Coder " + coderClazz + ": " + "method named 'of' with " + numTypeParameters + " arguments of Coder type is not static"); } if (!coderClazz.isAssignableFrom(factoryMethod.getReturnType())) { throw new IllegalArgumentException( - "cannot register Coder " + coderClazz + ": " + "Cannot register Coder " + coderClazz + ": " + "method named 'of' with " + numTypeParameters + " arguments of Coder type does not return a " + coderClazz); } @@ -184,7 +184,7 @@ public void registerCoder(Class clazz, } } catch (SecurityException exn) { throw new IllegalArgumentException( - "cannot register Coder " + coderClazz + ": " + "Cannot register Coder " + coderClazz + ": " + "method named 'of' with " + numTypeParameters + " arguments of Coder type is not accessible", exn); @@ -200,7 +200,7 @@ public void registerCoder(Class clazz, "getInstanceComponents", clazz); } catch (NoSuchMethodException | SecurityException 
exn) { - LOG.warn("cannot find getInstanceComponents for class {}. This may limit the ability to" + LOG.warn("Cannot find getInstanceComponents for class {}. This may limit the ability to" + " infer a Coder for values of this type.", coderClazz, exn); } } @@ -212,7 +212,7 @@ public void registerCoder(Class rawClazz, CoderFactory coderFactory) { if (coderFactoryMap.put(rawClazz, coderFactory) != null) { throw new IllegalArgumentException( - "cannot register multiple default Coder factories for " + rawClazz); + "Cannot register multiple default Coder factories for " + rawClazz); } } @@ -497,7 +497,7 @@ public Coder create(List> typeArgumentCoders) { NullPointerException | ExceptionInInitializerError exn) { throw new IllegalStateException( - "error when invoking Coder factory method " + coderFactoryMethod, + "Error when invoking Coder factory method " + coderFactoryMethod, exn); } } @@ -506,7 +506,7 @@ public Coder create(List> typeArgumentCoders) { public List getInstanceComponents(Object value) { if (getComponentsMethod == null) { throw new IllegalStateException( - "no suitable static getInstanceComponents method available for " + "No suitable static getInstanceComponents method available for " + "Coder " + coderClazz); } @@ -521,7 +521,7 @@ public List getInstanceComponents(Object value) { | NullPointerException | ExceptionInInitializerError exn) { throw new IllegalStateException( - "error when invoking Coder getComponents method " + getComponentsMethod, + "Error when invoking Coder getComponents method " + getComponentsMethod, exn); } } @@ -540,7 +540,7 @@ static CoderFactory defaultCoderFactory(Class coderClazz, final Method coderF CoderFactory getDefaultCoderFactory(Class clazz) { CoderFactory coderFactory = coderFactoryMap.get(clazz); if (coderFactory == null) { - LOG.debug("No Coder registered for {}", clazz); + LOG.info("No Coder registered for {}", clazz); } return coderFactory; } @@ -582,7 +582,7 @@ Coder getDefaultCoder(Type type, Map> typeCoderBindings) { return null; } else { throw new RuntimeException( - "internal error: unexpected kind of Type: " + type); + "Internal error: unexpected kind of Type: " + type); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index f1fe0d2f6ec01..fcd6ccd618a4e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -101,7 +101,7 @@ public boolean getAndPerformWork() throws IOException { * @throws IOException Only if the WorkUnitClient fails. */ private boolean doWork(WorkItem workItem) throws IOException { - LOG.info("Executing: {}", workItem); + LOG.debug("Executing: {}", workItem); WorkExecutor worker = null; try { @@ -132,13 +132,13 @@ private boolean doWork(WorkItem workItem) throws IOException { // Log all counter values for debugging purposes. CounterSet counters = worker.getOutputCounters(); for (Counter counter : counters) { - LOG.info("COUNTER {}.", counter); + LOG.trace("COUNTER {}.", counter); } // Log all metrics for debugging purposes. 
Collection> metrics = worker.getOutputMetrics(); for (Metric metric : metrics) { - LOG.info("METRIC {}: {}", metric.getName(), metric.getValue()); + LOG.trace("METRIC {}: {}", metric.getName(), metric.getValue()); } // stopReportingProgress can throw an exception if the final progress @@ -222,7 +222,12 @@ private void reportStatus(DataflowWorkerHarnessOptions options, String status, W @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors, long finalReportIndex) throws IOException { - LOG.info("{} processing work item {}", status, uniqueId(workItem)); + String message = "{} processing work item {}"; + if (null != errors && errors.size() > 0) { + LOG.warn(message, status, uniqueId(workItem)); + } else { + LOG.debug(message, status, uniqueId(workItem)); + } WorkItemStatus workItemStatus = buildStatus(workItem, true/*completed*/, counters, metrics, options, null, null, operationResponse, errors, finalReportIndex); workUnitClient.reportWorkItemStatus(workItemStatus); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index 4b0879e22c99d..3ac2895afcba4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -130,11 +130,11 @@ public Boolean call() throws Exception { } long endTime = DateTimeUtils.currentTimeMillis(); - LOG.info("processWork() start time: {}, end time: {}", + LOG.debug("Parallel worker thread processing start time: {}, end time: {}", ISODateTimeFormat.dateTime().print(startTime), ISODateTimeFormat.dateTime().print(endTime)); for (long completionTime : completionTimes) { - LOG.info("Duration: {}ms Wasted Time: {}ms", + LOG.debug("Worker thread execution time {}ms, idle time waiting for other work threads: {}ms", completionTime - startTime, endTime - completionTime); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java index 717c44aeec422..1d2595feda39c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java @@ -53,13 +53,13 @@ public MapTaskExecutor( @Override public void execute() throws Exception { - LOG.debug("executing map task"); + LOG.debug("Executing map task"); // Start operations, in reverse-execution-order, so that a // consumer is started before a producer might output to it. // Starting a root operation such as a ReadOperation does the work // of processing the input dataset. - LOG.debug("starting operations"); + LOG.debug("Starting operations"); ListIterator iterator = operations.listIterator(operations.size()); while (iterator.hasPrevious()) { Operation op = iterator.previous(); @@ -69,12 +69,12 @@ public void execute() throws Exception { // Finish operations, in forward-execution-order, so that a // producer finishes outputting to its consumers before those // consumers are themselves finished. - LOG.debug("finishing operations"); + LOG.debug("Finishing operations"); for (Operation op : operations) { op.finish(); } - LOG.debug("map task execution complete"); + LOG.debug("Map task execution complete"); // TODO: support for success / failure ports? 
} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java index f212d42e469f1..90dfb7727cdf5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/WorkProgressUpdater.java @@ -101,7 +101,7 @@ public void startReportingProgress() { nextProgressReportInterval(leaseRemainingTime / 2, leaseRemainingTime); requestedLeaseDurationMs = DEFAULT_LEASE_DURATION_MILLIS; - LOG.info("Started reporting progress for work item: {}", workString()); + LOG.debug("Started reporting progress for work item: {}", workString()); scheduleNextUpdate(); } @@ -121,12 +121,12 @@ public void stopReportingProgress() throws Exception { // We send a final progress report in case there was an unreported fork. if (forkResultToReport != null) { - LOG.info("Sending final progress update with unreported fork: {} " + LOG.debug("Sending final progress update with unreported fork: {} " + "for work item: {}", forkResultToReport, workString()); reportProgressHelper(); // This call can fail with an exception } - LOG.info("Stopped reporting progress for work item: {}", workString()); + LOG.debug("Stopped reporting progress for work item: {}", workString()); } /** @@ -183,11 +183,11 @@ public void run() { * Reports the current work progress to the worker service. */ private void reportProgress() { - LOG.info("Updating progress on work item {}", workString()); + LOG.debug("Updating progress on work item {}", workString()); try { reportProgressHelper(); } catch (Throwable e) { - LOG.warn("Error reporting work progress update: ", e); + LOG.warn("Error reporting workitem progress update to Dataflow service: ", e); } finally { scheduleNextUpdate(); } From 47de70d885b96f92c7bfe3916a5acfbe4dbb6303 Mon Sep 17 00:00:00 2001 From: tudorm Date: Fri, 13 Feb 2015 10:21:23 -0800 Subject: [PATCH 0158/1541] Add an extra check to ease reasoning about the PeekingReiterator. 
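For illustration only, with simplified types rather than the SDK's Reiterator API: chaining Guava's checkNotNull expresses two invariants in one assignment, so both a null argument and a copy() that misbehaves by returning null fail fast at construction time instead of surfacing later as an obscure NullPointerException.

    import static com.google.common.base.Preconditions.checkNotNull;

    // Hypothetical stand-in for an object that can clone itself.
    interface Copyable<T> {
      T copy();
    }

    // Illustrative holder: the inner check guards the constructor argument,
    // the outer check guards the value that copy() hands back.
    final class Holder<T extends Copyable<T>> {
      private final T value;

      Holder(T original) {
        this.value = checkNotNull(checkNotNull(original, "original").copy(),
            "copy() returned null");
      }

      T get() {
        return value;
      }
    }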
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86282507 --- .../cloud/dataflow/sdk/util/common/PeekingReiterator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java index 8789c4e84c8df..ed779da0b4cca 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java @@ -36,7 +36,7 @@ public PeekingReiterator(Reiterator iterator) { } PeekingReiterator(PeekingReiterator it) { - this.iterator = checkNotNull(it).iterator.copy(); + this.iterator = checkNotNull(checkNotNull(it).iterator.copy()); this.nextElement = it.nextElement; this.nextElementComputed = it.nextElementComputed; } From 926dda2257e467e5cdb381189ece728755ce0bfe Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 13 Feb 2015 11:03:29 -0800 Subject: [PATCH 0159/1541] Construct StreamingDataflowWorker options with createFromSystemProperties [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86286736 --- .../sdk/runners/worker/StreamingDataflowWorker.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index f16200038f326..6567a0cda2442 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -136,7 +136,7 @@ public static void main(String[] args) throws Exception { /** Regular constructor. */ public StreamingDataflowWorker( List mapTasks, WindmillServerStub server) { - options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); + options = PipelineOptionsFactory.createFromSystemProperties(); options.setAppName("StreamingWorkerHarness"); options.setStreaming(true); @@ -627,12 +627,10 @@ private void printMetrics(PrintWriter response) { private void printResources(PrintWriter response) { Runtime rt = Runtime.getRuntime(); - int mb = 1024 * 1024; - response.append("

<h1>Resources</h1>\n");
-    response.append("Total Memory: " + rt.totalMemory() / mb + "MB<br>\n");
-    response.append("Used Memory: " + (rt.totalMemory() - rt.freeMemory()) / mb + "MB<br>\n");
-    response.append("Max Memory: " + rt.maxMemory() / mb + "MB<br>\n");
+    response.append("Total Memory: " + rt.totalMemory() / 1e6 + "MB<br>\n");
+    response.append("Used Memory: " + (rt.totalMemory() - rt.freeMemory()) / 1e6 + "MB<br>\n");
+    response.append("Max Memory: " + rt.maxMemory() / 1e6 + "MB<br>
    \n"); } private void printSpecs(PrintWriter response) { From 162c25429b7509177e4a5a744d38d81b74924d43 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 13 Feb 2015 12:55:46 -0800 Subject: [PATCH 0160/1541] Change streaming worker pushback code to only stop pulling work if it is within a threshold of max available memory instead of current total memory. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86296819 --- .../worker/StreamingDataflowWorker.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 6567a0cda2442..0f75fb9cbb42a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -70,8 +70,9 @@ public class StreamingDataflowWorker { static final int MAX_THREAD_POOL_QUEUE_SIZE = 100; static final long MAX_COMMIT_BYTES = 32 << 20; static final int DEFAULT_STATUS_PORT = 8081; - // Memory threshold under which no new work will be processed. Set to 0 to disable pushback. - static final double PUSHBACK_THRESHOLD = 0.1; + // Memory threshold over which no new work will be processed. + // Set to a value >= 1 to disable pushback. + static final double PUSHBACK_THRESHOLD_RATIO = 0.9; static final String WINDMILL_SERVER_CLASS_NAME = "com.google.cloud.dataflow.sdk.runners.worker.windmill.WindmillServer"; @@ -288,10 +289,15 @@ private void dispatchLoop() { // If free memory is less than a percentage of total memory, block // until current work drains and memory is released. // Also force a GC to try to get under the memory threshold if possible. - while (rt.freeMemory() < rt.totalMemory() * PUSHBACK_THRESHOLD) { + long currentMemorySize = rt.totalMemory(); + long memoryUsed = currentMemorySize - rt.freeMemory(); + long maxMemory = rt.maxMemory(); + + while (memoryUsed > maxMemory * PUSHBACK_THRESHOLD_RATIO) { if (lastPushbackLog < (lastPushbackLog = System.currentTimeMillis()) - 60 * 1000) { - LOG.warn("In pushback, not accepting new work. Free Memory: {}MB / {}MB", - rt.freeMemory() / 1e6, rt.totalMemory() / 1e6); + LOG.warn( + "In pushback, not accepting new work. Using {}MB / {}MB ({}MB currently used by JVM)", + memoryUsed >> 20, maxMemory >> 20, currentMemorySize >> 20); System.gc(); } sleep(10); @@ -628,9 +634,9 @@ private void printMetrics(PrintWriter response) { private void printResources(PrintWriter response) { Runtime rt = Runtime.getRuntime(); response.append("

<h1>Resources</h1>\n");
-    response.append("Total Memory: " + rt.totalMemory() / 1e6 + "MB<br>\n");
-    response.append("Used Memory: " + (rt.totalMemory() - rt.freeMemory()) / 1e6 + "MB<br>\n");
-    response.append("Max Memory: " + rt.maxMemory() / 1e6 + "MB<br>\n");
+    response.append("Total Memory: " + (rt.totalMemory() >> 20) + "MB<br>\n");
+    response.append("Used Memory: " + ((rt.totalMemory() - rt.freeMemory()) >> 20) + "MB<br>\n");
+    response.append("Max Memory: " + (rt.maxMemory() >> 20) + "MB<br>
    \n"); } private void printSpecs(PrintWriter response) { From 20be3253eda5e813913d8b0e6484b4c59661c4f8 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 13 Feb 2015 14:28:46 -0800 Subject: [PATCH 0161/1541] Have coders which depend on state information during encoding/decoding or that can't serialize all their state information fail when being used within the DirectPipelineRunner, DataflowPipelineRunner, and our test utilities. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86304960 --- .../dataflow/sdk/util/SerializableUtils.java | 8 +- .../dataflow/sdk/coders/AvroCoderTest.java | 17 +- .../dataflow/sdk/coders/CoderProperties.java | 38 ++-- .../sdk/coders/CoderPropertiesTest.java | 184 ++++++++++++++++++ .../sdk/util/SerializableUtilsTest.java | 155 ++++++++++++--- 5 files changed, 346 insertions(+), 56 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderPropertiesTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java index 9ee09c8608ab7..10080f144861f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java @@ -92,7 +92,11 @@ public static T ensureSerializable(T value) { * @return the serialized Coder, as a {@link CloudObject} */ public static CloudObject ensureSerializable(Coder coder) { - CloudObject cloudObject = coder.asCloudObject(); + // Make sure that Coders are java serializable as well since + // they are regularly captured within DoFn's. + Coder copy = (Coder) ensureSerializable((Serializable) coder); + + CloudObject cloudObject = copy.asCloudObject(); Coder decoded; try { @@ -114,7 +118,7 @@ public static CloudObject ensureSerializable(Coder coder) { } /** - * Serializes an arbitrary T with the given Coder and verifies + * Serializes an arbitrary T with the given {@code Coder} and verifies * that it can be correctly deserialized. */ public static T ensureSerializableByCoder( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index 234fb046b2737..43023a1f63dd9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -38,11 +38,8 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.Serializable; -/** - * Tests for AvroCoder. - */ +/** Tests for {@link AvroCoder}. */ @RunWith(JUnit4.class) @SuppressWarnings("serial") public class AvroCoderTest { @@ -53,6 +50,7 @@ private static class Pojo { public int count; // Empty constructor required for Avro decoding. + @SuppressWarnings("unused") public Pojo() { } @@ -184,16 +182,7 @@ public void testDefaultCoder() throws Exception { } @Test - public void testAvroCoderJavaSerializable() throws Exception { - AvroCoder coder = AvroCoder.of(Pojo.class); - - // Cast the coder to serializable to test that it is serializable using - // Java serialization. - SerializableUtils.ensureSerializable((Serializable) coder); - } - - @Test - public void testAvroCoderJsonSerializable() throws Exception { + public void testAvroCoderIsSerializable() throws Exception { AvroCoder coder = AvroCoder.of(Pojo.class); // Check that the coder is serializable using the regular JSON approach. 
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java index 5fbaf1f5a59e6..1b9200a9164de 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java @@ -21,11 +21,13 @@ import static org.hamcrest.Matchers.emptyIterable; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; -import static org.junit.Assume.assumeThat; import com.google.cloud.dataflow.sdk.util.SerializableUtils; +import com.google.cloud.dataflow.sdk.util.Serializer; import com.google.common.collect.Iterables; +import org.hamcrest.CoreMatchers; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -36,6 +38,10 @@ /** * Properties for use in {@link Coder} tests. These are implemented with junit assertions * rather than as predicates for the sake of error messages. + *

    + * We serialize and deserialize the coder to make sure that any state information required by the + * coder is preserved. This causes tests written such that coders which lose information during + * serialization or change state during encoding/decoding will fail. */ public class CoderProperties { @@ -46,7 +52,7 @@ public class CoderProperties { Coder.Context.OUTER, Coder.Context.NESTED); /** - * Verifies that for the given {@link Coder}, and values of + * Verifies that for the given {@link Coder Coder}, and values of * type {@code T}, if the values are equal then the encoded bytes are equal, * in any {@link Coder.Context}. */ @@ -59,20 +65,22 @@ public static void coderDeterministic( } /** - * Verifies that for the given {@link Coder}, {@link Coder.Context}, and values of + * Verifies that for the given {@link Coder Coder}, {@link Coder.Context}, and values of * type {@code T}, if the values are equal then the encoded bytes are equal. */ public static void coderDeterministicInContext( Coder coder, Coder.Context context, T value1, T value2) throws Exception { - assumeThat(value1, equalTo(value2)); + assertThat("Expected that the coder is deterministic", + coder.isDeterministic(), CoreMatchers.is(true)); + assertThat("Expected that the passed in values are equal()", value1, equalTo(value2)); assertThat( encode(coder, context, value1), equalTo(encode(coder, context, value2))); } /** - * Verifies that for the given {@link Coder}, + * Verifies that for the given {@link Coder Coder}, * and value of type {@code T}, encoding followed by decoding yields an * equal value of type {@code T}, in any {@link Coder.Context}. */ @@ -85,7 +93,7 @@ public static void coderDecodeEncodeEqual( } /** - * Verifies that for the given {@link Coder}, {@link Coder.Context}, + * Verifies that for the given {@link Coder Coder}, {@link Coder.Context}, * and value of type {@code T}, encoding followed by decoding yields an * equal value of type {@code T}. */ @@ -96,7 +104,7 @@ public static void coderDecodeEncodeEqualInContext( } /** - * Verifies that for the given {@link Coder>}, + * Verifies that for the given {@link Coder Coder>}, * and value of type {@code Collection}, encoding followed by decoding yields an * equal value of type {@code Collection}, in any {@link Coder.Context}. */ @@ -109,7 +117,7 @@ public static > void coderDecodeEncodeContentsEqual( } /** - * Verifies that for the given {@link Coder>}, + * Verifies that for the given {@link Coder Coder>}, * and value of type {@code Collection}, encoding followed by decoding yields an * equal value of type {@code Collection}, in the given {@link Coder.Context}. */ @@ -128,7 +136,7 @@ public static > void coderDecodeEncodeContentsEqualI } /** - * Verifies that for the given {@link Coder>}, + * Verifies that for the given {@link Coder Coder>}, * and value of type {@code Collection}, encoding followed by decoding yields an * equal value of type {@code Collection}, in any {@link Coder.Context}. */ @@ -142,7 +150,7 @@ public static > void coderDecodeEncodeContentsInSameOr } /** - * Verifies that for the given {@link Coder>}, + * Verifies that for the given {@link Coder Coder>}, * and value of type {@code Iterable}, encoding followed by decoding yields an * equal value of type {@code Collection}, in the given {@link Coder.Context}. 
*/ @@ -168,15 +176,21 @@ public static void coderSerializable(Coder coder) { private static byte[] encode( Coder coder, Coder.Context context, T value) throws CoderException, IOException { + @SuppressWarnings("unchecked") + Coder deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class); + ByteArrayOutputStream os = new ByteArrayOutputStream(); - coder.encode(value, os, context); + deserializedCoder.encode(value, os, context); return os.toByteArray(); } private static T decode( Coder coder, Coder.Context context, byte[] bytes) throws CoderException, IOException { + @SuppressWarnings("unchecked") + Coder deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class); + ByteArrayInputStream is = new ByteArrayInputStream(bytes); - return coder.decode(is, context); + return deserializedCoder.decode(is, context); } private static T decodeEncode(Coder coder, Coder.Context context, T value) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderPropertiesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderPropertiesTest.java new file mode 100644 index 0000000000000..66edb00204979 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderPropertiesTest.java @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + +import com.google.common.base.Strings; + +import org.hamcrest.CoreMatchers; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** Unit tests for {@link CoderProperties}. */ +@RunWith(JUnit4.class) +public class CoderPropertiesTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testGoodCoderIsDeterministic() throws Exception { + CoderProperties.coderDeterministic(StringUtf8Coder.of(), "TestData", "TestData"); + } + + /** A coder which says it is not deterministic but actually is. 
*/ + private static class NonDeterministicCoder extends CustomCoder { + @Override + public void encode(String value, OutputStream outStream, Context context) + throws CoderException, IOException { + StringUtf8Coder.of().encode(value, outStream, context); + } + + @Override + public String decode(InputStream inStream, Context context) + throws CoderException, IOException { + return StringUtf8Coder.of().decode(inStream, context); + } + } + + @Test + public void testNonDeterministicCoder() throws Exception { + try { + CoderProperties.coderDeterministic(new NonDeterministicCoder(), "TestData", "TestData"); + fail("Expected AssertionError"); + } catch (AssertionError error) { + assertThat(error.getMessage(), + CoreMatchers.containsString("Expected that the coder is deterministic")); + } + } + + @Test + public void testPassingInNonEqualValuesWithDeterministicCoder() throws Exception { + try { + CoderProperties.coderDeterministic(StringUtf8Coder.of(), "AAA", "BBB"); + fail("Expected AssertionError"); + } catch (AssertionError error) { + assertThat(error.getMessage(), + CoreMatchers.containsString("Expected that the passed in values")); + } + } + + /** A coder which is non-deterministic because it adds a string to the value. */ + private static class BadDeterminsticCoder extends CustomCoder { + public BadDeterminsticCoder() { + } + + @Override + public void encode(String value, OutputStream outStream, Context context) + throws IOException, CoderException { + StringUtf8Coder.of().encode(value + System.nanoTime(), outStream, context); + } + + @Override + public String decode(InputStream inStream, Context context) + throws CoderException, IOException { + return StringUtf8Coder.of().decode(inStream, context); + } + + @Override + public boolean isDeterministic() { + return true; + } + } + + @Test + public void testBadCoderIsNotDeterministic() throws Exception { + try { + CoderProperties.coderDeterministic(new BadDeterminsticCoder(), "TestData", "TestData"); + fail("Expected AssertionError"); + } catch (AssertionError error) { + assertThat(error.getMessage(), + CoreMatchers.containsString("<84>, <101>, <115>, <116>, <68>")); + } + } + + @Test + public void testGoodCoderEncodesEqualValues() throws Exception { + CoderProperties.coderDecodeEncodeEqual(StringUtf8Coder.of(), "TestData"); + } + + /** This coder changes state during encoding/decoding. */ + private static class StateChangingSerializingCoder extends CustomCoder { + private int changedState; + + public StateChangingSerializingCoder() { + changedState = 10; + } + + @Override + public void encode(String value, OutputStream outStream, Context context) + throws CoderException, IOException { + changedState += 1; + StringUtf8Coder.of().encode(value + Strings.repeat("A", changedState), outStream, context); + } + + @Override + public String decode(InputStream inStream, Context context) + throws CoderException, IOException { + String decodedValue = StringUtf8Coder.of().decode(inStream, context); + return decodedValue.substring(0, decodedValue.length() - changedState); + } + } + + @Test + public void testBadCoderThatDependsOnChangingState() throws Exception { + try { + CoderProperties.coderDecodeEncodeEqual(new StateChangingSerializingCoder(), "TestData"); + fail("Expected AssertionError"); + } catch (AssertionError error) { + assertThat(error.getMessage(), CoreMatchers.containsString("TestData")); + } + } + + /** This coder loses information critical to its operation. 
*/ + private static class ForgetfulSerializingCoder extends CustomCoder { + private transient int lostState; + + public ForgetfulSerializingCoder(int lostState) { + this.lostState = lostState; + } + + @Override + public void encode(String value, OutputStream outStream, Context context) + throws CoderException, IOException { + if (lostState == 0) { + throw new RuntimeException("I forgot something..."); + } + StringUtf8Coder.of().encode(value, outStream, context); + } + + @Override + public String decode(InputStream inStream, Context context) + throws CoderException, IOException { + return StringUtf8Coder.of().decode(inStream, context); + } + } + + @Test + public void testBadCoderThatDependsOnStateThatIsLost() throws Exception { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("I forgot something..."); + CoderProperties.coderDecodeEncodeEqual(new ForgetfulSerializingCoder(1), "TestData"); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java index daddde564aa09..38c73ff6263c6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/SerializableUtilsTest.java @@ -16,26 +16,39 @@ package com.google.cloud.dataflow.sdk.util; -import org.hamcrest.CoreMatchers; -import org.hamcrest.core.IsInstanceOf; -import org.junit.Assert; +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.StandardCoder; +import com.google.common.collect.ImmutableList; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.Serializable; +import java.util.List; -/** - * Tests for SerializableUtils. - */ +/** Tests for {@link SerializableUtils}. */ @RunWith(JUnit4.class) -@SuppressWarnings("serial") public class SerializableUtilsTest { - static class TestClass implements Serializable { + @Rule public ExpectedException expectedException = ExpectedException.none(); + + /** A class which is serializable by Java. 
*/ + private static class SerializableByJava implements Serializable { final String stringValue; final int intValue; - public TestClass(String stringValue, int intValue) { + public SerializableByJava(String stringValue, int intValue) { this.stringValue = stringValue; this.intValue = intValue; } @@ -46,31 +59,117 @@ public void testTranscode() { String stringValue = "hi bob"; int intValue = 42; - TestClass testObject = new TestClass(stringValue, intValue); + SerializableByJava testObject = new SerializableByJava(stringValue, intValue); + SerializableByJava testCopy = SerializableUtils.ensureSerializable(testObject); - Object copy = - SerializableUtils.deserializeFromByteArray( - SerializableUtils.serializeToByteArray(testObject), - "a TestObject"); + assertEquals(stringValue, testCopy.stringValue); + assertEquals(intValue, testCopy.intValue); + } - Assert.assertThat(copy, new IsInstanceOf(TestClass.class)); - TestClass testCopy = (TestClass) copy; + @Test + public void testDeserializationError() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("unable to deserialize a bogus string"); + SerializableUtils.deserializeFromByteArray( + "this isn't legal".getBytes(), + "a bogus string"); + } - Assert.assertEquals(stringValue, testCopy.stringValue); - Assert.assertEquals(intValue, testCopy.intValue); + /** A class which is not serializable by Java. */ + private static class UnserializableByJava implements Serializable { + @SuppressWarnings("unused") + private Object unserializableField = new Object(); } @Test - public void testDeserializationError() { - try { - SerializableUtils.deserializeFromByteArray( - "this isn't legal".getBytes(), - "a bogus string"); - Assert.fail("should have thrown an exception"); - } catch (Exception exn) { - Assert.assertThat(exn.toString(), - CoreMatchers.containsString( - "unable to deserialize a bogus string")); + public void testSerializationError() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("unable to serialize"); + SerializableUtils.serializeToByteArray(new UnserializableByJava()); + } + + /** A {@link Coder} which is not serializable by Java. */ + private static class UnserializableCoderByJava extends StandardCoder { + private final Object unserializableField = new Object(); + + @Override + public void encode(Object value, OutputStream outStream, Context context) + throws CoderException, IOException { + } + + @Override + public Object decode(InputStream inStream, Context context) + throws CoderException, IOException { + return unserializableField; + } + + @Override + public List> getCoderArguments() { + return ImmutableList.of(); + } + + @Override + public boolean isDeterministic() { + return true; + } + } + + @Test + public void testEnsureSerializableWithUnserializableCoderByJava() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("unable to serialize"); + SerializableUtils.ensureSerializable(new UnserializableCoderByJava()); + } + + /** A {@link Coder} which is not serializable by Jackson. 
*/ + private static class UnserializableCoderByJackson extends StandardCoder { + private final SerializableByJava unserializableField; + + public UnserializableCoderByJackson(SerializableByJava unserializableField) { + this.unserializableField = unserializableField; + } + + @JsonCreator + public static UnserializableCoderByJackson of( + @JsonProperty("unserializableField") SerializableByJava unserializableField) { + return new UnserializableCoderByJackson(unserializableField); + } + + @Override + public CloudObject asCloudObject() { + CloudObject result = super.asCloudObject(); + result.put("unserializableField", unserializableField); + return result; + } + + @Override + public void encode(Object value, OutputStream outStream, Context context) + throws CoderException, IOException { + } + + @Override + public Object decode(InputStream inStream, Context context) + throws CoderException, IOException { + return unserializableField; + } + + @Override + public List> getCoderArguments() { + return ImmutableList.of(); + } + + @Override + public boolean isDeterministic() { + return true; } } + + @Test + public void testEnsureSerializableWithUnserializableCoderByJackson() throws Exception { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Unable to deserialize Coder:"); + SerializableUtils.ensureSerializable( + new UnserializableCoderByJackson(new SerializableByJava("TestData", 5))); + } + } From f1e07218359cbdc8ce24a9985bdda696b3ee3f70 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 13 Feb 2015 16:15:21 -0800 Subject: [PATCH 0162/1541] Remove code duplication and standardize on the same exception formatter for job messages and log statements. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86313678 --- .../sdk/runners/worker/DataflowWorker.java | 31 ++----------------- .../DataflowWorkerLoggingFormatter.java | 3 +- .../runners/worker/DataflowWorkerTest.java | 10 +++++- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index fcd6ccd618a4e..13003108a022d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -27,6 +27,7 @@ import com.google.api.services.dataflow.model.WorkItemServiceState; import com.google.api.services.dataflow.model.WorkItemStatus; import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; +import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudCounterUtils; import com.google.cloud.dataflow.sdk.util.CloudMetricUtils; @@ -43,8 +44,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -117,7 +116,7 @@ private boolean doWork(WorkItem workItem) throws IOException { worker = SourceOperationExecutorFactory.create(options, workItem.getSourceOperationTask()); } else { - throw new RuntimeException("unknown kind of work item: " + workItem.toString()); + throw new RuntimeException("Unknown kind of work item: " + workItem.toString()); } DataflowWorkProgressUpdater progressUpdater = @@ 
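The shared formatter boils down to the standard pattern sketched below; the StackTraces class and format method names are illustrative, not the SDK's. The point is to let Throwable.printStackTrace render the full cause chain into a String rather than walking getCause() by hand.

    import java.io.PrintWriter;
    import java.io.StringWriter;

    // Minimal sketch: one formatter reused for both log output and job messages.
    final class StackTraces {
      static String format(Throwable thrown) {
        if (thrown == null) {
          return null;
        }
        StringWriter out = new StringWriter();
        // printStackTrace already follows "Caused by:" chains and suppressed
        // exceptions, so no hand-rolled recursion is needed.
        thrown.printStackTrace(new PrintWriter(out));
        return out.toString();
      }
    }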
-186,37 +185,13 @@ private void handleWorkError(WorkItem workItem, WorkExecutor worker, Throwable e Status error = new Status(); error.setCode(2); // Code.UNKNOWN. TODO: Replace with a generated definition. // TODO: Attach the stack trace as exception details, not to the message. - error.setMessage(buildCloudStackTrace(t)); + error.setMessage(DataflowWorkerLoggingFormatter.formatException(t)); reportStatus(options, "Failure", workItem, worker == null ? null : worker.getOutputCounters(), worker == null ? null : worker.getOutputMetrics(), null/*sourceOperationResponse*/, error == null ? null : Collections.singletonList(error), 0); } - /** - * Recursively goes through an exception, pulling out the stack trace. If the - * exception is a chained exception, it recursively goes through any causes - * and appends them to the stack trace. - */ - private static String buildCloudStackTrace(Throwable t) { - StringWriter result = new StringWriter(); - PrintWriter printResult = new PrintWriter(result); - - printResult.print("Exception: "); - for (;;) { - printResult.println(t.toString()); - for (StackTraceElement frame : t.getStackTrace()) { - printResult.println(frame.toString()); - } - t = t.getCause(); - if (t == null) { - break; - } - printResult.print("Caused by: "); - } - return result.toString(); - } - private void reportStatus(DataflowWorkerHarnessOptions options, String status, WorkItem workItem, @Nullable CounterSet counters, @Nullable Collection> metrics, @Nullable SourceFormat.OperationResponse operationResponse, @Nullable List errors, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java index b6bb0ce960173..9f748eb05228b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -85,7 +85,6 @@ public static String getWorkId() { return workId.get(); } - @Override public String format(LogRecord record) { String exception = formatException(record.getThrown()); @@ -107,7 +106,7 @@ public String format(LogRecord record) { * @param thrown The throwable to format. * @return A string containing the contents of {@link Throwable#printStackTrace()}. 
*/ - private String formatException(Throwable thrown) { + public static String formatException(Throwable thrown) { if (thrown == null) { return null; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java index 037946c29a917..aee95ef0a28f9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java @@ -15,6 +15,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -24,6 +25,7 @@ import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.testing.FastNanoClockAndSleeper; +import org.hamcrest.CoreMatchers; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.hamcrest.TypeSafeMatcher; @@ -79,8 +81,14 @@ public void describeTo(Description description) { @Override protected boolean matchesSafely(WorkItemStatus status) { - return status.getCompleted() && !status.getErrors().isEmpty(); + boolean returnValue = status.getCompleted() && !status.getErrors().isEmpty(); + if (returnValue) { + assertThat(status.getErrors().get(0).getMessage(), + CoreMatchers.containsString("java.lang.RuntimeException: Unknown kind of work")); + } + return returnValue; } }; } } + From e02cb7b49b5eb5d41f9512db189b60c559a4d503 Mon Sep 17 00:00:00 2001 From: tudorm Date: Fri, 13 Feb 2015 16:38:59 -0800 Subject: [PATCH 0163/1541] Remove serializations of objects from precondition checks. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86315430 --- .../cloud/dataflow/sdk/util/common/worker/ReadOperation.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java index d7f3f479f0ee3..883107484919d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperation.java @@ -246,8 +246,8 @@ public Reader.ForkResult requestFork(Reader.ForkRequest forkRequest) { private class ReaderObserver implements Observer { @Override public void update(Observable obs, Object obj) { - Preconditions.checkArgument(obs == reader, "unexpected observable" + obs); - Preconditions.checkArgument(obj instanceof Long, "unexpected parameter object: " + obj); + Preconditions.checkArgument(obs == reader, "unexpected observable"); + Preconditions.checkArgument(obj instanceof Long, "unexpected parameter object"); byteCount.addValue((long) obj); } } From ac6787a6665d7922759d9e6970b6c5536a85f004 Mon Sep 17 00:00:00 2001 From: rfernand Date: Tue, 17 Feb 2015 10:20:16 -0800 Subject: [PATCH 0164/1541] Explicitly throwing NoSuchElementException in the iterator. 
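Since NoSuchElementException is unchecked, the added throws clause documents the contract rather than changing behavior. A toy implementation of that contract, with illustrative names only, might look like this:

    import java.io.IOException;
    import java.util.NoSuchElementException;

    // Illustrative in-memory reader: IOException is reserved for genuine I/O
    // failures, while exhaustion is signalled with NoSuchElementException.
    final class InMemoryReaderIterator {
      private final String[] values;
      private int position = 0;

      InMemoryReaderIterator(String... values) {
        this.values = values;
      }

      boolean hasNext() throws IOException {
        return position < values.length;
      }

      String next() throws IOException, NoSuchElementException {
        if (!hasNext()) {
          throw new NoSuchElementException("iterator is exhausted");
        }
        return values[position++];
      }
    }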
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86505113 --- .../google/cloud/dataflow/sdk/util/common/worker/Reader.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java index 93a398907dd1e..bb0c0b25a9758 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/Reader.java @@ -48,9 +48,10 @@ public interface ReaderIterator extends AutoCloseable { /** * Returns the next element. * + * @throws IOException if attempting to access an element involves IO that fails * @throws NoSuchElementException if there are no more elements */ - public T next() throws IOException; + public T next() throws IOException, NoSuchElementException; /** * Copies the current ReaderIterator. From 37827d7b2fc4a1cf8789b02ee618ccd1ce7f4c79 Mon Sep 17 00:00:00 2001 From: relax Date: Wed, 18 Feb 2015 19:04:00 -0800 Subject: [PATCH 0165/1541] The BigQuery insertion API does not allow more than 500 rows per insertion request. Make sure that BigtableIO honors this limit. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86653226 --- .../cloud/dataflow/sdk/util/BigQueryTableInserter.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java index c241ee2f25912..0ae41a057b1b2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java @@ -45,7 +45,10 @@ public class BigQueryTableInserter { private static final Logger LOG = LoggerFactory.getLogger(BigQueryTableInserter.class); // Approximate amount of table data to upload per InsertAll request. - private static final long UPLOAD_BATCH_SIZE = 64 * 1024; + private static final long UPLOAD_BATCH_SIZE_BYTES = 64 * 1024; + + // The maximum number of rows to upload per InsertAll request. + private static final long MAX_ROWS_PER_BATCH = 500; private final Bigquery client; private final TableReference ref; @@ -92,7 +95,8 @@ public void insertAll(Iterator rowIterator, rows.add(out); dataSize += row.toString().length(); - if (dataSize >= UPLOAD_BATCH_SIZE || !rowIterator.hasNext()) { + if (dataSize >= UPLOAD_BATCH_SIZE_BYTES || rows.size() >= MAX_ROWS_PER_BATCH || + !rowIterator.hasNext()) { TableDataInsertAllRequest content = new TableDataInsertAllRequest(); content.setRows(rows); From 8f800a7a2b013406a2c81df4d2a7d390b899bb84 Mon Sep 17 00:00:00 2001 From: mariand Date: Wed, 18 Feb 2015 19:08:34 -0800 Subject: [PATCH 0166/1541] StateSampler: Atomically set the thread state rather than using locks. Improves S02 in WordCount by ~10%. Also, removed unused stack dumping and cleaned up the member fields. 
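The hot-path idea, reduced to a hedged sketch with simplified names rather than the SDK's API: a state transition becomes a single AtomicInteger.getAndSet, and a small AutoCloseable restores the previous state when a scope ends, so no monitor is contended on the per-element path.

    import java.util.concurrent.atomic.AtomicInteger;

    // Illustrative tracker: no synchronized blocks on the per-element hot path.
    final class SimpleStateTracker {
      /** AutoCloseable whose close() is narrowed to throw nothing. */
      interface Scope extends AutoCloseable {
        @Override
        void close();
      }

      private static final int IDLE = -1;

      private final AtomicInteger currentState = new AtomicInteger(IDLE);

      int setState(int newState) {
        // A single atomic exchange; a sampling thread can read currentState.get().
        return currentState.getAndSet(newState);
      }

      Scope scopedState(int newState) {
        final int previous = setState(newState);
        return new Scope() {
          @Override
          public void close() {
            setState(previous);
          }
        };
      }
    }

A caller can then wrap a unit of work in try (SimpleStateTracker.Scope scope = tracker.scopedState(someState)) { ... } so the prior state is restored even if the work throws.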
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86653445 --- .../sdk/util/common/worker/StateSampler.java | 151 +++++------------- .../util/common/worker/StateSamplerTest.java | 32 ++-- 2 files changed, 51 insertions(+), 132 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java index 91d90e9d2a05f..59d21b93aa4bd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java @@ -19,51 +19,52 @@ import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; -import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.HashMap; -import java.util.Map; import java.util.Random; import java.util.Timer; import java.util.TimerTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import javax.annotation.concurrent.ThreadSafe; + /** * A StateSampler object may be used to obtain an approximate * breakdown of the time spent by an execution context in various * states, as a fraction of the total time. The sampling is taken at * regular intervals, with adjustment for scheduling delay. - * - *

    Thread-safe. */ +@ThreadSafe public class StateSampler extends TimerTask implements AutoCloseable { - private final String prefix; - private CounterSet.AddCounterMutator counterSetMutator; - // Sampling period of internal Timer (thread). - public final long samplingPeriodMs; - public static final int DO_NOT_SAMPLE = -1; public static final long DEFAULT_SAMPLING_PERIOD_MS = 200; - // Array of counters indexed by their state. + + private final String prefix; + private final CounterSet.AddCounterMutator counterSetMutator; + + /** Array of counters indexed by their state. */ private ArrayList> countersByState = new ArrayList<>(); - // Map of state name to state. + + /** Map of state name to state. */ private HashMap statesByName = new HashMap<>(); - // The current state. - private int currentState; - // The timestamp corresponding to the last state change or the last - // time the current state was sampled (and recorded). - private long stateTimestamp = 0; - // When sampling this state, a stack trace is also logged. - private int stateToSampleThreadStacks = DO_NOT_SAMPLE; - // The thread that performed the last state transition. - private Thread sampledThread = null; - // The frequency with which the stack traces are logged, with respect - // to the sampling period. - private static final int SAMPLE_THREAD_STACK_FREQ = 10; - private int sampleThreadStackFreq = 0; + /** The current state. */ + private final AtomicInteger currentState; + + /** Special value of {@code currentState} which we do not sample. */ + private static final int DO_NOT_SAMPLE = -1; + + /** + * The timestamp (in nanoseconds) corresponding to the last time the + * state was sampled (and recorded). + */ + private long stateTimestampNs = 0; - // Using a fixed number of timers for all StateSampler objects. + /** Using a fixed number of timers for all StateSampler objects. */ private static final int NUM_TIMER_THREADS = 16; - // The timers is used for periodically sampling the states. + + /** The timers are used for periodically sampling the states. */ private static Timer[] timers = new Timer[NUM_TIMER_THREADS]; static { for (int i = 0; i < timers.length; ++i) { @@ -86,13 +87,12 @@ public StateSampler(String prefix, long samplingPeriodMs) { this.prefix = prefix; this.counterSetMutator = counterSetMutator; - this.samplingPeriodMs = samplingPeriodMs; - currentState = DO_NOT_SAMPLE; + currentState = new AtomicInteger(DO_NOT_SAMPLE); Random rand = new Random(); int initialDelay = rand.nextInt((int) samplingPeriodMs); timers[rand.nextInt(NUM_TIMER_THREADS)].scheduleAtFixedRate( this, initialDelay, samplingPeriodMs); - stateTimestamp = System.currentTimeMillis(); + stateTimestampNs = System.nanoTime(); } /** @@ -109,37 +109,17 @@ public StateSampler(String prefix, this(prefix, counterSetMutator, DEFAULT_SAMPLING_PERIOD_MS); } - private void printStackTrace(Thread thread) { - System.out.println("Sampled stack trace:"); - StackTraceElement[] stack = thread.getStackTrace(); - for (StackTraceElement elem : stack) { - System.out.println("\t" + elem.toString()); - } - } - - /** - * Selects a state for which the thread stacks will also be logged - * during the sampling. Useful for debugging. 
- * - * @param state name of the selected state - */ - public synchronized void setStateToSampleThreadStacks(int state) { - stateToSampleThreadStacks = state; - } - @Override - public synchronized void run() { - long now = System.currentTimeMillis(); - if (currentState != DO_NOT_SAMPLE) { - countersByState.get(currentState).addValue(now - stateTimestamp); - if (sampledThread != null - && currentState == stateToSampleThreadStacks - && ++sampleThreadStackFreq >= SAMPLE_THREAD_STACK_FREQ) { - printStackTrace(sampledThread); - sampleThreadStackFreq = 0; + public void run() { + long startTimestampNs = System.nanoTime(); + int state = currentState.get(); + if (state != DO_NOT_SAMPLE) { + synchronized (this) { + countersByState.get(state).addValue( + TimeUnit.NANOSECONDS.toMillis(startTimestampNs - stateTimestampNs)); } } - stateTimestamp = now; + stateTimestampNs = startTimestampNs; } @Override @@ -180,15 +160,8 @@ public int stateForName(String name) { * @param state the new state to transition to * @return the previous state */ - public synchronized int setState(int state) { - // TODO: investigate whether this can be made cheaper, (e.g., - // using atomic operations). - int previousState = currentState; - currentState = state; - if (stateToSampleThreadStacks != DO_NOT_SAMPLE) { - sampledThread = Thread.currentThread(); - } - return previousState; + public int setState(int state) { + return currentState.getAndSet(state); } /** @@ -197,39 +170,10 @@ public synchronized int setState(int state) { * @param name the name of the new state to transition to * @return the previous state */ - public synchronized int setState(String name) { + public int setState(String name) { return setState(stateForName(name)); } - /** - * Returns a tuple consisting of the current state and duration. - * - * @return a {@link Map.Entry} entry with current state and duration - */ - public synchronized Map.Entry getCurrentStateAndDuration() { - if (currentState == DO_NOT_SAMPLE) { - return new SimpleEntry<>("", 0L); - } - - Counter counter = countersByState.get(currentState); - return new SimpleEntry<>(counter.getName(), - counter.getAggregate(false) - + System.currentTimeMillis() - stateTimestamp); - } - - /** - * Get the duration for a given state. - * - * @param state the state whose duration is returned - * @return the duration of a given state - */ - public synchronized long getStateDuration(int state) { - Counter counter = countersByState.get(state); - return counter.getAggregate(false) - + (state == currentState - ? System.currentTimeMillis() - stateTimestamp : 0); - } - /** * Returns an AutoCloseable {@link ScopedState} that will perform a * state transition to the given state, and will automatically reset @@ -239,23 +183,10 @@ public synchronized long getStateDuration(int state) { * @return a {@link ScopedState} that automatically resets the state * to the prior state */ - public synchronized ScopedState scopedState(int state) { + public ScopedState scopedState(int state) { return new ScopedState(this, setState(state)); } - /** - * Returns an AutoCloseable {@link ScopedState} that will perform a - * state transition to the given state, and will automatically reset - * the state to the prior state upon closing. 
- * - * @param stateName the name of the new state - * @return a {@link ScopedState} that automatically resets the state - * to the prior state - */ - public synchronized ScopedState scopedState(String stateName) { - return new ScopedState(this, setState(stateName)); - } - /** * A nested class that is used to account for states and state * transitions based on lexical scopes. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java index 291efe14a5546..18e2103840372 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java @@ -16,7 +16,6 @@ package com.google.cloud.dataflow.sdk.util.common.worker; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.util.common.Counter; @@ -26,14 +25,15 @@ import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import java.util.AbstractMap.SimpleEntry; -import java.util.Map; - /** * Unit tests for the {@link Counter} API. */ @RunWith(JUnit4.class) public class StateSamplerTest { + public static long getCounterLongValue(CounterSet counters, String name) { + Counter counter = (Counter) counters.getExistingCounter(name); + return counter.getAggregate(false); + } @Test public void basicTest() throws InterruptedException { @@ -45,9 +45,6 @@ public void basicTest() throws InterruptedException { int state1 = stateSampler.stateForName("1"); int state2 = stateSampler.stateForName("2"); - assertEquals(new SimpleEntry<>("", 0L), - stateSampler.getCurrentStateAndDuration()); - try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { assert s1 != null; @@ -60,8 +57,8 @@ public void basicTest() throws InterruptedException { Thread.sleep(3 * periodMs); } - long s1 = stateSampler.getStateDuration(state1); - long s2 = stateSampler.getStateDuration(state2); + long s1 = getCounterLongValue(counters, "test-1-msecs"); + long s2 = getCounterLongValue(counters, "test-2-msecs"); System.out.println("basic s1: " + s1); System.out.println("basic s2: " + s2); @@ -82,9 +79,6 @@ public void nestingTest() throws InterruptedException { int state2 = stateSampler.stateForName("2"); int state3 = stateSampler.stateForName("3"); - assertEquals(new SimpleEntry<>("", 0L), - stateSampler.getCurrentStateAndDuration()); - try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { assert s1 != null; @@ -105,9 +99,9 @@ public void nestingTest() throws InterruptedException { Thread.sleep(periodMs); } - long s1 = stateSampler.getStateDuration(state1); - long s2 = stateSampler.getStateDuration(state2); - long s3 = stateSampler.getStateDuration(state3); + long s1 = getCounterLongValue(counters, "test-1-msecs"); + long s2 = getCounterLongValue(counters, "test-2-msecs"); + long s3 = getCounterLongValue(counters, "test-3-msecs"); System.out.println("s1: " + s1); System.out.println("s2: " + s2); @@ -128,17 +122,11 @@ public void nonScopedTest() throws InterruptedException { int state1 = stateSampler.stateForName("1"); int previousState = stateSampler.setState(state1); Thread.sleep(2 * periodMs); - Map.Entry currentStateAndDuration = - stateSampler.getCurrentStateAndDuration(); stateSampler.setState(previousState); - assertEquals("test-1-msecs", currentStateAndDuration.getKey()); long tolerance = periodMs; - long s = 
currentStateAndDuration.getValue(); + long s = getCounterLongValue(counters, "test-1-msecs"); System.out.println("s: " + s); assertTrue(s >= periodMs - tolerance); assertTrue(s <= 4 * periodMs + tolerance); - - assertTrue(stateSampler.getCurrentStateAndDuration() - .getKey().isEmpty()); } } From 6bfb3e2371c11fa2d0931e08a2e1355cc7cd5c73 Mon Sep 17 00:00:00 2001 From: relax Date: Thu, 19 Feb 2015 01:21:54 -0800 Subject: [PATCH 0167/1541] Cleanup BigQueryIO class. Remove the logic to automatically serialize/deserialize TableRowObjects and replace it with a transient member variable. Replace ThreadLocal HashSet with ConcurrentHashMap. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86670376 --- .../cloud/dataflow/sdk/io/BigQueryIO.java | 132 ++++++------------ 1 file changed, 40 insertions(+), 92 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index 11ad6f71e82f6..dca65ad9f592e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -44,12 +44,12 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; +import java.util.Collections; import java.util.List; +import java.util.Set; import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; @@ -638,62 +638,6 @@ public boolean getValidate() { */ private static class StreamingWriteFn extends DoFn>, Void> implements DoFn.RequiresKeyedState { - /** - * Class to accumulate BigQuery row data as a list of String. - * DoFn implementation must be Serializable, but BigQuery classes, - * such as TableRow are not. Therefore, convert into JSON String - * for accumulation. - */ - private static class JsonTableRows implements Iterable, Serializable { - /** The list where BigQuery row data is accumulated. */ - private final List jsonRows = new ArrayList<>(); - - /** Iterator of JsonTableRows converts the row in String to TableRow. */ - static class JsonTableRowIterator implements Iterator { - private final Iterator iteratorInternal; - - /** Constructor. */ - JsonTableRowIterator(List jsonRowList) { - iteratorInternal = jsonRowList.iterator(); - } - - @Override - public boolean hasNext() { - return iteratorInternal.hasNext(); - } - - @Override - public TableRow next() { - try { - // Converts the String back into TableRow. - return JSON_FACTORY.fromString(iteratorInternal.next(), TableRow.class); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - iteratorInternal.remove(); - } - } - - /** Returns the iterator. */ - @Override - public Iterator iterator() { - return new JsonTableRowIterator(jsonRows); - } - - /** Adds a BigQuery TableRow. */ - void add(TableRow row) { - try { - // Converts into JSON format. - jsonRows.add(JSON_FACTORY.toString(row)); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } /** TableReference in JSON. Use String to make the class Serializable. */ private final String jsonTableReference; @@ -701,20 +645,18 @@ void add(TableRow row) { /** TableSchema in JSON. Use String to make the class Serializable. 
*/ private final String jsonTableSchema; + private transient TableReference tableReference; + /** JsonTableRows to accumulate BigQuery rows. */ - private JsonTableRows jsonTableRows; + private transient List tableRows; /** The list of unique ids for each BigQuery table row. */ - private List uniqueIdsForTableRows; + private transient List uniqueIdsForTableRows; /** The list of tables created so far, so we don't try the creation each time. */ - private static ThreadLocal> createdTables = new ThreadLocal>() { - @Override - protected HashSet initialValue() { - return new HashSet<>(); - } - }; + private static Set createdTables = + Collections.newSetFromMap(new ConcurrentHashMap()); /** Constructor. */ StreamingWriteFn(TableReference table, TableSchema schema) { @@ -729,27 +671,25 @@ protected HashSet initialValue() { /** Prepares a target BigQuery table. */ @Override public void startBundle(Context context) { - jsonTableRows = new JsonTableRows(); + tableRows = new ArrayList<>(); uniqueIdsForTableRows = new ArrayList<>(); BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class); - Bigquery client = Transport.newBigQueryClient(options).build(); // TODO: Support table sharding and the better place to initialize - // BigQuery table. - HashSet tables = createdTables.get(); - if (!tables.contains(jsonTableSchema)) { - try { - TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class); - TableReference tableReference = - JSON_FACTORY.fromString(jsonTableReference, TableReference.class); - + // BigQuery table. + try { + tableReference = + JSON_FACTORY.fromString(jsonTableReference, TableReference.class); + if (!createdTables.contains(jsonTableSchema)) { + TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class); + Bigquery client = Transport.newBigQueryClient(options).build(); BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); inserter.tryCreateTable(tableSchema); - tables.add(jsonTableSchema); - } catch (IOException e) { - throw new RuntimeException(e); + createdTables.add(jsonTableSchema); } + } catch (IOException e) { + throw new RuntimeException(e); } } @@ -757,25 +697,33 @@ public void startBundle(Context context) { @Override public void processElement(ProcessContext context) { KV> kv = context.element(); - TableRow tableRow = kv.getValue().getValue(); - uniqueIdsForTableRows.add(kv.getValue().getKey()); - jsonTableRows.add(tableRow); + addRow(kv.getValue().getValue(), kv.getValue().getKey()); } /** Writes the accumulated rows into BigQuery with streaming API. */ @Override public void finishBundle(Context context) { - BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class); - Bigquery client = Transport.newBigQueryClient(options).build(); + flushRows(context.getPipelineOptions().as(BigQueryOptions.class)); + } - try { - TableReference tableReference = - JSON_FACTORY.fromString(jsonTableReference, TableReference.class); + /** Accumulate a row to be written to BigQuery. */ + private void addRow(TableRow tableRow, String uniqueId) { + uniqueIdsForTableRows.add(uniqueId); + tableRows.add(tableRow); + } - BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); - inserter.insertAll(jsonTableRows.iterator(), uniqueIdsForTableRows.iterator()); - } catch (IOException e) { - throw new RuntimeException(e); + /** Writes the accumulated rows into BigQuery with streaming API. 
*/ + private void flushRows(BigQueryOptions options) { + if (!tableRows.isEmpty()) { + Bigquery client = Transport.newBigQueryClient(options).build(); + try { + BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); + inserter.insertAll(tableRows.iterator(), uniqueIdsForTableRows.iterator()); + } catch (IOException e) { + throw new RuntimeException(e); + } + tableRows.clear(); + uniqueIdsForTableRows.clear(); } } } From f724a169065e77ec42f1282848714dd7c56a156d Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 19 Feb 2015 10:30:13 -0800 Subject: [PATCH 0168/1541] Removes notes about streaming requiring whitelisting [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86703478 --- .../google/cloud/dataflow/sdk/io/PubsubIO.java | 12 ++++++------ .../dataflow/sdk/options/StreamingOptions.java | 15 ++------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 2414feeb34f9f..a405a7547ef19 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -29,14 +29,14 @@ import java.util.regex.Pattern; /** - * [Whitelisting Required] Read and Write transforms for Pub/Sub streams. These transforms create + * Read and Write transforms for Pub/Sub streams. These transforms create * and consume unbounded {@link com.google.cloud.dataflow.sdk.values.PCollection}s. * - *

    Important: PubsubIO is experimental. It is not supported by the - * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and is only supported in the - * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a - * streaming early access program and who enable - * {@link com.google.cloud.dataflow.sdk.options.StreamingOptions#setStreaming(boolean)}. + *

    {@code PubsubIO} is experimental. It is only usable + * with the {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} + * and requires + * {@link com.google.cloud.dataflow.sdk.options.StreamingOptions#setStreaming(boolean)} + * to be enabled. * *

    You should expect this class to change significantly in future versions of the SDK * or be removed entirely. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java index 6deca04b4888f..56db83da2a548 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java @@ -17,22 +17,11 @@ package com.google.cloud.dataflow.sdk.options; /** - * [Whitelisting Required] Options used to configure the streaming backend. - * - *

    Important: Streaming support is experimental. It is only supported in the - * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} for users whitelisted in a - * streaming early access program. - * - *

    You should expect this class to change significantly in future - * versions of the SDK or be removed entirely. + * Options used to configure the streaming backend. */ public interface StreamingOptions extends ApplicationNameOptions, GcpOptions, PipelineOptions { - /** - * Note that this feature is currently experimental and only available to users whitelisted in - * a streaming early access program. - */ - @Description("True if running in streaming mode (experimental)") + @Description("True if running in streaming mode") boolean isStreaming(); void setStreaming(boolean value); } From 25c3a7e548d188dc4beeb29d69bfacaf0707482a Mon Sep 17 00:00:00 2001 From: robertwb Date: Thu, 19 Feb 2015 11:10:53 -0800 Subject: [PATCH 0169/1541] Slightly stronger coder inference. Now a TemplateType> can infer either argument's Coder from the other. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86707653 --- .../cloud/dataflow/sdk/coders/CoderRegistry.java | 6 +++--- .../cloud/dataflow/sdk/coders/StandardCoder.java | 2 +- .../dataflow/sdk/coders/CoderRegistryTest.java | 15 +++++++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java index 37128918b5d71..ec2ff2579cdc1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -413,7 +413,7 @@ public Coder[] getDefaultCoders( "Cannot encode elements of type " + typeArgs[i] + " with " + knownCoders[i]); } - context.put(typeArgs[i], knownCoders[i]); + fillTypeBindings(typeArgs[i], knownCoders[i], context); } } Coder[] result = new Coder[typeArgs.length]; @@ -574,7 +574,7 @@ Coder getDefaultCoder(Type type, Map> typeCoderBindings) { return getDefaultCoder((Class) type); } else if (type instanceof ParameterizedType) { return this.getDefaultCoder((ParameterizedType) type, - typeCoderBindings); + typeCoderBindings); } else if (type instanceof TypeVariable || type instanceof WildcardType) { // No default coder for an unknown generic type. 
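// Sketch of the two-way inference this change enables, modeled on the
// CoderRegistryTest case added further down in this patch. MyTemplateClass<A, B>,
// MyValue, MyValueCoder and getStandardRegistry() are that test's fixtures and
// helper; the method below is illustrative only and assumes it sits inside the
// same test class, so the test's imports (JUnit, Collections, List, Map) apply.

@Test
public void inferSecondArgumentBackwards() {
  CoderRegistry registry = getStandardRegistry();
  Coder<List<MyValue>> listCoder = ListCoder.of(MyValueCoder.of());
  MyTemplateClass<MyValue, List<MyValue>> instance =
      new MyTemplateClass<MyValue, List<MyValue>>() {};

  // Binding only the type variable "B" is now enough: fillTypeBindings() also
  // records a binding for the plain class MyValue while recursing through the
  // parameterized List<MyValue>, so "A" resolves to MyValueCoder without being
  // supplied explicitly. Before this change only the forward direction worked
  // (given "A", infer "B").
  Map<String, Coder<?>> inferred = registry.getDefaultCoders(
      instance.getClass(),
      MyTemplateClass.class,
      Collections.singletonMap("B", listCoder));
  assertEquals(MyValueCoder.of(), inferred.get("A"));
}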
@@ -666,7 +666,7 @@ Map> createTypeBindings(TypeToken typeToken, void fillTypeBindings(Type type, Coder coder, Map> typeCoderBindings) { - if (type instanceof TypeVariable) { + if (type instanceof TypeVariable || type instanceof Class) { LOG.debug("Binding type {} to Coder {}", type, coder); typeCoderBindings.put(type, coder); } else if (type instanceof ParameterizedType) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java index 7a35fdcafbf27..9f475a3d9a786 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -53,7 +53,7 @@ public List> getComponents() { @Override public boolean equals(Object o) { - if (this.getClass() != o.getClass()) { + if (o == null || this.getClass() != o.getClass()) { return false; } StandardCoder that = (StandardCoder) o; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java index a05816c776886..2c411fae42316 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java @@ -92,28 +92,35 @@ public void testTemplateInference() { CoderRegistry registry = getStandardRegistry(); MyTemplateClass> instance = new MyTemplateClass>() {}; - Coder> expected = ListCoder.of(MyValueCoder.of()); + Coder> listCoder = ListCoder.of(MyValueCoder.of()); // The map method operates on parameter names. Map> coderMap = registry.getDefaultCoders( instance.getClass(), MyTemplateClass.class, Collections.singletonMap("A", MyValueCoder.of())); - assertEquals(expected, coderMap.get("B")); + assertEquals(listCoder, coderMap.get("B")); + + // Check we can infer the other direction as well. + Map> coderMap2 = registry.getDefaultCoders( + instance.getClass(), + MyTemplateClass.class, + Collections.singletonMap("B", listCoder)); + assertEquals(MyValueCoder.of(), coderMap2.get("A")); // The array interface operates on position. Coder[] coders = registry.getDefaultCoders( instance.getClass(), MyTemplateClass.class, new Coder[] { MyValueCoder.of(), null }); - assertEquals(expected, coders[1]); + assertEquals(listCoder, coders[1]); // The "last argument" coder handles a common case. 
Coder> actual = registry.getDefaultCoder( instance.getClass(), MyTemplateClass.class, MyValueCoder.of()); - assertEquals(expected, actual); + assertEquals(listCoder, actual); try { registry.getDefaultCoder( From 1ba2a06a3da3fd1992d49250fed880375b81d883 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 20 Feb 2015 11:36:07 -0800 Subject: [PATCH 0170/1541] Add javadoc to GroupByKey explaining what will happen in the presence of late data [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86806759 --- .../com/google/cloud/dataflow/sdk/io/PubsubIO.java | 10 +++++++++- .../cloud/dataflow/sdk/transforms/GroupByKey.java | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index a405a7547ef19..667653deea115 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -195,7 +195,10 @@ public static Bound subscription(String subscription) { * the first time it sees each record. All windowing will be done relative to these timestamps. * Windows are closed based on an estimate of when this source has finished producing data for * a timestamp range, which means that late data can arrive after a window has been closed. The - * {#dropLateData} field allows you to control what to do with late data. + * {#dropLateData} field allows you to control what to do with late data. The relaxes the + * semantics of {@code GroupByKey}; see + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} for additional information on + * late data and windowing. */ public static Bound timestampLabel(String timestampLabel) { return new Bound().timestampLabel(timestampLabel); @@ -203,6 +206,11 @@ public static Bound timestampLabel(String timestampLabel) { /** * If true, then late-arriving data from this source will be dropped. + * + *

    If late data is not dropped, data for a window can arrive after that window has already + * been closed. The relaxes the semantics of {@code GroupByKey}; see + * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} + * for additional information on late data and windowing. */ public static Bound dropLateData(boolean dropLateData) { return new Bound().dropLateData(dropLateData); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 4d11723c99638..471229f308e9a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -107,6 +107,11 @@ * in the window that it corresponds to. The output {@code PCollection} will * have the same {@link WindowFn} as the input. * + *

    If the input {@code PCollection} contains late data (see + * {@link com.google.cloud.dataflow.sdk.PubsubIO.Read.Bound#timestampLabel} + * for an example of how this can occur), then there may be multiple elements + * output by a {@code GroupByKey} that correspond to the same key and window. + * *

    If the {@link WindowFn} of the input requires merging, it is not * valid to apply another {@code GroupByKey} without first applying a new * {@link WindowFn}. From 81a4c3cc0629bec78358170e409ece21239c15ba Mon Sep 17 00:00:00 2001 From: bchambers Date: Fri, 20 Feb 2015 13:21:05 -0800 Subject: [PATCH 0171/1541] Add Coder#verifyDeterministic() to document the reasons for non-determinism. Implementations should overload to explain why they are non-determinstic. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86815676 --- .../cloud/dataflow/sdk/coders/Coder.java | 70 +++++++++++++++++++ .../dataflow/sdk/coders/CustomCoder.java | 1 + .../dataflow/sdk/coders/DelegateCoder.java | 6 ++ .../dataflow/sdk/coders/DoubleCoder.java | 7 ++ .../dataflow/sdk/coders/EntityCoder.java | 7 ++ .../dataflow/sdk/coders/InstantCoder.java | 5 ++ .../sdk/coders/IterableLikeCoder.java | 7 ++ .../cloud/dataflow/sdk/coders/KvCoder.java | 7 ++ .../cloud/dataflow/sdk/coders/ListCoder.java | 8 +++ .../cloud/dataflow/sdk/coders/MapCoder.java | 7 ++ .../sdk/coders/SerializableCoder.java | 7 ++ .../cloud/dataflow/sdk/coders/SetCoder.java | 7 ++ .../dataflow/sdk/coders/StandardCoder.java | 37 ++++++++++ .../dataflow/sdk/coders/StringUtf8Coder.java | 4 ++ .../sdk/coders/TableRowJsonCoder.java | 7 ++ .../sdk/coders/TextualIntegerCoder.java | 4 ++ .../dataflow/sdk/coders/VarIntCoder.java | 4 ++ .../dataflow/sdk/coders/VarLongCoder.java | 4 ++ .../cloud/dataflow/sdk/coders/VoidCoder.java | 4 ++ .../sdk/transforms/ApproximateQuantiles.java | 14 +++- .../cloud/dataflow/sdk/transforms/Count.java | 2 +- .../dataflow/sdk/transforms/GroupByKey.java | 9 ++- .../cloud/dataflow/sdk/transforms/Top.java | 7 ++ .../sdk/transforms/join/CoGbkResult.java | 7 ++ .../sdk/transforms/join/UnionCoder.java | 8 +++ .../dataflow/sdk/util/TimerOrElement.java | 7 ++ .../dataflow/sdk/util/WindowedValue.java | 20 ++++++ .../dataflow/sdk/values/TimestampedValue.java | 8 +++ .../dataflow/sdk/coders/CoderProperties.java | 12 ++-- .../sdk/coders/CoderRegistryTest.java | 4 ++ .../sdk/coders/StringDelegateCoderTest.java | 10 +-- .../runners/worker/CombineValuesFnTest.java | 3 + .../sdk/transforms/GroupByKeyTest.java | 3 +- 33 files changed, 301 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java index 3760cb82003bb..c009e6ff7ac11 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java @@ -18,13 +18,18 @@ import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; +import java.util.Arrays; import java.util.List; +import javax.annotation.Nullable; + /** * A {@code Coder} defines how to encode and decode values of type {@code T} into byte streams. * @@ -125,9 +130,31 @@ public T decode(InputStream inStream, Context context) * same for an instance of an object even if produced on different * computers at different times. * + * + * @deprecated {@link #verifyDeterministic()} should be used instead to + * produce explanations of why a given Coder is non-deterministic. 
*/ + @Deprecated public boolean isDeterministic(); + /** + * Throw {@link NonDeterministicException} if the coding is not deterministic. + * + *

    In order for a {@code Coder} to be considered deterministic, + * the following must be true: + *

    • two values which compare as equal (via {@code Object.equals()} + * or {@code Comparable.compareTo()}, if supported), have the same + * encoding. + *
    • the {@code Coder} always produces a canonical encoding, which is the + * same for an instance of an object even if produced on different + * computers at different times. + *
    + * + * @throws Coder.NonDeterministicException if this coder is not deterministic. + */ + public void verifyDeterministic() throws Coder.NonDeterministicException; + /** * Returns whether {@link #registerByteSizeObserver} cheap enough to * call for every element, that is, if this {@code Coder} can @@ -151,4 +178,47 @@ public T decode(InputStream inStream, Context context) public void registerByteSizeObserver( T value, ElementByteSizeObserver observer, Context context) throws Exception; + + /** + * Exception thrown by {@link Coder#verifyDeterministic()} if the encoding is + * not deterministic. + */ + public static class NonDeterministicException extends Throwable { + private Coder coder; + private List reasons; + + public NonDeterministicException( + Coder coder, String reason, @Nullable NonDeterministicException e) { + this(coder, Arrays.asList(reason), e); + } + + public NonDeterministicException(Coder coder, String reason) { + this(coder, Arrays.asList(reason), null); + } + + public NonDeterministicException(Coder coder, List reasons) { + this(coder, reasons, null); + } + + public NonDeterministicException( + Coder coder, + List reasons, + @Nullable NonDeterministicException cause) { + super(cause); + Preconditions.checkArgument(reasons.size() > 0, + "Reasons must not be empty."); + this.reasons = reasons; + this.coder = coder; + } + + public Iterable getReasons() { + return reasons; + } + + @Override + public String getMessage() { + return String.format("%s is not deterministic because:\n %s", + coder, Joiner.on("\n ").join(reasons)); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java index 6b31297a1071c..f7ce7003217a4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java @@ -73,6 +73,7 @@ public CloudObject asCloudObject() { } @Override + @Deprecated public boolean isDeterministic() { return false; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java index 9cc75167872fd..6d3f926145e02 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java @@ -66,10 +66,16 @@ public T decode(InputStream inStream, Context context) throws CoderException, IO } @Override + @Deprecated public boolean isDeterministic() { return coder.isDeterministic(); } + @Override + public void verifyDeterministic() throws NonDeterministicException { + coder.verifyDeterministic(); + } + @Override public String toString() { return "DelegateCoder(" + coder + ")"; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java index 1726ea9ed127d..ee647d4fbcc1a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java @@ -70,10 +70,17 @@ public Double decode(InputStream inStream, Context context) * recommended for use in operations which require deterministic inputs. 
*/ @Override + @Deprecated public boolean isDeterministic() { return false; } + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "Floating point encodings are not guaranteed to be deterministic."); + } + /** * Returns true since registerByteSizeObserver() runs in constant time. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java index da9769303b702..fe24a55ce7257 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java @@ -77,7 +77,14 @@ protected long getEncodedElementByteSize(Entity value, Context context) * which makes the encoding non-deterministic. */ @Override + @Deprecated public boolean isDeterministic() { return false; } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "Datastore encodings can hold arbitrary Object instances"); + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java index 5918eb220aef6..3ec04e61ba2d1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java @@ -19,6 +19,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import org.joda.time.Instant; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -55,7 +56,11 @@ public Instant decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return true; } + + @Override + public void verifyDeterministic() { } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index c9332853e267f..e8cfb29a9fc60 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -137,10 +137,17 @@ public List> getCoderArguments() { * while the encoding differs. */ @Override + @Deprecated public boolean isDeterministic() { return false; } + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "IterableLikeCoder can not guarantee deterministic ordering."); + } + /** * Returns whether iterable can use lazy counting, since that * requires minimal extra computation. 
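// A compact implementer-side sketch of the contract introduced by this change.
// PointCoder and its use of java.awt.Point are illustrative only, not part of
// this patch; the pattern follows the coders touched here: keep the deprecated
// isDeterministic() for existing callers, and have verifyDeterministic() either
// return normally, delegate to component coders through the protected
// StandardCoder helper, or throw NonDeterministicException with a readable reason.

import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.VarIntCoder;

import java.awt.Point;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

class PointCoder extends AtomicCoder<Point> {
  private final VarIntCoder intCoder = VarIntCoder.of();

  @Override
  public void encode(Point value, OutputStream outStream, Context context)
      throws CoderException, IOException {
    // VarIntCoder is self-delimiting, so the same context can be reused for
    // both fields.
    intCoder.encode(value.x, outStream, context);
    intCoder.encode(value.y, outStream, context);
  }

  @Override
  public Point decode(InputStream inStream, Context context)
      throws CoderException, IOException {
    return new Point(intCoder.decode(inStream, context),
                     intCoder.decode(inStream, context));
  }

  @Override
  @Deprecated
  public boolean isDeterministic() {
    return intCoder.isDeterministic();
  }

  @Override
  public void verifyDeterministic() throws NonDeterministicException {
    // Composite case: surface the component coder's reasons, wrapped with
    // context, using the protected helper added to StandardCoder above.
    // A coder with no canonical encoding would instead do:
    //   throw new NonDeterministicException(this, "Point has no canonical encoding.");
    verifyDeterministic("PointCoder requires a deterministic int coder", intCoder);
  }
}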
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java index bf6e7c9d78892..f2276d9ea2120 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java @@ -102,10 +102,17 @@ public List> getCoderArguments() { } @Override + @Deprecated public boolean isDeterministic() { return getKeyCoder().isDeterministic() && getValueCoder().isDeterministic(); } + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic("Key coder must be deterministic", getKeyCoder()); + verifyDeterministic("Value coder must be deterministic", getValueCoder()); + } + @Override public CloudObject asCloudObject() { CloudObject result = super.asCloudObject(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java index f6f04b60d34ff..90685b8072cca 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java @@ -70,7 +70,15 @@ public static List getInstanceComponents(List exampleValue) { * the general IterableLikeCoder is not. */ @Override + @Deprecated public boolean isDeterministic() { return getElemCoder().isDeterministic(); } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "ListCoder.elemCoder must be deterministic", getElemCoder()); + } + } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java index 1e12ef347e70f..25e4a99d61168 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java @@ -126,10 +126,17 @@ public List> getCoderArguments() { * two HashMap instances may be equal but produce different encodings. */ @Override + @Deprecated public boolean isDeterministic() { return false; } + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "Ordering of entries in a Map may be non-deterministic."); + } + /** * Notifies ElementByteSizeObserver about the byte size of the * encoded value using this coder. 
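// Caller-side sketch of the new API, mirroring the GroupByKey change later in
// this patch: construction-time validation calls verifyDeterministic() and
// converts the checked exception, whose message now lists the nested reasons,
// into the usual IllegalStateException. DeterminismChecks and
// validateGroupingKeyCoder are illustrative names, not code from the patch.

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;

final class DeterminismChecks {
  private DeterminismChecks() {}

  static void validateGroupingKeyCoder(Coder<?> keyCoder) {
    try {
      keyCoder.verifyDeterministic();
    } catch (NonDeterministicException e) {
      // e.getMessage() explains which component broke determinism, e.g. a
      // KvCoder reporting "Key coder must be deterministic"; far more
      // actionable than the old boolean.
      throw new IllegalStateException(
          "the keyCoder of a GroupByKey must be deterministic", e);
    }
  }
}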
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java index 303370aac0e51..b24b27e558a17 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java @@ -109,10 +109,17 @@ public CloudObject asCloudObject() { } @Override + @Deprecated public boolean isDeterministic() { return false; } + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "Java Serialization may be non-deterministic."); + } + @Override public boolean equals(Object other) { if (getClass() != other.getClass()) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java index 9a65d39a0e403..88cae0a64c93a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java @@ -60,10 +60,17 @@ public static SetCoder of( * two {@code HashSet} instances may be equal but produce different encodings. */ @Override + @Deprecated public boolean isDeterministic() { return false; } + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "Ordering of elements in a set may be non-deterministic."); + } + /** * Returns the first element in this set if it is non-empty, * otherwise returns {@code null}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java index 9f475a3d9a786..b5838908497e5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -24,9 +24,12 @@ import java.io.ByteArrayOutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import javax.annotation.Nullable; + /** * A StandardCoder is one that defines equality, hashing, and printing * via the class name and recursively using {@link #getComponents}. @@ -140,4 +143,38 @@ public void registerByteSizeObserver( throws Exception { observer.update(getEncodedElementByteSize(value, context)); } + + @SuppressWarnings("deprecation") + @Override + public void verifyDeterministic() throws NonDeterministicException { + if (!isDeterministic()) { + throw new NonDeterministicException(this, + getClass().getSimpleName() + " reported it was not determinsitic."); + } + } + + protected void verifyDeterministic(String message, Iterable> coders) + throws NonDeterministicException { + for (Coder coder : coders) { + try { + coder.verifyDeterministic(); + } catch (NonDeterministicException e) { + throw new NonDeterministicException(this, message, e); + } + } + } + + protected void verifyDeterministic(String message, Coder... 
coders) + throws NonDeterministicException { + verifyDeterministic(message, Arrays.asList(coders)); + } + + protected void addReasons(String prefix, List accumulator, + @Nullable List newReasons) { + if (newReasons != null) { + for (String reason : newReasons) { + accumulator.add(prefix + reason); + } + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java index 48b807b6fcf8c..0c903501c0c73 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java @@ -105,10 +105,14 @@ public String decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + protected long getEncodedElementByteSize(String value, Context context) throws Exception { if (value == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java index 349ff323c1519..0ce713c29c256 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java @@ -74,7 +74,14 @@ private TableRowJsonCoder() { } * non-deterministic. */ @Override + @Deprecated public boolean isDeterministic() { return false; } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + throw new NonDeterministicException(this, + "TableCell can hold arbitrary instances which may be non-deterministic."); + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java index 92f343bd4d7e9..a78ae3778e1d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java @@ -59,10 +59,14 @@ public Integer decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + protected long getEncodedElementByteSize(Integer value, Context context) throws Exception { if (value == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java index f357fce890284..e30a094273f19 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java @@ -68,10 +68,14 @@ public Integer decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + /** * Returns true since registerByteSizeObserver() runs in constant time. 
*/ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java index 50866383bca2a..56a20cf4093e7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java @@ -68,10 +68,14 @@ public Long decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + /** * Returns true since registerByteSizeObserver() runs in constant time. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java index e949fdfd03484..a3bed700eb209 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java @@ -50,10 +50,14 @@ public Void decode(InputStream inStream, Context context) { } @Override + @Deprecated public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + /** * Returns true since registerByteSizeObserver() runs in constant time. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index 5702583ebe65c..3a583f639756d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -654,8 +654,20 @@ public void registerByteSizeObserver( } @Override + @Deprecated public boolean isDeterministic() { - return elementListCoder.isDeterministic(); + return elementCoder.isDeterministic() + && elementListCoder.isDeterministic(); + } + + @Override + public void verifyDeterministic() throws NonDeterministicException{ + verifyDeterministic( + "QuantileState.ElementCoder must be deterministic", + elementCoder); + verifyDeterministic( + "QuantileState.ElementListCoder must be deterministic", + elementListCoder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index b6e4561f2dde4..f5fceb29c5096 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -120,7 +120,7 @@ public void processElement(ProcessContext c) { * encoding each element using the input {@code PCollection}'s * {@code Coder}, then comparing the encoded bytes. Because of this, * the input coder must be deterministic. (See - * {@link com.google.cloud.dataflow.sdk.coders.Coder#isDeterministic()} for more detail). + * {@link com.google.cloud.dataflow.sdk.coders.Coder#verifyDeterministic()} for more detail). * Performing the comparison in this manner admits efficient parallel evaluation. * *

    By default, the {@code Coder} of the keys of the output diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 471229f308e9a..f89b224fb9829 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -19,6 +19,7 @@ import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; @@ -65,7 +66,7 @@ * keys of the input {@code PCollection}, and then comparing the * encoded bytes. This admits efficient parallel evaluation. Note that * this requires that the {@code Coder} of the keys be deterministic (see - * {@link Coder#isDeterministic()}). If the key {@code Coder} is not + * {@link Coder#verifyDeterministic()}). If the key {@code Coder} is not * deterministic, an exception is thrown at runtime. * *

    By default, the {@code Coder} of the keys of the output @@ -295,9 +296,11 @@ public void finishSpecifying() { // Verify that the input Coder> is a KvCoder, and that // the key coder is deterministic. Coder keyCoder = getKeyCoder(); - if (!keyCoder.isDeterministic()) { + try { + keyCoder.verifyDeterministic(); + } catch (NonDeterministicException e) { throw new IllegalStateException( - "the key Coder must be deterministic for grouping"); + "the keyCoder of a GroupByKey must be deterministic", e); } if (getOutput().isOrdered()) { throw new IllegalStateException( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index c4fb15bd23232..bc0270f01bce7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -441,10 +441,17 @@ public Heap decode(InputStream inStream, Coder.Context context) } @Override + @Deprecated public boolean isDeterministic() { return listCoder.isDeterministic(); } + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "HeapCoder requires a deterministic list coder", listCoder); + } + @Override public boolean isRegisterByteSizeObserverCheap( Heap value, Context context) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java index 56fc3fc84bc0a..81b12d1c09792 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java @@ -234,9 +234,16 @@ public boolean equals(Object other) { } @Override + @Deprecated public boolean isDeterministic() { return mapCoder.isDeterministic(); } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "CoGbkResult requires the mapCoder to be deterministic", mapCoder); + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java index f3d128c67b9e1..a2b56c2f1cabb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java @@ -140,6 +140,7 @@ private UnionCoder(List> elementCoders) { } @Override + @Deprecated public boolean isDeterministic() { for (Coder elementCoder : elementCoders) { if (!elementCoder.isDeterministic()) { @@ -149,4 +150,11 @@ public boolean isDeterministic() { return true; } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "UnionCoder is only deterministic if all element coders are", + elementCoders); + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java index d0216d3075782..5303ca3ec844f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerOrElement.java @@ -155,11 +155,18 @@ public void registerByteSizeObserver( } } + @Deprecated @Override public boolean isDeterministic() { return elemCoder.isDeterministic(); } + @Override + public void verifyDeterministic() throws 
NonDeterministicException { + verifyDeterministic( + "TimerOrElementCoder requires a deterministic elemCoder", elemCoder); + } + @Override public List> getCoderArguments() { return Arrays.asList(elemCoder); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 4897a3e10c229..f724e440261d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -275,10 +275,21 @@ public WindowedValue decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return valueCoder.isDeterministic() && windowCoder.isDeterministic(); } + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "FullWindowedValueCoder requires a deterministic valueCoder", + valueCoder); + verifyDeterministic( + "FullWindowedValueCoder requires a deterministic windowCoder", + windowCoder); + } + @Override public void registerByteSizeObserver(WindowedValue value, ElementByteSizeObserver observer, @@ -351,10 +362,19 @@ public WindowedValue decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return valueCoder.isDeterministic(); } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "ValueOnlyWindowedValueCoder requires a deterministic valueCoder", + valueCoder); + } + @Override public void registerByteSizeObserver( WindowedValue value, ElementByteSizeObserver observer, Context context) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java index ffa0202e15b2c..1886b7f0a9490 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java @@ -108,10 +108,18 @@ public TimestampedValue decode(InputStream inStream, Context context) } @Override + @Deprecated public boolean isDeterministic() { return valueCoder.isDeterministic(); } + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + "TimestampedValueCoder requires a deterministic valueCoder", + valueCoder); + } + @Override public List> getCoderArguments() { return Arrays.>asList(valueCoder); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java index 1b9200a9164de..f846418284173 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProperties.java @@ -21,13 +21,13 @@ import static org.hamcrest.Matchers.emptyIterable; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; +import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.util.Serializer; import com.google.common.collect.Iterables; -import org.hamcrest.CoreMatchers; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -71,8 +71,12 @@ public static void coderDeterministic( public static void 
coderDeterministicInContext( Coder coder, Coder.Context context, T value1, T value2) throws Exception { - assertThat("Expected that the coder is deterministic", - coder.isDeterministic(), CoreMatchers.is(true)); + + try { + coder.verifyDeterministic(); + } catch (NonDeterministicException e) { + fail("Expected that the coder is deterministic"); + } assertThat("Expected that the passed in values are equal()", value1, equalTo(value2)); assertThat( encode(coder, context, value1), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java index 2c411fae42316..c2a1e6c9f023f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java @@ -218,9 +218,13 @@ public CloudObject asCloudObject() { return null; } + @Deprecated @Override public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + @Override public boolean isRegisterByteSizeObserverCheap(MyValue value, Context context) { return true; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java index f930dfafbda68..d74a74f7ac57d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoderTest.java @@ -19,6 +19,8 @@ import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; +import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -44,12 +46,10 @@ public class StringDelegateCoderTest { // Tests - private static final List TEST_CONTEXTS = Arrays.asList( - Coder.Context.NESTED, - Coder.Context.OUTER); - + @SuppressWarnings("deprecation") @Test - public void testDeterministic() throws Exception { + public void testDeterministic() throws Exception, NonDeterministicException { + uriCoder.verifyDeterministic(); assertThat(uriCoder.isDeterministic(), equalTo(true)); for (String uriString : TEST_URI_STRINGS) { CoderProperties.coderDeterministic(uriCoder, new URI(uriString), new URI(uriString)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java index abddf51ad5bc9..a8040b6347df6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFnTest.java @@ -150,6 +150,9 @@ public MeanInts.CountSum decode(InputStream inStream, Context context) @Override public boolean isDeterministic() { return true; } + @Override + public void verifyDeterministic() { } + public CloudObject asCloudObject() { return makeCloudEncoding(this.getClass().getName()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index 5d97a7a657b93..5cc50dc1e7aa1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -35,7 +35,6 @@ import 
com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; -import org.hamcrest.Matchers; import org.joda.time.Duration; import org.junit.Assert; import org.junit.Rule; @@ -168,7 +167,7 @@ public void testGroupByKeyEmpty() { @Test public void testGroupByKeyNonDeterministic() throws Exception { expectedEx.expect(IllegalStateException.class); - expectedEx.expectMessage(Matchers.containsString("must be deterministic")); + expectedEx.expectMessage("must be deterministic"); List, Integer>> ungroupedPairs = Arrays.asList(); From bf6b39189aa0aafd590dc831e18267aba302443f Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 20 Feb 2015 15:41:59 -0800 Subject: [PATCH 0172/1541] Fix cross-platform line endings issues by specifying repository-wide settings. This fixes #7 on GitHub. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86828537 --- .gitattributes | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000..cce74a2d901d5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,26 @@ +# The default behavior, which overrides 'core.autocrlf', is to use Git's +# built-in heuristics to determine whether a particular file is text or binary. +# Text files are automatically normalized to the user's platforms. +* text=auto + +# Explicitly declare text files that should always be normalized and converted +# to native line endings. +.gitattributes text +.gitignore text +LICENSE text +*.avsc text +*.html text +*.java text +*.md text +*.properties text +*.proto text +*.py text +*.sh text +*.xml text +*.yml text + +# Declare files that will always have CRLF line endings on checkout. +# *.sln text eol=crlf + +# Explicitly denote all files that are truly binary and should not be modified. +# *.jpg binary From 021243892bf3b2457ae0e72bfb186183b0bb86f7 Mon Sep 17 00:00:00 2001 From: chamikara Date: Sat, 21 Feb 2015 00:15:10 -0800 Subject: [PATCH 0173/1541] Fix following bugs in direct runner GCS file pattern expansion. * Add support for range patterns (e.g., "gs:[][1-5]"). * GcsUtil.expand() should give zero results for directories. Updated storage field of GcsUtil to be non-final and added a protected setter so that Storage can be mocked for testing purposes. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86853046 --- .../cloud/dataflow/sdk/util/GcsUtil.java | 31 +++-- .../cloud/dataflow/sdk/util/GcsUtilTest.java | 122 ++++++++++++++++++ 2 files changed, 142 insertions(+), 11 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java index dd51b669e2820..6f7ae37cb2c91 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java @@ -33,7 +33,6 @@ import java.io.IOException; import java.nio.channels.SeekableByteChannel; import java.nio.channels.WritableByteChannel; -import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.concurrent.ExecutorService; @@ -71,7 +70,7 @@ public GcsUtil create(PipelineOptions options) { private static final long MAX_LIST_ITEMS_PER_CALL = 1024; /** Matches a glob containing a wildcard, capturing the portion before the first wildcard. 
*/ - private static final Pattern GLOB_PREFIX = Pattern.compile("(?[^*?]*)[*?].*"); + private static final Pattern GLOB_PREFIX = Pattern.compile("(?[^\\[*?]*)[\\[*?].*"); private static final String RECURSIVE_WILDCARD = "[*]{2}"; @@ -84,7 +83,7 @@ public GcsUtil create(PipelineOptions options) { ///////////////////////////////////////////////////////////////////////////// /** Client for the GCS API. */ - private final Storage storage; + private Storage storage; // Helper delegate for turning IOExceptions from API calls into higher-level semantics. private final ApiErrorExtractor errorExtractor = new ApiErrorExtractor(); @@ -110,22 +109,32 @@ private GcsUtil(Storage storageClient, ExecutorService executorService) { this.executorService = executorService; } + // Use this only for testing purposes. + protected void setStorageClient(Storage storage) { + this.storage = storage; + } + /** - * Expands a pattern into matched paths. The pattern path may contain - * globs, which are expanded in the result. + * Expands a pattern into matched paths. The pattern path may contain globs, which are expanded in + * the result. This function validates the existence of each matched file in GCS. */ public List expand(GcsPath gcsPattern) throws IOException { Preconditions.checkArgument(isGcsPatternSupported(gcsPattern.getObject())); Matcher m = GLOB_PREFIX.matcher(gcsPattern.getObject()); + Pattern p = null; + String prefix = null; if (!m.matches()) { - return Arrays.asList(gcsPattern); + // Not a glob. But we should verify that the file exists in GCS. + prefix = gcsPattern.getObject(); + p = Pattern.compile(gcsPattern.getObject()); + } else { + // Part before the first wildcard character. + prefix = m.group("PREFIX"); + p = Pattern.compile(globToRegexp(gcsPattern.getObject())); } - // Part before the first wildcard character. - String prefix = m.group("PREFIX"); - Pattern p = Pattern.compile(globToRegexp(gcsPattern.getObject())); - LOG.info("matching files in bucket {}, prefix {} against pattern {}", - gcsPattern.getBucket(), prefix, p.toString()); + LOG.info("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(), + prefix, p.toString()); // List all objects that start with the prefix (including objects in sub-directories). 
Storage.Objects.List listObject = storage.objects().list(gcsPattern.getBucket()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java index c7a7c55f5b585..e6ec624f45373 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java @@ -16,21 +16,34 @@ package com.google.cloud.dataflow.sdk.util; +import static org.hamcrest.Matchers.contains; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.util.Throwables; +import com.google.api.services.storage.Storage; +import com.google.api.services.storage.model.Objects; +import com.google.api.services.storage.model.StorageObject; import com.google.cloud.dataflow.sdk.options.GcsOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath; +import com.google.common.collect.ImmutableList; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import org.mockito.Mockito; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -39,6 +52,8 @@ /** Test case for {@link GcsUtil}. */ @RunWith(JUnit4.class) public class GcsUtilTest { + @Rule public ExpectedException exception = ExpectedException.none(); + @Test public void testGlobTranslation() { assertEquals("foo", GcsUtil.globToRegexp("foo")); @@ -102,4 +117,111 @@ public void run() { assertTrue("Expected tasks to complete", executorService.awaitTermination(10, TimeUnit.SECONDS)); } + + @Test + public void testGlobExpansion() throws IOException { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + pipelineOptions.setGcpCredential(Mockito.mock(Credential.class)); + GcsUtil gcsUtil = pipelineOptions.getGcsUtil(); + + Storage mockStorage = Mockito.mock(Storage.class); + gcsUtil.setStorageClient(mockStorage); + + Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class); + Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class); + + Objects modelObjects = new Objects(); + List items = new ArrayList<>(); + // A directory + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/")); + + // Files within the directory + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file1name")); + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file2name")); + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file3name")); + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/otherfile")); + items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/anotherfile")); + + modelObjects.setItems(items); + + when(mockStorage.objects()).thenReturn(mockStorageObjects); + when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList); + when(mockStorageList.execute()).thenReturn(modelObjects); + + // Test a 
single file. + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/otherfile"); + List expectedFiles = + ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/otherfile")); + + assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray())); + } + + // Directories should not match. + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/"); + List pathList = gcsUtil.expand(pattern); + assertEquals(pathList.size(), 0); + } + + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory"); + List pathList = gcsUtil.expand(pattern); + assertEquals(pathList.size(), 0); + } + + // Test patterns. + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file*"); + List expectedFiles = ImmutableList.of( + GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file3name")); + + assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray())); + } + + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file[1-3]*"); + List expectedFiles = ImmutableList.of( + GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file3name")); + + assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray())); + } + + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file?name"); + List expectedFiles = ImmutableList.of( + GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file3name")); + + assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray())); + } + + { + GcsPath pattern = GcsPath.fromUri("gs://testbucket/test*ectory/fi*name"); + List expectedFiles = ImmutableList.of( + GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), + GcsPath.fromUri("gs://testbucket/testdirectory/file3name")); + + assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray())); + } + } + + // Patterns that contain recursive wildcards ('**') are not supported. + @Test + public void testRecursiveGlobExpansionFails() throws IOException { + GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class); + pipelineOptions.setGcpCredential(Mockito.mock(Credential.class)); + GcsUtil gcsUtil = pipelineOptions.getGcsUtil(); + GcsPath pattern = GcsPath.fromUri("gs://testbucket/test**"); + + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Unsupported wildcard usage"); + gcsUtil.expand(pattern); + } } From 207007ba498662e8e6953e3eff978789d31ab2d9 Mon Sep 17 00:00:00 2001 From: davor Date: Sat, 21 Feb 2015 10:53:36 -0800 Subject: [PATCH 0174/1541] Travis CI: add retries to decrease the false-positive rate. Ideally, we should be using 'travis_retry' on 'mvn dependency:go-offline' only. However, due to http://jira.codehaus.org/browse/MDEP-82, that doesn't work. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86870674 --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index dc499659491a4..a9e4b84d71244 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,8 +36,8 @@ before_install: - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi install: - - mvn install clean -U -DskipTests=true + - travis_retry mvn install clean -U -DskipTests=true script: - - mvn verify -U - - ./test_wordcount.sh + - travis_retry mvn verify -U + - travis_retry ./test_wordcount.sh From 1f8d4c4bd821e1b9cb545e6bfb3eaccf58af5108 Mon Sep 17 00:00:00 2001 From: malo Date: Sat, 21 Feb 2015 13:19:49 -0800 Subject: [PATCH 0175/1541] Add GenomicsVariantSimilarity integration test. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86874467 --- checkstyle.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkstyle.xml b/checkstyle.xml index c6c07ef9f1c98..062f1346d9583 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -103,7 +103,7 @@ page at http://checkstyle.sourceforge.net/config.html --> - + From 63a89683f15475e069dd0911db25b8e2d99dbfd7 Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 23 Feb 2015 11:07:16 -0800 Subject: [PATCH 0176/1541] Add support for toString() to print out serialized pipeline options in addition to the ones programmatically set. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86973245 --- .../sdk/options/ProxyInvocationHandler.java | 15 ++++++++-- .../options/ProxyInvocationHandlerTest.java | 28 +++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java index e74e5a8f6e820..0212bfa463c3e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java @@ -54,6 +54,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.SortedMap; import java.util.TreeMap; /** @@ -182,15 +183,23 @@ public int hashCode() { } /** - * This will output all the currently set values. + * This will output all the currently set values. This is a relatively costly function + * as it will call {@code toString()} on each object that has been set and format + * the results in a readable format. * - * @return A string representation of this. + * @return A pretty printed string representation of this. */ @Override public synchronized String toString() { + SortedMap sortedOptions = new TreeMap<>(); + // Add the options that we received from deserialization + sortedOptions.putAll(jsonOptions); + // Override with any programmatically set options. 
+ sortedOptions.putAll(options); + StringBuilder b = new StringBuilder(); b.append("Current Settings:\n"); - for (Map.Entry entry : new TreeMap<>(options).entrySet()) { + for (Map.Entry entry : sortedOptions.entrySet()) { b.append(" " + entry.getKey() + ": " + entry.getValue() + "\n"); } return b.toString(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index d893ba4d08d30..8d2f859f83d82 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -235,6 +235,34 @@ public void testToString() throws Exception { proxy.toString()); } + @Test + public void testToStringAfterDeserializationContainsJsonEntries() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + proxy.setString("stringValue"); + DefaultAnnotations proxy2 = proxy.as(DefaultAnnotations.class); + proxy2.setLong(57L); + assertEquals("Current Settings:\n" + + " long: 57\n" + + " string: \"stringValue\"\n", + serializeDeserialize(PipelineOptions.class, proxy2).toString()); + } + + @Test + public void testToStringAfterDeserializationContainsOverriddenEntries() throws Exception { + ProxyInvocationHandler handler = new ProxyInvocationHandler(Maps.newHashMap()); + Simple proxy = handler.as(Simple.class); + proxy.setString("stringValue"); + DefaultAnnotations proxy2 = proxy.as(DefaultAnnotations.class); + proxy2.setLong(57L); + Simple deserializedOptions = serializeDeserialize(Simple.class, proxy2); + deserializedOptions.setString("overriddenValue"); + assertEquals("Current Settings:\n" + + " long: 57\n" + + " string: overriddenValue\n", + deserializedOptions.toString()); + } + /** A test interface containing an unknown method. */ public static interface UnknownMethod { void unknownMethod(); From 170376d652b3d0fc894d7dfc87c760f255ea236e Mon Sep 17 00:00:00 2001 From: relax Date: Mon, 23 Feb 2015 11:35:42 -0800 Subject: [PATCH 0177/1541] Fix retry logic in BigQueryIO. Previously if an insert failed, we threw an exception. However this caused us to retry all of the rows processed in the bundle, even if the failure was only for a single row. Now we detect which rows failed and attempt to retry only those rows. If insert fails five times in a row, we throw an exception to make sure the failure is exposed to the user. 
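As a rough illustration (not the actual BigQueryTableInserter code, which follows in the diff below), the per-row retry loop can be sketched independently of the BigQuery API. BatchInserter, the constants, and the method names here are hypothetical stand-ins:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/** Sketch of the retry-only-failed-rows strategy described above. */
public class PerRowRetrySketch {

  /** Hypothetical stand-in for the insertAll service call; returns indices of rows that failed. */
  interface BatchInserter<T> {
    List<Integer> insertBatch(List<T> rows) throws IOException;
  }

  private static final int MAX_ATTEMPTS = 5;            // mirrors "five times in a row" above
  private static final long INITIAL_BACKOFF_MS = 200L;  // assumed initial backoff

  public static <T> void insertAllWithRetry(List<T> rows, BatchInserter<T> inserter)
      throws IOException, InterruptedException {
    List<T> toPublish = rows;
    long backoffMs = INITIAL_BACKOFF_MS;
    for (int attempt = 1; ; attempt++) {
      List<Integer> failed = inserter.insertBatch(toPublish);
      if (failed.isEmpty()) {
        return;  // every remaining row was accepted
      }
      if (attempt >= MAX_ATTEMPTS) {
        throw new IOException("Insert failed: " + failed.size()
            + " rows still failing after " + attempt + " attempts");
      }
      // Keep only the rows that failed; rows that were accepted are never resent.
      List<T> retry = new ArrayList<>();
      for (int index : failed) {
        retry.add(toPublish.get(index));
      }
      toPublish = retry;
      Thread.sleep(backoffMs);
      backoffMs *= 2;  // simple exponential backoff between attempts
    }
  }
}

The key point of the change is visible in the loop: successfully inserted rows drop out of the working list, so only genuinely failing rows are retried and, after the attempt limit, surfaced to the user.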
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=86976384 --- .../cloud/dataflow/sdk/io/BigQueryIO.java | 4 +- .../sdk/util/BigQueryTableInserter.java | 150 ++++++++++++------ .../dataflow/sdk/util/BigQueryUtilTest.java | 68 ++++++++ 3 files changed, 175 insertions(+), 47 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index dca65ad9f592e..63e29aaf07162 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -718,7 +718,7 @@ private void flushRows(BigQueryOptions options) { Bigquery client = Transport.newBigQueryClient(options).build(); try { BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); - inserter.insertAll(tableRows.iterator(), uniqueIdsForTableRows.iterator()); + inserter.insertAll(tableRows, uniqueIdsForTableRows); } catch (IOException e) { throw new RuntimeException(e); } @@ -854,7 +854,7 @@ private static void evaluateWriteHelper( transform.writeDisposition, transform.createDisposition, transform.schema); List tableRows = context.getPCollection(transform.getInput()); - inserter.insertAll(tableRows.iterator()); + inserter.insertAll(tableRows); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java index 0ae41a057b1b2..dbe8a26a7d672 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java @@ -32,7 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.Iterator; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -50,8 +50,15 @@ public class BigQueryTableInserter { // The maximum number of rows to upload per InsertAll request. private static final long MAX_ROWS_PER_BATCH = 500; + // The maximum number of times to retry inserting rows into BigQuery. + private static final int MAX_INSERT_ATTEMPTS = 5; + + // The initial backoff after a failure inserting rows into BigQuery. + private static final long INITIAL_INSERT_BACKOFF_INTERVAL_MS = 200L; + private final Bigquery client; private final TableReference ref; + private final long maxRowsPerBatch; /** * Constructs a new row inserter. @@ -62,63 +69,116 @@ public class BigQueryTableInserter { public BigQueryTableInserter(Bigquery client, TableReference ref) { this.client = client; this.ref = ref; + this.maxRowsPerBatch = MAX_ROWS_PER_BATCH; } /** - * Insert all rows from the given iterator. + * Constructs a new row inserter. + * + * @param client a BigQuery client + * @param ref identifies the table to insert into */ - public void insertAll(Iterator rowIterator) throws IOException { - insertAll(rowIterator, null); + public BigQueryTableInserter(Bigquery client, TableReference ref, int maxRowsPerBatch) { + this.client = client; + this.ref = ref; + this.maxRowsPerBatch = maxRowsPerBatch; } /** - * Insert all rows from the given iterator using specified insertIds if not null. + * Insert all rows from the given list. */ - public void insertAll(Iterator rowIterator, - @Nullable Iterator insertIdIterator) throws IOException { - // Upload in batches. 
- List rows = new LinkedList<>(); - int numInserted = 0; - int dataSize = 0; - while (rowIterator.hasNext()) { - TableRow row = rowIterator.next(); - TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows(); - if (insertIdIterator != null) { - if (insertIdIterator.hasNext()) { - out.setInsertId(insertIdIterator.next()); - } else { - throw new AssertionError("If insertIdIterator is not null it needs to have at least " - + "as many elements as rowIterator"); - } + public void insertAll(List rowList) throws IOException { + insertAll(rowList, null); + } + + /** + * Insert all rows from the given list using specified insertIds if not null. + */ + public void insertAll(List rowList, + @Nullable List insertIdList) throws IOException { + if (insertIdList != null && rowList.size() != insertIdList.size()) { + throw new AssertionError("If insertIdList is not null it needs to have at least " + + "as many elements as rowList"); + } + + + AttemptBoundedExponentialBackOff backoff = new AttemptBoundedExponentialBackOff( + MAX_INSERT_ATTEMPTS, + INITIAL_INSERT_BACKOFF_INTERVAL_MS); + + List allErrors = new ArrayList<>(); + // These lists contain the rows to publish. Initially the contain the entire list. If there are + // failures, they will contain only the failed rows to be retried. + List rowsToPublish = rowList; + List idsToPublish = insertIdList; + while (true) { + List retryRows = new ArrayList<>(); + List retryIds = null; + if (idsToPublish != null) { + retryIds = new ArrayList<>(); } - out.setJson(row.getUnknownKeys()); - rows.add(out); - - dataSize += row.toString().length(); - if (dataSize >= UPLOAD_BATCH_SIZE_BYTES || rows.size() >= MAX_ROWS_PER_BATCH || - !rowIterator.hasNext()) { - TableDataInsertAllRequest content = new TableDataInsertAllRequest(); - content.setRows(rows); - - LOG.info("Number of rows in BigQuery insert: {}", rows.size()); - numInserted += rows.size(); - - Bigquery.Tabledata.InsertAll insert = client.tabledata() - .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), - content); - TableDataInsertAllResponse response = insert.execute(); - List errors = response - .getInsertErrors(); - if (errors != null && !errors.isEmpty()) { - throw new IOException("Insert failed: " + errors); + int strideIndex = 0; + // Upload in batches. 
+ List rows = new LinkedList<>(); + int dataSize = 0; + for (int i = 0; i < rowsToPublish.size(); ++i) { + TableRow row = rowsToPublish.get(i); + TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows(); + if (idsToPublish != null) { + out.setInsertId(idsToPublish.get(i)); } + out.setJson(row.getUnknownKeys()); + rows.add(out); + + dataSize += row.toString().length(); + if (dataSize >= UPLOAD_BATCH_SIZE_BYTES || rows.size() >= maxRowsPerBatch || + i == rowsToPublish.size() - 1) { + TableDataInsertAllRequest content = new TableDataInsertAllRequest(); + content.setRows(rows); + + Bigquery.Tabledata.InsertAll insert = client.tabledata() + .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(), + content); + TableDataInsertAllResponse response = insert.execute(); + List errors = response.getInsertErrors(); + if (errors != null) { + allErrors.addAll(errors); + for (TableDataInsertAllResponse.InsertErrors error : errors) { + if (error.getIndex() == null) { + throw new IOException("Insert failed: " + allErrors); + } + + int errorIndex = error.getIndex().intValue() + strideIndex; + retryRows.add(rowsToPublish.get(errorIndex)); + if (retryIds != null) { + retryIds.add(idsToPublish.get(errorIndex)); + } + } + } - dataSize = 0; - rows.clear(); + dataSize = 0; + strideIndex = i + 1; + rows.clear(); + } } - } - LOG.info("Number of rows written to BigQuery: {}", numInserted); + if (!allErrors.isEmpty() && !backoff.atMaxAttempts()) { + try { + Thread.sleep(backoff.nextBackOffMillis()); + } catch (InterruptedException e) { + // ignore. + } + LOG.info("Retrying failed inserts to BigQuery"); + rowsToPublish = retryRows; + idsToPublish = retryIds; + allErrors.clear(); + } else { + break; + } + } + if (!allErrors.isEmpty()) { + throw new IOException("Insert failed: " + allErrors); + } } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java index ca75e6f94ca7b..783c44847a6cf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java @@ -16,10 +16,12 @@ package com.google.cloud.dataflow.sdk.util; +import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; @@ -27,6 +29,8 @@ import com.google.api.services.bigquery.Bigquery; import com.google.api.services.bigquery.model.Table; import com.google.api.services.bigquery.model.TableCell; +import com.google.api.services.bigquery.model.TableDataInsertAllRequest; +import com.google.api.services.bigquery.model.TableDataInsertAllResponse; import com.google.api.services.bigquery.model.TableDataList; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableReference; @@ -49,6 +53,7 @@ import org.mockito.MockitoAnnotations; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -66,6 +71,7 @@ public class BigQueryUtilTest { @Mock private Bigquery.Tables mockTables; @Mock private Bigquery.Tables.Get mockTablesGet; @Mock private Bigquery.Tabledata 
mockTabledata; + @Mock private Bigquery.Tabledata.InsertAll mockInsertAll; @Mock private Bigquery.Tabledata.List mockTabledataList; @Before @@ -82,6 +88,39 @@ public void tearDown() { verifyNoMoreInteractions(mockTabledataList); } + private void onInsertAll(List> errorIndicesSequence) throws Exception { + when(mockClient.tabledata()) + .thenReturn(mockTabledata); + + List responses = new ArrayList<>(); + for (List errorIndices : errorIndicesSequence) { + List errors = new ArrayList<>(); + for (long i : errorIndices) { + TableDataInsertAllResponse.InsertErrors error = + new TableDataInsertAllResponse.InsertErrors(); + error.setIndex(i); + } + TableDataInsertAllResponse response = new TableDataInsertAllResponse(); + response.setInsertErrors(errors); + responses.add(response); + } + + + when(mockTabledata.insertAll( + anyString(), anyString(), anyString(), any(TableDataInsertAllRequest.class))) + .thenReturn(mockInsertAll); + when(mockInsertAll.execute()) + .thenReturn(responses.get(0), + responses.subList(1, responses.size()).toArray( + new TableDataInsertAllResponse[responses.size() - 1])); + } + + private void verifyInsertAll(int expectedRetries) throws IOException { + verify(mockClient, times(expectedRetries)).tabledata(); + verify(mockTabledata, times(expectedRetries)) + .insertAll(anyString(), anyString(), anyString(), any(TableDataInsertAllRequest.class)); + } + private void onTableGet(Table table) throws IOException { when(mockClient.tables()) .thenReturn(mockTables); @@ -303,4 +342,33 @@ public void testWriteEmptyFail() throws IOException { verifyTabledataList(); } } + + @Test + public void testInsertAll() throws Exception, IOException { + // Build up a list of indices to fail on each invocation. This should result in + // 5 calls to insertAll. + List> errorsIndices = new ArrayList<>(); + errorsIndices.add(Arrays.asList(0L, 5L, 10L, 15L, 20L)); + errorsIndices.add(Arrays.asList(0L, 2L, 4L)); + errorsIndices.add(Arrays.asList(0L, 2L)); + errorsIndices.add(new ArrayList()); + onInsertAll(errorsIndices); + + TableReference ref = BigQueryIO + .parseTableSpec("project:dataset.table"); + BigQueryTableInserter inserter = new BigQueryTableInserter(mockClient, ref, 5); + + List rows = new ArrayList<>(); + List ids = new ArrayList<>(); + for (int i = 0; i < 25; ++i) { + rows.add(new TableRow()); + ids.add(new String()); + } + + try { + inserter.insertAll(rows, ids); + } finally { + verifyInsertAll(5); + } + } } From 09361a516c329a66082b1f6a03699888df4b7b25 Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 20 Feb 2015 14:47:56 +0100 Subject: [PATCH 0178/1541] increase scope of a few classes to write APIs against them --- .../sdk/transforms/join/CoGbkResultSchema.java | 2 +- .../transforms/join/KeyedPCollectionTuple.java | 17 ++++++++++++++++- .../sdk/transforms/join/RawUnionValue.java | 2 +- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java index 6ab1042e3b186..6bb9d055c5018 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java @@ -38,7 +38,7 @@ * CoGroupByKey). 
*/ @SuppressWarnings("serial") -class CoGbkResultSchema implements Serializable { +public class CoGbkResultSchema implements Serializable { private final TupleTagList tupleTagList; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java index a9fd4b684f85b..1dfbf9efbf72c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java @@ -151,7 +151,8 @@ public void finishSpecifying() { * A utility class to help ensure coherence of tag and input PCollection * types. */ - static class TaggedKeyedPCollection { + public static class TaggedKeyedPCollection { + final TupleTag tupleTag; final PCollection> pCollection; @@ -161,6 +162,20 @@ public TaggedKeyedPCollection( this.tupleTag = tupleTag; this.pCollection = pCollection; } + + /** + * Returns the underlying PCollection of this TaggedKeyedPCollection. + */ + public PCollection> getCollection() { + return pCollection; + } + + /** + * Returns the TupleTag of this TaggedKeyedPCollection. + */ + public TupleTag getTupleTag() { + return tupleTag; + } } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java index b52f8b3e49c2f..3b6fa73868732 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java @@ -23,7 +23,7 @@ * This corresponds to an integer union tag and value. The mapping of * union tag to type must come from elsewhere. */ -class RawUnionValue { +public class RawUnionValue { private final int unionTag; private final Object value; From 3604b24906a1c28f33d7a988fcfdecdf1c592c1f Mon Sep 17 00:00:00 2001 From: Aljoscha Krettek Date: Fri, 20 Feb 2015 11:04:45 +0100 Subject: [PATCH 0179/1541] Add Getter for fn in Combine.PerKey This enables PipelineRunners to generate a more efficient operation that does the groping and aggregation of values in one operation instead of first grouping and then aggregating in two steps. --- .../com/google/cloud/dataflow/sdk/transforms/Combine.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 429d10aa9eb5c..1c6841fca04f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -910,6 +910,13 @@ private PerKey( this.fn = fn; } + /** + * Returns the KeyedCombineFn used by this Combine operation. + */ + public KeyedCombineFn getFn() { + return fn; + } + @Override public PCollection> apply(PCollection> input) { return input From a806df86c59840f433da871ee4d96dcfc648ecb0 Mon Sep 17 00:00:00 2001 From: bchambers Date: Tue, 24 Feb 2015 11:26:52 -0800 Subject: [PATCH 0180/1541] Update the AvroCoder so that it can return true for isDeterministic. This assumes that AvroCoder uses directBinaryEncoder so Arrays and Maps will be encoded as (number of elements) followed by each elements. They are therefore deterministic if the underlying Array/Collecion/Map is ordered. 
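To make the ordering requirement concrete, here is an illustrative sketch (not part of this change; the record classes and field names are made up) of how the new check treats ordered versus unordered map fields:

import com.google.cloud.dataflow.sdk.coders.AvroCoder;
import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;

import java.util.HashMap;
import java.util.TreeMap;

public class AvroDeterminismSketch {
  // TreeMap iterates in key order, so two equal maps always encode to the same bytes.
  static class OrderedRecord {
    TreeMap<String, Integer> counts = new TreeMap<>();
  }

  // HashMap has no guaranteed iteration order, so equal maps may encode differently.
  static class UnorderedRecord {
    HashMap<String, Integer> counts = new HashMap<>();
  }

  public static void main(String[] args) {
    try {
      AvroCoder.of(OrderedRecord.class).verifyDeterministic();    // passes
      AvroCoder.of(UnorderedRecord.class).verifyDeterministic();  // throws
    } catch (NonDeterministicException e) {
      System.out.println("Not safe where deterministic encoding is required: " + e.getMessage());
    }
  }
}

Because TreeMap is a SortedMap, the checker accepts it; the HashMap field is reported as potentially non-deterministically ordered, so that coder cannot be used where deterministic encoding is required (for example, GroupByKey keys).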
This deals with several AvroAnnotations: @Stringable: Not deterministic because we know nothing about #toString() @AvroSchema: We aren't sure enough about this to be confident it will lead to deterministic encodings. @AvroEncode: Not deterministic because we know nothing about the encoder. @AvroName: Recognizes that this causes a different field name to be used. This doesn't address the case of GenericRecord or SpecificRecord from a given schema. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87075588 --- .../cloud/dataflow/sdk/coders/AvroCoder.java | 294 ++++++++++++- .../dataflow/sdk/coders/StandardCoder.java | 14 +- .../dataflow/sdk/coders/AvroCoderTest.java | 387 +++++++++++++++++- 3 files changed, 679 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java index 75216120ac0fb..3200f54b8bd4c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java @@ -18,7 +18,9 @@ import static com.google.cloud.dataflow.sdk.util.Structs.addString; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; import com.google.cloud.dataflow.sdk.util.CloudObject; +import com.google.common.reflect.TypeToken; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -27,21 +29,35 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.AvroEncode; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.AvroSchema; import org.apache.avro.reflect.ReflectData; import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.avro.reflect.Union; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; /** * An encoder using Avro binary format. @@ -78,6 +94,11 @@ * ... * } * + *
    + * The implementation attempts to determine if the Avro encoding of the given type will satisfy + * the criteria of {@link Coder#verifyDeterministic} by inspecting both the type and the + * Schema provided or generated by Avro. Only coders that are deterministic can be used in + * {@link GroupByKey} operations. * * @param the type of elements handled by this coder */ @@ -124,6 +145,9 @@ public static AvroCoder of( private final Class type; private final Schema schema; + + private final List nonDeterministicReasons; + private final DatumWriter writer; private final DatumReader reader; private final EncoderFactory encoderFactory = new EncoderFactory(); @@ -132,6 +156,13 @@ public static AvroCoder of( protected AvroCoder(Class type, Schema schema) { this.type = type; this.schema = schema; + + if (GenericRecord.class.isAssignableFrom(type)) { + nonDeterministicReasons = Arrays.asList( + "GenericRecord may have non-deterministic fields."); + } else { + nonDeterministicReasons = new AvroDeterminismChecker().check(TypeToken.of(type), schema); + } this.reader = createDatumReader(); this.writer = createDatumWriter(); } @@ -170,11 +201,26 @@ public CloudObject asCloudObject() { } /** - * Depends upon the structure being serialized. + * Returns true if the given type should be deterministically encoded using + * the given Schema, the directBinaryEncoder, and the ReflectDatumWriter or + * GenericDatumWriter. */ @Override + @Deprecated public boolean isDeterministic() { - return false; + return nonDeterministicReasons.isEmpty(); + } + + /** + * Raises an exception describing reasons why the type may not be deterministically + * encoded using the given Schema, the directBinaryEncoder, and the ReflectDatumWriter + * or GenericDatumWriter. + */ + @Override + public void verifyDeterministic() throws NonDeterministicException { + if (!nonDeterministicReasons.isEmpty()) { + throw new NonDeterministicException(this, nonDeterministicReasons); + } } /** @@ -228,4 +274,248 @@ private Object readResolve() { return new AvroCoder(type, parser.parse(schemaStr)); } } + + /** + * Helper class encapsulating the various pieces of state maintained by the + * recursive walk used for checking if the encoding will be deterministic. + */ + protected static class AvroDeterminismChecker { + + // Reasons that the original type are not deterministic. This accumulates + // the actual output. + private List reasons = new ArrayList<>(); + + // Types that are currently "open". Used to make sure we don't have any + // recursive types. Note that we assume that all occurrences of a given type + // are equal, rather than tracking pairs of type + schema. + private Set> activeTypes = new HashSet<>(); + + /** + * Report an error in the current context. + */ + private void reportError(String context, String fmt, Object... args) { + String message = String.format(fmt, args); + reasons.add(context + ": " + message); + } + + /** + * Classes that are serialized by Avro using their toString() are only deterministic + * if their associated #toString() method is deterministic. Rather than determine all + * of them, we conservatively list some classes that definitely are, and treat any + * others an non-deterministic. 
+ */ + private static final Set> DETERMINISTIC_STRINGABLE_CLASSES = new HashSet<>(); + static { + DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigDecimal.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigInteger.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URI.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URL.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(String.class); + } + + /** + * Return true if the given type token is a subtype of *any* of the listed parents. + */ + private static boolean isSubtypeOf(TypeToken type, Class... parents) { + for (Class parent : parents) { + if (TypeToken.of(parent).isAssignableFrom(type)) { + return true; + } + } + return false; + } + + protected AvroDeterminismChecker() {} + + // The entry point for the check. Should not be recursively called. + public List check(TypeToken type, Schema schema) { + recurse(type.getRawType().getName(), type, schema); + return reasons; + } + + // This is the method that should be recursively called. It sets up the path + // and visited types correctly. + private void recurse(String context, TypeToken type, Schema schema) { + if (type.getRawType().isAnnotationPresent(AvroSchema.class)) { + reportError(context, "Custom schemas are not supported -- remove @AvroSchema."); + return; + } + + if (!activeTypes.add(type)) { + reportError(context, "%s appears recursively", type); + return; + } + + doCheck(context, type, schema); + activeTypes.remove(type); + } + + private void doCheck(String context, TypeToken type, Schema schema) { + switch (schema.getType()) { + case ARRAY: + checkArray(context, type, schema); + break; + case ENUM: + // Enums should be deterministic, since they depend only on the ordinal. + break; + case FIXED: + // Depending on the implementation of GenericFixed, we don't know how + // the given field will be encoded. So, we assume that it isn't + // deterministic. + reportError(context, "FIXED encodings are not guaranteed to be deterministic"); + break; + case MAP: + checkMap(context, type, schema); + break; + case RECORD: + checkRecord(context, type, schema); + break; + case UNION: + checkUnion(context, type, schema); + break; + case STRING: + checkString(context, type); + break; + case BOOLEAN: + case BYTES: + case DOUBLE: + case INT: + case FLOAT: + case LONG: + case NULL: + // For types that Avro encodes using one of the above primitives, we assume they are + // deterministic. + break; + default: + // In any other case (eg., new types added to Avro) we cautiously return + // false. + reportError(context, "Unknown Avro Schema Type: %s", schema.getType()); + break; + } + } + + private void checkString(String context, TypeToken type) { + // For types that are encoded as strings, we need to make sure they're in an approved + // whitelist. For other types that are annotated @Stringable, Avro will just use the + // #toString() methods, which has no guarantees of determinism. + if (!DETERMINISTIC_STRINGABLE_CLASSES.contains(type.getRawType())) { + reportError(context, "%s may not have deterministic #toString()", type); + } + } + + private void checkUnion(String context, TypeToken type, Schema schema) { + if (!type.getRawType().isAnnotationPresent(Union.class)) { + reportError(context, "Expected type %s to have @Union annotation", type); + return; + } + + // Errors associated with this union will use the base class as their context. + String baseClassContext = type.getRawType().getName(); + + // For a union, we need to make sure that each possible instantiation is deterministic. 
+ for (Schema concrete : schema.getTypes()) { + @SuppressWarnings("unchecked") + TypeToken unionType = TypeToken.of(ReflectData.get().getClass(concrete)); + + recurse(baseClassContext, unionType, concrete); + } + } + + private void checkRecord(String context, TypeToken type, Schema schema) { + // If the the record isn't a true class, but rather a GenericRecord, SpecificRecord, etc. + // with a specificified schema, then we need to make the decision based on the generated + // implementations. + if (isSubtypeOf(type, IndexedRecord.class)) { + // TODO: Update this once we support deterministic GenericRecord/SpecificRecords. + reportError(context, "IndexedRecords may be non-deterministic"); + return; + } + + // For a record, we want to make sure that all the fields are deterministic. + Class clazz = type.getRawType(); + for (org.apache.avro.Schema.Field fieldSchema : schema.getFields()) { + Field field = getField(clazz, fieldSchema.name()); + String fieldContext = field.getDeclaringClass().getName() + "#" + field.getName(); + + if (field.isAnnotationPresent(AvroEncode.class)) { + reportError(fieldContext, + "Custom encoders may be non-deterministic -- remove @AvroEncode"); + continue; + } + + if (field.isAnnotationPresent(AvroSchema.class)) { + reportError(fieldContext, "Custom schemas are not supported -- remove @AvroSchema"); + continue; + } + + TypeToken fieldType = type.resolveType(field.getGenericType()); + recurse(fieldContext, fieldType, fieldSchema.schema()); + } + } + + private void checkMap(String context, TypeToken type, Schema schema) { + if (!isSubtypeOf(type, SortedMap.class)) { + reportError(context, "%s may not be deterministically ordered", type); + } + + // Avro (currently) asserts that all keys are strings. + // In case that changes, we double check that the key was a string: + Class keyType = type.resolveType(Map.class.getTypeParameters()[0]).getRawType(); + if (!String.class.equals(keyType)) { + reportError(context, "map keys should be Strings, but was %s", keyType); + } + + recurse(context, + type.resolveType(Map.class.getTypeParameters()[1]), + schema.getValueType()); + } + + private void checkArray(String context, TypeToken type, Schema schema) { + TypeToken elementType = null; + if (type.isArray()) { + // The type is an array (with ordering)-> deterministic iff the element is deterministic. + elementType = type.getComponentType(); + } else if (isSubtypeOf(type, Collection.class)) { + if (isSubtypeOf(type, List.class, SortedSet.class)) { + // Ordered collection -> deterministic iff the element is deterministic + elementType = type.resolveType(Collection.class.getTypeParameters()[0]); + } else { + // Not an ordered collection -> not deterministic + reportError(context, "%s may not be deterministically ordered", type); + return; + } + } else { + // If it was an unknown type encoded as an array, be conservative and assume + // that we don't know anything about the order. + reportError(context, "encoding %s as an ARRAY was unexpected"); + return; + } + + // If we get here, it's either a deterministically-ordered Collection, or + // an array. Either way, the type is deterministic iff the element type is + // deterministic. + recurse(context, elementType, schema.getElementType()); + } + + /** + * Extract a field from a class. We need to look at the declared fields so that we can + * see private fields. We may need to walk up to the parent to get classes from the parent. 
+ */ + private static Field getField(Class clazz, String name) { + while (clazz != null) { + for (Field field : clazz.getDeclaredFields()) { + AvroName avroName = field.getAnnotation(AvroName.class); + if (avroName != null && name.equals(avroName.value())) { + return field; + } else if (avroName == null && name.equals(field.getName())) { + return field; + } + } + clazz = clazz.getSuperclass(); + } + + throw new IllegalArgumentException( + "Unable to get field " + name + " from class " + clazz); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java index b5838908497e5..2df352757a142 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -28,8 +28,6 @@ import java.util.Collections; import java.util.List; -import javax.annotation.Nullable; - /** * A StandardCoder is one that defines equality, hashing, and printing * via the class name and recursively using {@link #getComponents}. @@ -148,8 +146,7 @@ public void registerByteSizeObserver( @Override public void verifyDeterministic() throws NonDeterministicException { if (!isDeterministic()) { - throw new NonDeterministicException(this, - getClass().getSimpleName() + " reported it was not determinsitic."); + throw new NonDeterministicException(this, "Coder reported it was not determinsitic."); } } @@ -168,13 +165,4 @@ protected void verifyDeterministic(String message, Coder... coders) throws NonDeterministicException { verifyDeterministic(message, Arrays.asList(coders)); } - - protected void addReasons(String prefix, List accumulator, - @Nullable List newReasons) { - if (newReasons != null) { - for (String reason : newReasons) { - accumulator.add(prefix + reason); - } - } - } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index 43023a1f63dd9..781c8a85766db 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -16,8 +16,14 @@ package com.google.cloud.dataflow.sdk.coders; +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.Create; @@ -27,10 +33,19 @@ import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.values.PCollection; +import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.AvroSchema; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.Stringable; +import org.apache.avro.reflect.Union; +import org.hamcrest.Description; +import org.hamcrest.Matcher; import org.hamcrest.Matchers; +import org.hamcrest.TypeSafeMatcher; import org.junit.Assert; import org.junit.Test; 
import org.junit.runner.RunWith; @@ -38,6 +53,17 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; /** Tests for {@link AvroCoder}. */ @RunWith(JUnit4.class) @@ -92,7 +118,7 @@ public String toString() { } } - static class GetTextFn extends DoFn { + private static class GetTextFn extends DoFn { @Override public void processElement(ProcessContext c) { c.output(c.element().text); @@ -188,4 +214,363 @@ public void testAvroCoderIsSerializable() throws Exception { // Check that the coder is serializable using the regular JSON approach. SerializableUtils.ensureSerializable(coder); } + + private final void assertDeterministic(Class clazz) { + try { + AvroCoder.of(clazz).verifyDeterministic(); + } catch (NonDeterministicException e) { + fail("Expected AvroCoder<" + clazz + "> to be deterministic."); + } + } + + private final void assertNonDeterministic(Class clazz, + Matcher reason1) { + try { + AvroCoder.of(clazz).verifyDeterministic(); + fail("Expected AvroCoder<" + clazz + "> to be non-deterministic."); + } catch (NonDeterministicException e) { + assertThat(e.getReasons(), Matchers.iterableWithSize(1)); + assertThat(e.getReasons(), Matchers.contains(reason1)); + } + } + + @Test + public void testDeterministicInteger() { + assertDeterministic(Integer.class); + } + + @Test + public void testDeterministicInt() { + assertDeterministic(int.class); + } + + private static class SimpleDeterministicClass { + @SuppressWarnings("unused") + private Integer intField; + @SuppressWarnings("unused") + private char charField; + @SuppressWarnings("unused") + private Integer[] intArray; + } + + @Test + public void testDeterministicSimple() { + assertDeterministic(SimpleDeterministicClass.class); + } + + private static class UnorderedMapClass { + @SuppressWarnings("unused") + private Map mapField; + } + + private Matcher reasonMatcher(final String prefix, final String messagePart) { + return new TypeSafeMatcher(String.class) { + @Override + public void describeTo(Description description) { + description.appendText(String.format("Reason starting with '%s' containing '%s'", + prefix, messagePart)); + } + + @Override + protected boolean matchesSafely(String item) { + return item.startsWith(prefix) && item.contains(messagePart); + } + }; + } + + private Matcher reasonClass(Class clazz, String message) { + return reasonMatcher(clazz.getName(), message); + } + + private Matcher reasonField( + Class clazz, String field, String message) { + return reasonMatcher(clazz.getName() + "#" + field, message); + } + + @Test + public void testDeterministicUnorderedMap() { + assertNonDeterministic(UnorderedMapClass.class, + reasonField(UnorderedMapClass.class, "mapField", + "java.util.Map " + + "may not be deterministically ordered")); + } + + private static class NonDeterministicArray { + @SuppressWarnings("unused") + private UnorderedMapClass[] arrayField; + } + @Test + public void testDeterministicNonDeterministicArray() { + assertNonDeterministic(NonDeterministicArray.class, + reasonField(UnorderedMapClass.class, "mapField", + "java.util.Map" + + " may not be deterministically ordered")); + } + + private static class SubclassOfUnorderedMapClass extends UnorderedMapClass {} + + + @Test + public void 
testDeterministicNonDeterministicChild() { + // Super class has non deterministic fields. + assertNonDeterministic( + SubclassOfUnorderedMapClass.class, + reasonField(UnorderedMapClass.class, "mapField", + "may not be deterministically ordered")); + } + + private static class SubclassHidingParent extends UnorderedMapClass { + @SuppressWarnings("unused") + @AvroName("mapField2") // AvroName is not enough + private int mapField; + } + + @Test + public void testAvroProhibitsShadowing() { + // This test verifies that Avro won't serialize a class with two fields of + // the same name. This is important for our error reporting, and also how + // we lookup a field. + try { + ReflectData.get().getSchema(SubclassHidingParent.class); + fail("Expected AvroTypeException"); + } catch (AvroTypeException e) { + assertThat(e.getMessage(), containsString("mapField")); + assertThat(e.getMessage(), containsString("two fields named")); + } + } + + private static class FieldWithAvroName { + @AvroName("name") + @SuppressWarnings("unused") + private int someField; + } + + @Test + public void testDeterministicWithAvroName() { + assertDeterministic(FieldWithAvroName.class); + } + + @Test + public void testDeterminismSortedMap() { + assertDeterministic(StringSortedMapField.class); + } + + private static class StringSortedMapField { + @SuppressWarnings("unused") + SortedMap sortedMapField; + } + + @Test + public void testDeterminismTreeMapValue() { + // The value is non-deterministic, so we should fail. + assertNonDeterministic(TreeMapNonDetValue.class, + reasonField(UnorderedMapClass.class, "mapField", + "java.util.Map " + + "may not be deterministically ordered")); + } + + private static class TreeMapNonDetValue { + @SuppressWarnings("unused") + TreeMap nonDeterministicField; + } + + @Test + public void testDeterminismUnorderedMap() { + // LinkedHashMap is not deterministically ordered, so we should fail. 
+ assertNonDeterministic( + LinkedHashMapField.class, + reasonField(LinkedHashMapField.class, "nonDeterministicMap", + "java.util.LinkedHashMap " + + "may not be deterministically ordered")); + } + + private static class LinkedHashMapField { + @SuppressWarnings("unused") + LinkedHashMap nonDeterministicMap; + } + + @Test + public void testDeterminismCollection() { + assertNonDeterministic(StringCollection.class, + reasonField(StringCollection.class, "stringCollection", + "java.util.Collection may not be deterministically ordered")); + } + + private static class StringCollection { + @SuppressWarnings("unused") + Collection stringCollection; + } + + @Test + public void testDeterminismList() { + assertDeterministic(StringList.class); + assertDeterministic(StringArrayList.class); + } + + private static class StringList { + @SuppressWarnings("unused") + List stringCollection; + } + + private static class StringArrayList { + @SuppressWarnings("unused") + ArrayList stringCollection; + } + + @Test + public void testDeterminismSet() { + assertDeterministic(StringSortedSet.class); + assertDeterministic(StringTreeSet.class); + assertNonDeterministic(StringHashSet.class, + reasonField(StringHashSet.class, "stringCollection", + "java.util.HashSet may not be deterministically ordered")); + } + + private static class StringSortedSet{ + @SuppressWarnings("unused") + SortedSet stringCollection; + } + + private static class StringTreeSet { + @SuppressWarnings("unused") + TreeSet stringCollection; + } + + private static class StringHashSet { + @SuppressWarnings("unused") + HashSet stringCollection; + } + + @Test + public void testDeterminismCollectionValue() { + assertNonDeterministic(OrderedSetOfNonDetValues.class, + reasonField(UnorderedMapClass.class, "mapField", + "may not be deterministically ordered")); + assertNonDeterministic(ListOfNonDetValues.class, + reasonField(UnorderedMapClass.class, "mapField", + "may not be deterministically ordered")); + } + + private static class OrderedSetOfNonDetValues { + @SuppressWarnings("unused") + SortedSet set; + } + + private static class ListOfNonDetValues { + @SuppressWarnings("unused") + List set; + } + + @Test + public void testDeterminismUnion() { + assertDeterministic(DeterministicUnionBase.class); + assertNonDeterministic( + NonDeterministicUnionBase.class, + reasonField(UnionCase3.class, "mapField", "may not be deterministically ordered")); + } + + @Test + public void testDeterminismStringable() { + assertDeterministic(String.class); + assertNonDeterministic(StringableClass.class, + reasonClass(StringableClass.class, "may not have deterministic #toString()")); + } + + @Stringable + private static class StringableClass { + } + + @Test + public void testDeterminismCyclicClass() { + assertNonDeterministic(Cyclic.class, + reasonClass(Cyclic.class, "appears recursively")); + assertNonDeterministic(CyclicField.class, + reasonField(Cyclic.class, "cyclicField", + Cyclic.class.getName() + " appears recursively")); + assertNonDeterministic(IndirectCycle1.class, + reasonField(IndirectCycle2.class, "field2", + IndirectCycle1.class.getName() + " appears recursively")); + } + + private static class Cyclic { + @SuppressWarnings("unused") + int intField; + @SuppressWarnings("unused") + Cyclic cyclicField; + } + + private static class CyclicField { + @SuppressWarnings("unused") + Cyclic cyclicField2; + } + + private static class IndirectCycle1 { + @SuppressWarnings("unused") + IndirectCycle2 field1; + } + + private static class IndirectCycle2 { + @SuppressWarnings("unused") 
+ IndirectCycle1 field2; + } + + @Test + public void testDeterminismHasCustomSchema() { + assertNonDeterministic(HasCustomSchema.class, + reasonClass(HasCustomSchema.class, "Custom schemas are not supported")); + } + + private static class HasCustomSchema { + @AvroSchema("{\"name\": \"bar\", \"type\": \"record\", \"fields\": [" + + "{\"name\": \"foo\", \"type\": \"int\"}]}") + @SuppressWarnings("unused") + GenericRecord genericRecord; + } + + @Test + public void testAvroCoderTreeMapDeterminism() + throws Exception, NonDeterministicException { + TreeMapField size1 = new TreeMapField(); + TreeMapField size2 = new TreeMapField(); + + // Different order for entries + size1.field.put("hello", "world"); + size1.field.put("another", "entry"); + + size2.field.put("another", "entry"); + size2.field.put("hello", "world"); + + AvroCoder coder = AvroCoder.of(TreeMapField.class); + coder.verifyDeterministic(); + + ByteArrayOutputStream outStream1 = new ByteArrayOutputStream(); + ByteArrayOutputStream outStream2 = new ByteArrayOutputStream(); + + Context context = Context.NESTED; + coder.encode(size1, outStream1, context); + coder.encode(size2, outStream2, context); + + assertTrue(Arrays.equals( + outStream1.toByteArray(), outStream2.toByteArray())); + } + + private static class TreeMapField { + private TreeMap field = new TreeMap<>(); + } + + @Union({ UnionCase1.class, UnionCase2.class }) + private abstract static class DeterministicUnionBase {} + + @Union({ UnionCase1.class, UnionCase2.class, UnionCase3.class }) + private abstract static class NonDeterministicUnionBase {} + + private static class UnionCase1 extends DeterministicUnionBase {} + private static class UnionCase2 extends DeterministicUnionBase { + @SuppressWarnings("unused") + String field; + } + private static class UnionCase3 extends NonDeterministicUnionBase { + @SuppressWarnings("unused") + private Map mapField; + } } From f247a0de03250af8c1d36c8e972b04f422c0dfa0 Mon Sep 17 00:00:00 2001 From: amyu Date: Tue, 24 Feb 2015 13:00:26 -0800 Subject: [PATCH 0181/1541] Streaming examples that use PubSub, from a 'traffic sensor' domain. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87083984 --- .../examples/TrafficStreamingMaxLaneFlow.java | 397 ++++++++++++++++++ .../examples/TrafficStreamingRoutes.java | 369 ++++++++++++++++ 2 files changed, 766 insertions(+) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingMaxLaneFlow.java create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingRoutes.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingMaxLaneFlow.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingMaxLaneFlow.java new file mode 100644 index 0000000000000..77a392bf6e3f3 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingMaxLaneFlow.java @@ -0,0 +1,397 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.apache.avro.reflect.Nullable; +import org.joda.time.Duration; + +import java.util.ArrayList; +import java.util.List; + +/** + * A streaming Dataflow Example using BigQuery output, in the 'traffic sensor' domain. + * + *

    Concepts: The streaming runner, sliding windows, PubSub topic ingestion, use of the AvroCoder + * to encode a custom class, and custom Combine transforms. + * + *

    This pipeline takes as input traffic sensor data from a PubSub topic, and analyzes it using + * SlidingWindows. For each window, it finds the lane that had the highest flow recorded, for each + * sensor station. It writes those max values along with auxiliary info to a BigQuery table. + * + *

    This pipeline expects input from + * + * this script, + * which publishes traffic sensor data to a PubSub topic. After you've started this pipeline, start + * up the input generation script as per its instructions. The default SlidingWindow parameters + * assume that you're running this script with the {@literal --replay} flag, which simulates pauses + * in the sensor data publication. + * + *

    To run this example using the Dataflow service, you must provide an input + * PubSub topic and an output BigQuery table, using the {@literal --inputTopic}, + * {@literal --dataset}, and {@literal --table} options. Since this is a streaming + * pipeline that never completes, select the non-blocking pipeline runner by specifying + * {@literal --runner=DataflowPipelineRunner}. + * + *

    When you are done running the example, cancel your pipeline so that you do not continue to + * be charged for its instances. You can do this by visiting + * https://console.developers.google.com/project/your-project-name/dataflow/job-id + * in the Developers Console. You should also terminate the generator script so that you do not + * use unnecessary PubSub quota. + */ +public class TrafficStreamingMaxLaneFlow { + + static final int WINDOW_DURATION = 60; // Default sliding window duration in minutes + static final int WINDOW_SLIDE_EVERY = 5; // Default window 'slide every' setting in minutes + + /** + * This class holds information about each lane in a station reading, along with some general + * information from the reading. + */ + @DefaultCoder(AvroCoder.class) + static class LaneInfo { + @Nullable String stationId; + @Nullable String lane; + @Nullable String direction; + @Nullable String freeway; + @Nullable String recordedTimestamp; + @Nullable Integer laneFlow; + @Nullable Integer totalFlow; + @Nullable Double laneAO; + @Nullable Double laneAS; + + public LaneInfo() {} + + public LaneInfo(String stationId, String lane, String direction, String freeway, + String timestamp, Integer laneFlow, Double laneAO, + Double laneAS, Integer totalFlow) { + this.stationId = stationId; + this.lane = lane; + this.direction = direction; + this.freeway = freeway; + this.recordedTimestamp = timestamp; + this.laneFlow = laneFlow; + this.laneAO = laneAO; + this.laneAS = laneAS; + this.totalFlow = totalFlow; + } + + public String getStationId() { + return this.stationId; + } + public String getLane() { + return this.lane; + } + public String getDirection() { + return this.direction; + } + public String getFreeway() { + return this.freeway; + } + public String getRecordedTimestamp() { + return this.recordedTimestamp; + } + public Integer getLaneFlow() { + return this.laneFlow; + } + public Double getLaneAO() { + return this.laneAO; + } + public Double getLaneAS() { + return this.laneAS; + } + public Integer getTotalFlow() { + return this.totalFlow; + } + } + + /** + * Extract flow information for each of the 8 lanes in a reading, and output as separate tuples. + * This will let us determine which lane has the max flow for that station over the span of the + * window, and output not only the max flow from that calculcation, but other associated + * information. The number of lanes for which data is present depends upon which freeway the data + * point comes from. + */ + static class ExtractFlowInfoFn extends DoFn> { + @Override + public void processElement(ProcessContext c) { + String[] items = c.element().split(","); + // extract the sensor information for the lanes from the input string fields. 
+ String timestamp = items[0]; + String stationId = items[1]; + String freeway = items[2]; + String direction = items[3]; + Integer totalFlow = tryIntParse(items[7]); + // lane 1 + Integer lane1Flow = tryIntParse(items[11]); + Double lane1AO = tryDoubleParse(items[12]); + Double lane1AS = tryDoubleParse(items[13]); + // lane2 + Integer lane2Flow = tryIntParse(items[16]); + Double lane2AO = tryDoubleParse(items[17]); + Double lane2AS = tryDoubleParse(items[18]); + // lane3 + Integer lane3Flow = tryIntParse(items[21]); + Double lane3AO = tryDoubleParse(items[22]); + Double lane3AS = tryDoubleParse(items[23]); + // lane4 + Integer lane4Flow = tryIntParse(items[26]); + Double lane4AO = tryDoubleParse(items[27]); + Double lane4AS = tryDoubleParse(items[28]); + // lane5 + Integer lane5Flow = tryIntParse(items[31]); + Double lane5AO = tryDoubleParse(items[32]); + Double lane5AS = tryDoubleParse(items[33]); + // lane6 + Integer lane6Flow = tryIntParse(items[36]); + Double lane6AO = tryDoubleParse(items[37]); + Double lane6AS = tryDoubleParse(items[38]); + // lane7 + Integer lane7Flow = tryIntParse(items[41]); + Double lane7AO = tryDoubleParse(items[42]); + Double lane7AS = tryDoubleParse(items[43]); + // lane8 + Integer lane8Flow = tryIntParse(items[46]); + Double lane8AO = tryDoubleParse(items[47]); + Double lane8AS = tryDoubleParse(items[48]); + + // For each lane in the reading, output LaneInfo keyed to its station. + LaneInfo laneInfo1 = new LaneInfo(stationId, "lane1", direction, freeway, timestamp, + lane1Flow, lane1AO, lane1AS, totalFlow); + c.output(KV.of(stationId, laneInfo1)); + LaneInfo laneInfo2 = new LaneInfo(stationId, "lane2", direction, freeway, timestamp, + lane2Flow, lane2AO, lane2AS, totalFlow); + c.output(KV.of(stationId, laneInfo2)); + LaneInfo laneInfo3 = new LaneInfo(stationId, "lane3", direction, freeway, timestamp, + lane3Flow, lane3AO, lane3AS, totalFlow); + c.output(KV.of(stationId, laneInfo3)); + LaneInfo laneInfo4 = new LaneInfo(stationId, "lane4", direction, freeway, timestamp, + lane4Flow, lane4AO, lane4AS, totalFlow); + c.output(KV.of(stationId, laneInfo4)); + LaneInfo laneInfo5 = new LaneInfo(stationId, "lane5", direction, freeway, timestamp, + lane5Flow, lane5AO, lane5AS, totalFlow); + c.output(KV.of(stationId, laneInfo5)); + LaneInfo laneInfo6 = new LaneInfo(stationId, "lane6", direction, freeway, timestamp, + lane6Flow, lane6AO, lane6AS, totalFlow); + c.output(KV.of(stationId, laneInfo6)); + LaneInfo laneInfo7 = new LaneInfo(stationId, "lane7", direction, freeway, timestamp, + lane7Flow, lane7AO, lane7AS, totalFlow); + c.output(KV.of(stationId, laneInfo7)); + LaneInfo laneInfo8 = new LaneInfo(stationId, "lane8", direction, freeway, timestamp, + lane8Flow, lane8AO, lane8AS, totalFlow); + c.output(KV.of(stationId, laneInfo8)); + } + } + + /** + * A custom 'combine function' used with the Combine.perKey transform. Used to find the max lane + * flow over all the data points in the Window. Extracts the lane flow from the input string and + * determines whether it's the max seen so far. We're using a custom combiner instead of the Max + * transform because we want to retain the additional information we've associated with the flow + * value. 
+ */ + public static class MaxFlow implements SerializableFunction, LaneInfo> { + @Override + public LaneInfo apply(Iterable input) { + Integer max = 0; + LaneInfo maxInfo = new LaneInfo(); + for (LaneInfo item : input) { + Integer flow = item.getLaneFlow(); + if (flow != null && (flow >= max)) { + max = flow; + maxInfo = item; + } + } + return maxInfo; + } + } + + /** + * Format the results of the Max Lane flow calculation to a TableRow, to save to BigQuery. + * Add the timestamp from the window context. + */ + static class FormatMaxesFn extends DoFn, TableRow> { + @Override + public void processElement(ProcessContext c) { + + LaneInfo laneInfo = (LaneInfo) c.element().getValue(); + TableRow row = new TableRow() + .set("station_id", c.element().getKey()) + .set("direction", laneInfo.getDirection()) + .set("freeway", laneInfo.getFreeway()) + .set("lane_max_flow", laneInfo.getLaneFlow()) + .set("lane", laneInfo.getLane()) + .set("avg_occ", laneInfo.getLaneAO()) + .set("avg_speed", laneInfo.getLaneAS()) + .set("total_flow", laneInfo.getTotalFlow()) + .set("recorded_timestamp", laneInfo.getRecordedTimestamp()) + .set("window_timestamp", c.timestamp().toString()); + c.output(row); + } + + /** Defines the BigQuery schema used for the output. */ + static TableSchema getSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("station_id").setType("STRING")); + fields.add(new TableFieldSchema().setName("direction").setType("STRING")); + fields.add(new TableFieldSchema().setName("freeway").setType("STRING")); + fields.add(new TableFieldSchema().setName("lane_max_flow").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("lane").setType("STRING")); + fields.add(new TableFieldSchema().setName("avg_occ").setType("FLOAT")); + fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT")); + fields.add(new TableFieldSchema().setName("total_flow").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP")); + fields.add(new TableFieldSchema().setName("recorded_timestamp").setType("STRING")); + TableSchema schema = new TableSchema().setFields(fields); + return schema; + } + } + + /** + * This PTransform extracts lane info, calculates the max lane flow found for a given station (for + * the current Window) using a custom 'combiner', and formats the results for BigQuery. + */ + static class MaxLaneFlow + extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection rows) { + // row... => ... + PCollection> flowInfo = rows.apply( + ParDo.of(new ExtractFlowInfoFn())); + + // stationId, LaneInfo => stationId + max lane flow info + PCollection> flowMaxes = + flowInfo.apply(Combine.perKey( + new MaxFlow())); + + // ... => row... + PCollection results = flowMaxes.apply( + ParDo.of(new FormatMaxesFn())); + + return results; + } + } + + /** + * Options supported by {@link TrafficStreamingMaxLaneFlow}. + *

    + * Inherits standard configuration options. + */ + private interface TrafficStreamingMaxLaneFlowOptions extends PipelineOptions { + @Description("Input PubSub topic") + @Validation.Required + String getInputTopic(); + void setInputTopic(String value); + + @Description("BigQuery dataset name") + @Validation.Required + String getDataset(); + void setDataset(String value); + + @Description("BigQuery table name") + @Validation.Required + String getTable(); + void setTable(String value); + + @Description("Numeric value of sliding window duration, in minutes") + @Default.Integer(WINDOW_DURATION) + Integer getWindowDuration(); + void setWindowDuration(Integer value); + + @Description("Numeric value of window 'slide every' setting, in minutes") + @Default.Integer(WINDOW_SLIDE_EVERY) + Integer getWindowSlideEvery(); + void setWindowSlideEvery(Integer value); + } + + /** + * Sets up and starts streaming pipeline. + */ + public static void main(String[] args) { + TrafficStreamingMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(TrafficStreamingMaxLaneFlowOptions.class); + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + dataflowOptions.setStreaming(true); + + Pipeline pipeline = Pipeline.create(options); + TableReference tableRef = new TableReference(); + tableRef.setProjectId(dataflowOptions.getProject()); + tableRef.setDatasetId(options.getDataset()); + tableRef.setTableId(options.getTable()); + pipeline + .apply(PubsubIO.Read.topic(options.getInputTopic())) + /* map the incoming data stream into sliding windows. The default window duration values + work well if you're running the accompanying PubSub generator script with the + --replay flag, which simulates pauses in the sensor data publication. You may want to + adjust them otherwise. */ + .apply(Window.into(SlidingWindows.of( + Duration.standardMinutes(options.getWindowDuration())). + every(Duration.standardMinutes(options.getWindowSlideEvery())))) + .apply(new MaxLaneFlow()) + .apply(BigQueryIO.Write.to(tableRef) + .withSchema(FormatMaxesFn.getSchema())); + + /* When you are done running the example, cancel your pipeline so that you do not continue to + be charged for its instances. You can do this by visiting + https://console.developers.google.com/project/your-project-name/dataflow/job-id + in the Developers Console. You should also terminate the generator script so that you do not + use unnecessary PubSub quota. */ + pipeline.run(); + } + + private static Integer tryIntParse(String number) { + try { + return Integer.parseInt(number); + } catch (NumberFormatException e) { + return null; + } + } + + private static Double tryDoubleParse(String number) { + try { + return Double.parseDouble(number); + } catch (NumberFormatException e) { + return null; + } + } +} + diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingRoutes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingRoutes.java new file mode 100644 index 0000000000000..df065167b6bf4 --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TrafficStreamingRoutes.java @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.base.MoreObjects; + +import org.apache.avro.reflect.Nullable; +import org.joda.time.Duration; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; + + +/** + * A streaming Dataflow Example using BigQuery output, in the 'traffic sensor' domain. + * + *

    Concepts: The streaming runner, GroupByKey, keyed state, sliding windows, and + * PubSub topic ingestion. + * + *

    This pipeline takes as input traffic sensor data from a PubSub topic, and analyzes it using + * SlidingWindows. For each window, it calculates the average speed over the window for some small + * set of predefined 'routes', and looks for 'slowdowns' in those routes. It uses keyed state to + * track slowdown information across successive sliding windows. It writes its results to a + * BigQuery table. + * + *

    This pipeline expects input from + * + * this script, + * which publishes traffic sensor data to a PubSub topic. After you've started this pipeline, start + * up the input generation script as per its instructions. The default SlidingWindow parameters + * assume that you're running this script without the {@literal --replay} flag, so that there are + * no simulated pauses in the sensor data publication. + * + *

    To run this example using the Dataflow service, you must provide an input + * PubSub topic and an output BigQuery table, using the {@literal --inputTopic}, + * {@literal --dataset}, and {@literal --table} options. Since this is a streaming + * pipeline that never completes, select the non-blocking pipeline runner by specifying + * {@literal --runner=DataflowPipelineRunner}. + * + *

    When you are done running the example, cancel your pipeline so that you do not continue to + * be charged for its instances. You can do this by visiting + * https://console.developers.google.com/project/your-project-name/dataflow/job-id + * in the Developers Console. You should also terminate the generator script so that you do not + * use unnecessary PubSub quota. + */ +public class TrafficStreamingRoutes { + // Instantiate some small predefined San Diego routes to analyze + static Map sdStations = buildStationInfo(); + static final int WINDOW_DURATION = 3; // Default sliding window duration in minutes + static final int WINDOW_SLIDE_EVERY = 1; // Default window 'slide every' setting in minutes + + /** + * This class holds information about a station reading's average speed. + */ + @DefaultCoder(AvroCoder.class) + static class StationSpeed { + @Nullable String stationId; + @Nullable Double avgSpeed; + + public StationSpeed() {} + + public StationSpeed(String stationId, Double avgSpeed) { + this.stationId = stationId; + this.avgSpeed = avgSpeed; + } + + public String getStationId() { + return this.stationId; + } + public Double getAvgSpeed() { + return this.avgSpeed; + } + } + + /** + * This class holds information about a route's speed/slowdown. + */ + @DefaultCoder(AvroCoder.class) + static class RouteInfo { + @Nullable String route; + @Nullable Double avgSpeed; + @Nullable Boolean slowdownEvent; + + + public RouteInfo() {} + + public RouteInfo(String route, Double avgSpeed, Boolean slowdownEvent) { + this.route = route; + this.avgSpeed = avgSpeed; + this.slowdownEvent = slowdownEvent; + } + + public String getRoute() { + return this.route; + } + public Double getAvgSpeed() { + return this.avgSpeed; + } + public Boolean getSlowdownEvent() { + return this.slowdownEvent; + } + } + + /** + * Filter out readings for the stations along predefined 'routes', and output + * (station, speed info) keyed on route. + */ + static class ExtractStationSpeedFn extends DoFn> { + @Override + public void processElement(ProcessContext c) { + String[] items = c.element().split(","); + String stationId = items[1]; + String stationType = items[4]; + Double avgSpeed = tryDoubleParse(items[9]); + // For this analysis, use only 'main line' station types + if (stationType.equals("ML")) { + // For this simple example, filter out everything but some hardwired routes. + if (sdStations.containsKey(stationId)) { + StationSpeed stationSpeed = new StationSpeed(stationId, avgSpeed); + // The tuple key is the 'route' name stored in the 'sdStations' hash. + c.output(KV.of(sdStations.get(stationId), stationSpeed)); + } + } + } + } + + /* + * For a given route, track average speed for the window. Calculate whether traffic is currently + * slowing down, via a predefined threshold. Use keyed state to keep a count of the speed drops, + * with at least 3 in a row constituting a 'slowdown'. + * Note: these calculations are for example purposes only, and are unrealistic and oversimplified. + */ + static class GatherStats extends DoFn>, KV> + implements DoFn.RequiresKeyedState { + + static final int SLOWDOWN_THRESH = 67; + static final int SLOWDOWN_COUNT_CAP = 3; + + @Override + public void processElement(ProcessContext c) throws IOException { + String route = c.element().getKey(); + CodedTupleTag tag = CodedTupleTag.of(route, BigEndianIntegerCoder.of()); + // For the given key (a route), get the keyed state. 
+ Integer slowdownCount = MoreObjects.firstNonNull(c.keyedState().lookup(tag), 0); + Double speedSum = 0.0; + Integer scount = 0; + Iterable infoList = c.element().getValue(); + // For all stations in the route, sum (non-null) speeds. Keep a count of the non-null speeds. + for (StationSpeed item : infoList) { + Double speed = item.getAvgSpeed(); + if (speed != null) { + speedSum += speed; + scount++; + } + } + // calculate average speed. + if (scount == 0) { + return; + } + Double speedAvg = speedSum / scount; + Boolean slowdownEvent = false; + if (speedAvg != null) { + // see if the speed falls below defined threshold. If it does, increment the count of + // slow readings, as retrieved from the keyed state, up to the defined cap. + if (speedAvg < SLOWDOWN_THRESH) { + if (slowdownCount < SLOWDOWN_COUNT_CAP) { + slowdownCount++; + } + } else if (slowdownCount > 0) { + // if speed is not below threshold, then decrement the count of slow readings. + slowdownCount--; + } + // if our count of slowdowns has reached its cap, we consider this a 'slowdown event' + if (slowdownCount >= SLOWDOWN_COUNT_CAP) { + slowdownEvent = true; + } + } + // store the new slowdownCount in the keyed state for the route key. + c.keyedState().store(tag, slowdownCount); + RouteInfo routeInfo = new RouteInfo(route, speedAvg, slowdownEvent); + c.output(KV.of(route, routeInfo)); + } + } + + /** + * Format the results of the slowdown calculations to a TableRow, to save to BigQuery. + */ + static class FormatStatsFn extends DoFn, TableRow> { + @Override + public void processElement(ProcessContext c) { + RouteInfo routeInfo = c.element().getValue(); + TableRow row = new TableRow() + .set("avg_speed", routeInfo.getAvgSpeed()) + .set("slowdown_event", routeInfo.getSlowdownEvent()) + .set("route", c.element().getKey()) + .set("window_timestamp", c.timestamp().toString()); + c.output(row); + } + + /** Defines the BigQuery schema used for the output. */ + static TableSchema getSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("route").setType("STRING")); + fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT")); + fields.add(new TableFieldSchema().setName("slowdown_event").setType("BOOLEAN")); + fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP")); + TableSchema schema = new TableSchema().setFields(fields); + return schema; + } + } + + /** + * This PTransform extracts speed info from traffic station readings. + * It groups the readings by 'route' and analyzes traffic slowdown for that route, using keyed + * state to retain previous slowdown information. Then, it formats the results for BigQuery. + */ + static class TrackSpeed extends PTransform, PCollection> { + @Override + public PCollection apply(PCollection rows) { + // row... => ... + PCollection> flowInfo = rows.apply( + ParDo.of(new ExtractStationSpeedFn())); + + // Apply a GroupByKey transform to collect a list of all station + // readings for a given route. + PCollection>> timeGroup = flowInfo.apply( + GroupByKey.create()); + + // Analyze 'slowdown' over the route readings. + PCollection> stats = timeGroup.apply(ParDo.of(new GatherStats())); + + // Format the results for writing to BigQuery + PCollection results = stats.apply( + ParDo.of(new FormatStatsFn())); + + return results; + } + } + + + /** + * Options supported by {@link TrafficStreamingRoutes}. + *

    + * Inherits standard configuration options. + */ + private interface TrafficStreamingRoutesOptions extends PipelineOptions { + @Description("Input PubSub topic") + @Validation.Required + String getInputTopic(); + void setInputTopic(String value); + + @Description("BigQuery dataset name") + @Validation.Required + String getDataset(); + void setDataset(String value); + + @Description("BigQuery table name") + @Validation.Required + String getTable(); + void setTable(String value); + + @Description("Numeric value of sliding window duration, in minutes") + @Default.Integer(WINDOW_DURATION) + Integer getWindowDuration(); + void setWindowDuration(Integer value); + + @Description("Numeric value of window 'slide every' setting, in minutes") + @Default.Integer(WINDOW_SLIDE_EVERY) + Integer getWindowSlideEvery(); + void setWindowSlideEvery(Integer value); + } + + /** + * Sets up and starts streaming pipeline. + */ + public static void main(String[] args) { + TrafficStreamingRoutesOptions options = PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(TrafficStreamingRoutesOptions.class); + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + dataflowOptions.setStreaming(true); + + Pipeline pipeline = Pipeline.create(options); + TableReference tableRef = new TableReference(); + tableRef.setProjectId(dataflowOptions.getProject()); + tableRef.setDatasetId(options.getDataset()); + tableRef.setTableId(options.getTable()); + pipeline + .apply(PubsubIO.Read.topic(options.getInputTopic())) + /* map the incoming data stream into sliding windows. + The default window duration values work well if you're running the accompanying PubSub + generator script without the --replay flag, so that there are no simulated pauses in + the sensor data publication. You may want to adjust the values otherwise. */ + .apply(Window.into(SlidingWindows.of( + Duration.standardMinutes(options.getWindowDuration())). + every(Duration.standardMinutes(options.getWindowSlideEvery())))) + .apply(new TrackSpeed()) + .apply(BigQueryIO.Write.to(tableRef) + .withSchema(FormatStatsFn.getSchema())); + + /* When you are done running the example, cancel your pipeline so that you do not continue to + be charged for its instances. You can do this by visiting + https://console.developers.google.com/project/your-project-name/dataflow/job-id + in the Developers Console. You should also terminate the generator script so that you do not + use unnecessary PubSub quota. */ + pipeline.run(); + } + + private static Double tryDoubleParse(String number) { + try { + return Double.parseDouble(number); + } catch (NumberFormatException e) { + return null; + } + } + + /** Define some small hard-wired San Diego 'routes' to track based on sensor station ID. */ + private static Map buildStationInfo() { + Map stations = new Hashtable(); + stations.put("1108413", "SDRoute1"); // from freeway 805 S + stations.put("1108699", "SDRoute2"); // from freeway 78 E + stations.put("1108702", "SDRoute2"); + return stations; + } + +} + From dd3985db3bb763d1b44350a49689292d73958ab4 Mon Sep 17 00:00:00 2001 From: robertwb Date: Tue, 24 Feb 2015 15:21:01 -0800 Subject: [PATCH 0182/1541] Hashtag auto-completion streaming Dataflow example. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87098231 --- .../cloud/dataflow/examples/AutoComplete.java | 465 ++++++++++++++++++ .../dataflow/examples/AutoCompleteTest.java | 181 +++++++ .../dataflow/sdk/transforms/Combine.java | 187 +++++++ .../cloud/dataflow/sdk/transforms/Top.java | 3 +- .../dataflow/sdk/transforms/CombineTest.java | 42 ++ 5 files changed, 877 insertions(+), 1 deletion(-) create mode 100644 examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java create mode 100644 examples/src/test/java/com/google/cloud/dataflow/examples/AutoCompleteTest.java diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java b/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java new file mode 100644 index 0000000000000..70d638c0ca1ab --- /dev/null +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java @@ -0,0 +1,465 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.api.services.datastore.DatastoreV1.Entity; +import com.google.api.services.datastore.DatastoreV1.Key; +import com.google.api.services.datastore.DatastoreV1.Value; +import com.google.api.services.datastore.client.DatastoreHelper; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.DatastoreIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Filter; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.Partition; +import com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.Top; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import 
com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PBegin; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import org.joda.time.Duration; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * An example that computes the most popular hash tags for a for every prefix, + * which can be used for auto-completion. + * + *

    Concepts: Using the same pipeline in both streaming and batch, combiners, + * composite transforms. + * + *

    To execute this pipeline using the Dataflow service in batch mode, + * specify pipeline configuration: + * --project=<PROJECT ID> + * --stagingLocation=gs://<STAGING DIRECTORY> + * --runner=[Blocking]DataflowPipelineRunner + * --inputFile=gs://path/to/input*.txt + * [--outputDataset=<DATASET ID>] + * + *

    To execute this pipeline using the Dataflow service in streaming mode, + * specify pipeline configuration: + * --project=<PROJECT ID> + * --stagingLocation=gs://<STAGING DIRECTORY> + * --runner=DataflowPipelineRunner + * --inputTopic=/topics/someproject/sometopic + * [--outputDataset=<DATASET ID>] + * + *

    Which will update the datastore every 10 seconds based on the last 30 minutes + * of data received. + */ +public class AutoComplete { + + /** + * A PTransform that takes as input a list of tokens and returns + * the most common tokens per prefix. + */ + public static class ComputeTopCompletions + extends PTransform, PCollection>>> { + private final int candidatesPerPrefix; + private final boolean recursive; + + protected ComputeTopCompletions(int candidatesPerPrefix, boolean recursive) { + this.candidatesPerPrefix = candidatesPerPrefix; + this.recursive = recursive; + } + + public static ComputeTopCompletions top(int candidatesPerPrefix, boolean recursive) { + return new ComputeTopCompletions(candidatesPerPrefix, recursive); + } + + @Override + public PCollection>> apply(PCollection input) { + PCollection candidates = input + // First count how often each token appears. + .apply(new Count.PerElement()) + + // Map the KV outputs of Count into our own CompletionCandiate class. + .apply(ParDo.of( + new DoFn, CompletionCandidate>() { + @Override + public void processElement(ProcessContext c) { + c.output(new CompletionCandidate(c.element().getKey(), c.element().getValue())); + } + })); + + // Compute the top via either a flat or recursive algorithm. + if (recursive) { + return candidates + .apply(new ComputeTopRecursive(candidatesPerPrefix, 1)) + .apply(Flatten.>>pCollections()); + } else { + return candidates + .apply(new ComputeTopFlat(candidatesPerPrefix, 1)); + } + } + } + + /** + * Lower latency, but more expensive. + */ + private static class ComputeTopFlat + extends PTransform, + PCollection>>> { + + private final int candidatesPerPrefix; + private final int minPrefix; + + public ComputeTopFlat(int candidatesPerPrefix, int minPrefix) { + this.candidatesPerPrefix = candidatesPerPrefix; + this.minPrefix = minPrefix; + } + + @Override + public PCollection>> apply( + PCollection input) { + return input + // For each completion candidate, map it to all prefixes. + .apply(ParDo.of(new AllPrefixes(minPrefix))) + + // Find and return the top candiates for each prefix. + .apply(Top.largestPerKey(candidatesPerPrefix) + .withHotKeys(new HotKeySpread())); + } + + private static class HotKeySpread implements SerializableFunction { + @Override + public Integer apply(String input) { + return (int) Math.pow(4, 5 - input.length()); + } + } + } + + /** + * Cheaper but higher latency. + * + *

    Returns two PCollections, the first is top prefixes of size greater + * than minPrefix, and the second is top prefixes of size exactly + * minPrefix. + */ + private static class ComputeTopRecursive + extends PTransform, + PCollectionList>>> { + + private final int candidatesPerPrefix; + private final int minPrefix; + + public ComputeTopRecursive(int candidatesPerPrefix, int minPrefix) { + this.candidatesPerPrefix = candidatesPerPrefix; + this.minPrefix = minPrefix; + } + + private class KeySizePartitionFn implements PartitionFn>> { + public int partitionFor(KV> elem, int numPartitions) { + return elem.getKey().length() > minPrefix ? 0 : 1; + } + } + + private static class FlattenTops + extends DoFn>, CompletionCandidate> { + public void processElement(ProcessContext c) { + for (CompletionCandidate cc : c.element().getValue()) { + c.output(cc); + } + } + } + + public PCollectionList>> apply( + PCollection input) { + if (minPrefix > 10) { + // Base case, partitioning to return the output in the expected format. + return input + .apply(new ComputeTopFlat(candidatesPerPrefix, minPrefix)) + .apply(Partition.of(2, new KeySizePartitionFn())); + } else { + // If a candidate is in the top N for prefix a...b, it must also be in the top + // N for a...bX for every X, which is typlically a much smaller set to consider. + // First, compute the top candidate for prefixes of size at least minPrefix + 1. + PCollectionList>> larger = input + .apply(new ComputeTopRecursive(candidatesPerPrefix, minPrefix + 1)); + // Consider the top candidates for each prefix of length minPrefix + 1... + PCollection>> small = + PCollectionList + .of(larger.get(1).apply(ParDo.of(new FlattenTops()))) + // ...together with those (previously excluded) candidates of length + // exactly minPrefix... + .and(input.apply(Filter.by(new SerializableFunction() { + public Boolean apply(CompletionCandidate c) { + return c.getValue().length() == minPrefix; + } + }))) + .apply(Flatten.pCollections()) + // ...set the key to be the minPrefix-length prefix... + .apply(ParDo.of(new AllPrefixes(minPrefix, minPrefix))) + // ...and (re)apply the Top operator to all of them together. + .apply(Top.largestPerKey(candidatesPerPrefix)); + return PCollectionList + .of(larger.apply(Flatten.>>pCollections())) + .and(small); + } + } + } + + /** + * A DoFn that keys each candidate by all its prefixes. + */ + private static class AllPrefixes + extends DoFn> { + private final int minPrefix; + private final int maxPrefix; + public AllPrefixes(int minPrefix) { + this(minPrefix, Integer.MAX_VALUE); + } + public AllPrefixes(int minPrefix, int maxPrefix) { + this.minPrefix = minPrefix; + this.maxPrefix = maxPrefix; + } + @Override + public void processElement(ProcessContext c) { + String word = c.element().value; + for (int i = minPrefix; i <= Math.min(word.length(), maxPrefix); i++) { + c.output(KV.of(word.substring(0, i), c.element())); + } + } + } + + /** + * Class used to store tag-count pairs. + */ + @DefaultCoder(AvroCoder.class) + static class CompletionCandidate implements Comparable { + private long count; + private String value; + + public CompletionCandidate(String value, long count) { + this.value = value; + this.count = count; + } + + public long getCount() { + return count; + } + + public String getValue() { + return value; + } + + // Empty constructor required for Avro decoding. 
+ @SuppressWarnings("unused") + public CompletionCandidate() {} + + @Override + public int compareTo(CompletionCandidate o) { + if (this.count < o.count) { + return -1; + } else if (this.count == o.count) { + return this.value.compareTo(o.value); + } else { + return 1; + } + } + + @Override + public boolean equals(Object other) { + if (other instanceof CompletionCandidate) { + CompletionCandidate that = (CompletionCandidate) other; + return this.count == that.count && this.value.equals(that.value); + } else { + return false; + } + } + + @Override + public String toString() { + return "CompletionCandidate[" + value + ", " + count + "]"; + } + } + + /** + * Takes as input a set of strings, and emits each #hashtag found therein. + */ + static class ExtractHashtags extends DoFn { + public void processElement(ProcessContext c) { + Matcher m = Pattern.compile("#\\S+").matcher(c.element()); + while (m.find()) { + c.output(m.group().substring(1)); + } + } + } + + static class FormatForBigquery extends DoFn>, TableRow> { + public void processElement(ProcessContext c) { + List completions = new ArrayList<>(); + for (CompletionCandidate cc : c.element().getValue()) { + completions.add(new TableRow() + .set("count", cc.getCount()) + .set("tag", cc.getValue())); + } + TableRow row = new TableRow() + .set("prefix", c.element().getKey()) + .set("tags", completions); + c.output(row); + } + } + + /** + * Takes as input a the top candidates per prefix, and emits an entity + * suitable for writing to Datastore. + */ + static class FormatForDatastore extends DoFn>, Entity> { + private String kind; + + public FormatForDatastore(String kind) { + this.kind = kind; + } + + public void processElement(ProcessContext c) { + Entity.Builder entityBuilder = Entity.newBuilder(); + // Create entities with same ancestor Key.??? + Key ancestorKey = DatastoreHelper.makeKey(kind, "root").build(); + Key key = DatastoreHelper.makeKey(ancestorKey, c.element().getKey()).build(); + + entityBuilder.setKey(key); + List candidates = new ArrayList<>(); + for (CompletionCandidate tag : c.element().getValue()) { + Entity.Builder tagEntity = Entity.newBuilder(); + tagEntity.addProperty( + DatastoreHelper.makeProperty("tag", DatastoreHelper.makeValue(tag.value))); + tagEntity.addProperty( + DatastoreHelper.makeProperty("count", DatastoreHelper.makeValue(tag.count))); + candidates.add(DatastoreHelper.makeValue(tagEntity).build()); + } + entityBuilder.addProperty( + DatastoreHelper.makeProperty("candidates", DatastoreHelper.makeValue(candidates))); + c.output(entityBuilder.build()); + } + } + + /** + * Options supported by this class. + * + *

    Inherits standard Dataflow configuration options. + */ + private static interface Options extends PipelineOptions { + @Description("Input text file") + String getInputFile(); + void setInputFile(String value); + + @Description("Input Pubsub topic") + String getInputTopic(); + void setInputTopic(String value); + + @Description("Whether to use the recursive algorithm") + @Default.Boolean(true) + Boolean getRecursive(); + void setRecursive(Boolean value); + + @Description("BigQuery table to write to, specified as " + + ":.. The dataset must already exist.") + String getOutputBigqueryTable(); + void setOutputBigqueryTable(String value); + + @Description("Dataset entity kind") + @Default.String("autocomplete-demo") + String getKind(); + void setKind(String value); + + @Description("Dataset ID to write to in datastore") + String getOutputDataset(); + void setOutputDataset(String value); + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + + // We support running the same pipeline in either + // batch or windowed streaming mode. + PTransform> readSource; + WindowFn windowFn; + if (options.getInputFile() != null) { + readSource = TextIO.Read.from(options.getInputFile()); + windowFn = new GlobalWindows(); + } else { + DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class); + dataflowOptions.setStreaming(true); + readSource = PubsubIO.Read.topic(options.getInputTopic()); + windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5)); + } + + // Create the pipeline. + Pipeline p = Pipeline.create(options); + PCollection>> toWrite = p + .apply(readSource) + .apply(ParDo.of(new ExtractHashtags())) + .apply(Window.into(windowFn)) + .apply(ComputeTopCompletions.top(10, options.getRecursive())); + + // Optionally write the result out to bigquery... + if (options.getOutputBigqueryTable() != null) { + List tagFields = new ArrayList<>(); + tagFields.add(new TableFieldSchema().setName("count").setType("INTEGER")); + tagFields.add(new TableFieldSchema().setName("tag").setType("STRING")); + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("prefix").setType("STRING")); + fields.add(new TableFieldSchema().setName("tags").setType("RECORD").setFields(tagFields)); + TableSchema schema = new TableSchema().setFields(fields); + + toWrite + .apply(ParDo.of(new FormatForBigquery())) + .apply(BigQueryIO.Write + .to(options.getOutputBigqueryTable()) + .withSchema(schema) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); + } + + // ...and to Datastore. + if (options.getOutputDataset() != null) { + toWrite + .apply(ParDo.of(new FormatForDatastore(options.getKind()))) + .apply(DatastoreIO.write().to(options.getOutputDataset())); + } + + // Run the pipeline. + p.run(); + } +} diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/AutoCompleteTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/AutoCompleteTest.java new file mode 100644 index 0000000000000..7f72095ed911c --- /dev/null +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/AutoCompleteTest.java @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples; + +import com.google.cloud.dataflow.examples.AutoComplete.CompletionCandidate; +import com.google.cloud.dataflow.examples.AutoComplete.ComputeTopCompletions; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn.ProcessContext; +import com.google.cloud.dataflow.sdk.transforms.Filter; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * Tests of AutoComplete. 
+ */ +@RunWith(Parameterized.class) +public class AutoCompleteTest implements Serializable { + + private boolean recursive; + + public AutoCompleteTest(Boolean recursive) { + this.recursive = recursive; + } + + @Parameterized.Parameters + public static Collection primeNumbers() { + return Arrays.asList(new Object[][] { + { true }, + { false } + }); + } + + @Test + public void testAutoComplete() { + List words = Arrays.asList( + "apple", + "apple", + "apricot", + "banana", + "blackberry", + "blackberry", + "blackberry", + "blueberry", + "blueberry", + "cherry"); + + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(words)); + + PCollection>> output = + input.apply(new ComputeTopCompletions(2, recursive)) + .apply(Filter.by( + new SerializableFunction>, Boolean>() { + public Boolean apply(KV> element) { + return element.getKey().length() <= 2; + } + })); + + DataflowAssert.that(output).containsInAnyOrder( + KV.of("a", parseList("apple:2", "apricot:1")), + KV.of("ap", parseList("apple:2", "apricot:1")), + KV.of("b", parseList("blackberry:3", "blueberry:2")), + KV.of("ba", parseList("banana:1")), + KV.of("bl", parseList("blackberry:3", "blueberry:2")), + KV.of("c", parseList("cherry:1")), + KV.of("ch", parseList("cherry:1"))); + p.run(); + } + + @Test + public void testTinyAutoComplete() { + List words = Arrays.asList("x", "x", "x", "xy", "xy", "xyz"); + + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(words)); + + PCollection>> output = + input.apply(new ComputeTopCompletions(2, recursive)); + + DataflowAssert.that(output).containsInAnyOrder( + KV.of("x", parseList("x:3", "xy:2")), + KV.of("xy", parseList("xy:2", "xyz:1")), + KV.of("xyz", parseList("xyz:1"))); + p.run(); + } + + @Test + public void testWindowedAutoComplete() { + List> words = Arrays.asList( + TimestampedValue.of("xA", new Instant(1)), + TimestampedValue.of("xA", new Instant(1)), + TimestampedValue.of("xB", new Instant(1)), + TimestampedValue.of("xB", new Instant(2)), + TimestampedValue.of("xB", new Instant(2))); + + Pipeline p = TestPipeline.create(); + + PCollection input = p + .apply(Create.of(words)) + .apply(new ReifyTimestamps()); + + PCollection>> output = + input.apply(Window.into(SlidingWindows.of(new Duration(2)))) + .apply(new ComputeTopCompletions(2, recursive)); + + DataflowAssert.that(output).containsInAnyOrder( + // Window [0, 2) + KV.of("x", parseList("xA:2", "xB:1")), + KV.of("xA", parseList("xA:2")), + KV.of("xB", parseList("xB:1")), + + // Window [1, 3) + KV.of("x", parseList("xB:3", "xA:2")), + KV.of("xA", parseList("xA:2")), + KV.of("xB", parseList("xB:3")), + + // Window [2, 3) + KV.of("x", parseList("xB:2")), + KV.of("xB", parseList("xB:2"))); + p.run(); + } + + private static List parseList(String... 
entries) { + List all = new ArrayList<>(); + for (String s : entries) { + String[] countValue = s.split(":"); + all.add(new CompletionCandidate(countValue[0], Integer.valueOf(countValue[1]))); + } + return all; + } + + private static class ReifyTimestamps + extends PTransform>, PCollection> { + public PCollection apply(PCollection> input) { + return input.apply(ParDo.of(new DoFn, T>() { + @Override + public void processElement(ProcessContext c) { + c.outputWithTimestamp(c.element().getValue(), c.element().getTimestamp()); + } + })); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 1c6841fca04f6..e078bd7ad0e8d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -20,11 +20,17 @@ import com.google.cloud.dataflow.sdk.coders.CoderRegistry; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.VarIntCoder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; +import com.google.cloud.dataflow.sdk.values.PCollectionTuple; import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -910,6 +916,39 @@ private PerKey( this.fn = fn; } + /** + * If a single key has disproportionately many values, it may become a + * bottleneck, especially in streaming mode. This returns a new per-key + * combining transform that inserts an intermediate node to combine "hot" + * keys partially before performing the full combine. + * + * @param hotKeySpread a function from keys to an integer N, where the key + * will be spread among N intermediate nodes for partial combining. + * If N is less than or equal to 1, this key will not be sent through an + * intermediate node. + */ + public PerKeyWithHotKeys withHotKeys( + SerializableFunction hotKeySpread) { + return new PerKeyWithHotKeys(fn, hotKeySpread).withName(name); + } + + /** + * Like {@link #withHotKeys(SerializableFunction)}, but returning the given + * constant value for every key. + */ + public PerKeyWithHotKeys withHotKeys(final int hotKeySpread) { + return withHotKeys( + new SerializableFunction(){ + @Override public Integer apply(K unused) { return hotKeySpread; } + }); + } + + @Override + @SuppressWarnings("unchecked") + public PerKey withName(String name) { + return (PerKey) super.withName(name); + } + /** * Returns the KeyedCombineFn used by this Combine operation. */ @@ -930,6 +969,154 @@ protected String getKindString() { } } + /** + * Like {@link PerKey}, but sharding the combining of hot keys. 
+ */ + public static class PerKeyWithHotKeys + extends PTransform>, PCollection>> { + + private final transient KeyedCombineFn fn; + private final SerializableFunction hotKeySpread; + + private PerKeyWithHotKeys( + KeyedCombineFn fn, + SerializableFunction hotKeySpread) { + this.fn = fn; + this.hotKeySpread = hotKeySpread; + } + + @Override + @SuppressWarnings("unchecked") + public PerKeyWithHotKeys withName(String name) { + return (PerKeyWithHotKeys) super.withName(name); + } + + @Override + public PCollection> apply(PCollection> input) { + return applyHelper(input); + } + + private PCollection> applyHelper(PCollection> input) { + // Name the accumulator type. + @SuppressWarnings("unchecked") + final KeyedCombineFn fn = (KeyedCombineFn) this.fn; + + // A CombineFn's mergeAccumulator can be applied in a tree-like fashon. + // Here we shard the key using an integer nonce, combine on that partial + // set of values, then drop the nonce and do a final combine of the + // aggregates. We do this by splitting the original CombineFn into two, + // on that does addInput + merge and another that does merge + extract. + KeyedCombineFn, VI, VA, VA> hotPreCombine = + new KeyedCombineFn, VI, VA, VA>() { + @Override + public VA createAccumulator(KV key) { + return fn.createAccumulator(key.getKey()); + } + @Override + public void addInput(KV key, VA accumulator, VI value) { + fn.addInput(key.getKey(), accumulator, value); + } + @Override + public VA mergeAccumulators(KV key, Iterable accumulators) { + return fn.mergeAccumulators(key.getKey(), accumulators); + } + @Override + public VA extractOutput(KV key, VA accumulator) { + return accumulator; + } + @Override + @SuppressWarnings("unchecked") + public Coder getAccumulatorCoder( + CoderRegistry registry, Coder> keyCoder, Coder inputCoder) { + return fn.getAccumulatorCoder( + registry, ((KvCoder) keyCoder).getKeyCoder(), inputCoder); + } + }; + + @SuppressWarnings("unchecked") + final KvCoder inputCoder = ((KvCoder) input.getCoder()); + // List required because the accumulator must be mutable. + KeyedCombineFn, VO> hotPostCombine = + new KeyedCombineFn, VO>() { + @Override + public List createAccumulator(K key) { + return new ArrayList<>(); + } + @Override + public void addInput(K key, List accumulator, VA value) { + VA merged = fn.mergeAccumulators( + key, Iterables.concat(accumulator, ImmutableList.of(value))); + accumulator.clear(); + accumulator.add(merged); + } + @Override + public List mergeAccumulators(K key, Iterable> accumulators) { + List singleton = new ArrayList<>(); + singleton.add(fn.mergeAccumulators(key, Iterables.concat(accumulators))); + return singleton; + } + @Override + public VO extractOutput(K key, List accumulator) { + return fn.extractOutput(key, fn.mergeAccumulators(key, accumulator)); + } + @Override + public Coder getDefaultOutputCoder( + CoderRegistry registry, Coder keyCoder, Coder accumulatorCoder) { + return fn.getDefaultOutputCoder(registry, keyCoder, inputCoder.getValueCoder()); + } + }; + + // Use the provided hotKeySpread fn to split into "hot" and "cold" keys, + // augmenting the hot keys with a nonce. 
+ final TupleTag, VI>> hot = new TupleTag<>(); + final TupleTag> cold = new TupleTag<>(); + PCollectionTuple split = input.apply( + ParDo.of(new DoFn, KV>(){ + int counter = 0; + @Override + public void processElement(ProcessContext c) { + KV kv = c.element(); + int spread = hotKeySpread.apply(kv.getKey()); + if (spread <= 1) { + c.output(kv); + } else { + int nonce = counter++ % spread; + c.sideOutput(hot, KV.of(KV.of(kv.getKey(), nonce), kv.getValue())); + } + } + }) + .withOutputTags(cold, TupleTagList.of(hot))); + + // Combine the hot and cold keys separately. + PCollection> combinedHot = split + .get(hot) + .setCoder(KvCoder.of(KvCoder.of(inputCoder.getKeyCoder(), VarIntCoder.of()), + inputCoder.getValueCoder())) + .apply(Combine.perKey(hotPreCombine)) + .apply(ParDo.of( + new DoFn, VA>, KV>() { + @Override + public void processElement(ProcessContext c) { + c.output(KV.of(c.element().getKey().getKey(), c.element().getValue())); + } + })) + .apply(Combine.perKey(hotPostCombine)); + PCollection> combinedCold = split + .get(cold) + .setCoder(inputCoder) + .apply(Combine.perKey(fn)); + + // Return the union of the hot and cold key results. + return PCollectionList.of(combinedHot).and(combinedCold) + .apply(Flatten.>pCollections()); + } + + @Override + protected String getKindString() { + return "Combine.PerKey"; + } + } + ///////////////////////////////////////////////////////////////////////////// diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index bc0270f01bce7..cc6ee9d7b889b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.coders.CustomCoder; import com.google.cloud.dataflow.sdk.coders.ListCoder; import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn; +import com.google.cloud.dataflow.sdk.transforms.Combine.PerKey; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -303,7 +304,7 @@ PTransform, PCollection>> largest(int count) { * which take a {@code PCollection} and return the top elements. 
*/ public static > - PTransform>, PCollection>>> + PerKey> largestPerKey(int count) { return Combine.perKey( new TopCombineFn<>(count, new Largest()).asKeyedFn()) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 284ae327eabcf..2dec13aa10afb 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -36,6 +36,7 @@ import com.google.cloud.dataflow.sdk.runners.RecordingPipelineVisitor; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; @@ -312,6 +313,36 @@ counter.new Counter(8, 2, 0, 0), counter.new Counter(1, 1, 0, 0))); } + private static final SerializableFunction hotKeySpread = + new SerializableFunction() { + @Override + public Integer apply(String input) { + return input.equals("a") ? 3 : 0; + } + }; + + @Test + public void testHotKeyCombining() { + Pipeline p = TestPipeline.create(); + PCollection> input = copy(createInput(p, TABLE), 10); + + KeyedCombineFn mean = + new MeanInts().asKeyedFn(); + PCollection> coldMean = input.apply( + Combine.perKey(mean).withHotKeys(0)); + PCollection> warmMean = input.apply( + Combine.perKey(mean).withHotKeys(hotKeySpread)); + PCollection> hotMean = input.apply( + Combine.perKey(mean).withHotKeys(5)); + + List> expected = Arrays.asList(KV.of("a", 2.0), KV.of("b", 7.0)); + DataflowAssert.that(coldMean).containsInAnyOrder(expected); + DataflowAssert.that(warmMean).containsInAnyOrder(expected); + DataflowAssert.that(hotMean).containsInAnyOrder(expected); + + p.run(); + } + //////////////////////////////////////////////////////////////////////////// // Test classes, for different kinds of combining fns. @@ -588,4 +619,15 @@ public Coder getAccumulatorCoder( return SerializableCoder.of(Counter.class); } } + + private static PCollection copy(PCollection pc, final int n) { + return pc.apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) throws Exception { + for (int i = 0; i < n; i++) { + c.output(c.element()); + } + } + })); + } } From 7c83f0dfd40e24fb2b8739b3f874b8fc2c44b649 Mon Sep 17 00:00:00 2001 From: vanya Date: Tue, 24 Feb 2015 15:30:36 -0800 Subject: [PATCH 0183/1541] Throw an exception when we get an unexpected End of Stream marker from HttpClient. This fixes potential data loss when reading from GCS and the connection gets prematurely terminated. Unfortunately it's impossible to make it work for compressed streams at this level, since we can't get a proper count of transferred bytes. Root cause of the problem: JDK ignores incomplete HTTP responses when Content-Length is specified. Chunked encoded streams work fine and unexpected termination is being detected. 
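As a rough sketch of the guard this commit describes (the helper below is illustrative only, not the SDK's code; the actual change lands in GoogleCloudStorageReadChannel further down):

import java.io.IOException;

// Illustrative only: when the response advertises a Content-Length, an early
// end-of-stream should fail loudly instead of silently truncating the data.
// Compressed responses are exempt because their Content-Length counts
// compressed bytes, not the bytes handed back to the caller.
final class PrematureEofCheck {
  static void check(long currentPosition, long size, boolean isCompressedStream)
      throws IOException {
    if (!isCompressedStream && size >= 0 && currentPosition != size) {
      throw new IOException("Premature end of stream: received " + currentPosition
          + " of " + size + " bytes");
    }
  }
}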
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87099191 --- .../gcsio/GoogleCloudStorageReadChannel.java | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java index 61b931561c370..13c3dc427eaab 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java @@ -78,6 +78,7 @@ public class GoogleCloudStorageReadChannel implements SeekableByteChannel { // Size of the object being read. private long size = -1; + private boolean isCompressedStream; // Maximum number of automatic retries when reading from the underlying channel without making // progress; each time at least one byte is successfully read, the counter of attempted retries @@ -256,6 +257,12 @@ public int read(ByteBuffer buffer) int numBytesRead = readChannel.read(buffer); Preconditions.checkState(numBytesRead != 0, "Read 0 bytes without blocking!"); if (numBytesRead < 0) { + // Check that we didn't get a premature End of Stream signal by checking the number of + // bytes read against the stream size. Unfortunately we don't have information about the + // actual size of the data stream when stream compression is used, so we can only ignore + // this case here. + Preconditions.checkState(isCompressedStream || currentPosition == size, + "Received end of stream result before all the file data has been received"); break; } totalBytesRead += numBytesRead; @@ -347,7 +354,17 @@ public int read(ByteBuffer buffer) // If this method was called when the stream was already at EOF // (indicated by totalBytesRead == 0) then return EOF else, // return the number of bytes read. - return (totalBytesRead == 0) ? -1 : totalBytesRead; + boolean isEndOfStream = (totalBytesRead == 0); + if (isEndOfStream) { + // Check that we didn't get a premature End of Stream signal by checking the number of bytes + // read against the stream size. Unfortunately we don't have information about the actual size + // of the data stream when stream compression is used, so we can only ignore this case here. + Preconditions.checkState(isCompressedStream || currentPosition == size, + "Failed to read any data before all the file data has been received"); + return -1; + } else { + return totalBytesRead; + } } @Override @@ -531,6 +548,12 @@ protected InputStream openStreamAndSetSize(long newPosition) } } + // If the content is compressed, content length reported in the header is counting the number of + // compressed bytes. That means that we cannot rely on the reported content length to check that + // we have received all the data from the data stream. + String contentEncoding = response.getContentEncoding(); + isCompressedStream = (contentEncoding != null && contentEncoding.contains("gzip")); + String contentRange = response.getHeaders().getContentRange(); if (response.getHeaders().getContentLength() != null) { size = response.getHeaders().getContentLength() + newPosition; From 2203745152c6c7387abbb1897fa3f4f42f4738f1 Mon Sep 17 00:00:00 2001 From: sgmc Date: Tue, 24 Feb 2015 15:55:20 -0800 Subject: [PATCH 0184/1541] Add AvroIO validation to SDK. This enables the service to check permissions on the inputs and outputs for AvroIO when a Dataflow is submitted. 
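As a usage sketch of the option added below (the bucket, paths, and schema string are placeholders):

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.AvroIO;
import com.google.cloud.dataflow.sdk.values.PCollection;
import org.apache.avro.generic.GenericRecord;

class AvroWithoutValidationExample {
  // The output prefix is created by an earlier stage of the job, so the
  // submission-time GCS existence checks are skipped for both transforms.
  static void addAvroPassThrough(Pipeline p, String schemaString) {
    PCollection<GenericRecord> records = p.apply(
        AvroIO.Read.from("gs://my-bucket/input/*.avro")
            .withSchema(schemaString)
            .withoutValidation());
    records.apply(
        AvroIO.Write.to("gs://my-bucket/output/records")
            .withSchema(schemaString)
            .withoutValidation());
  }
}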
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87101426 --- .../google/cloud/dataflow/sdk/io/AvroIO.java | 97 ++++++++++++++++--- .../runners/dataflow/AvroIOTranslator.java | 2 + .../cloud/dataflow/sdk/io/AvroIOTest.java | 16 +++ 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java index 2eaf60343e5f6..d7c2f1ef47da3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java @@ -171,6 +171,18 @@ public static Bound withSchema(String schema) { return withSchema((new Schema.Parser()).parse(schema)); } + /** + * Returns a AvroIO.Read PTransform that has GCS path validation on + * pipeline creation disabled. + * + *

<p>
    This can be useful in the case where the GCS input location does + * not exist at the pipeline creation time, but is expected to be available + * at execution time. + */ + public static Bound withoutValidation() { + return new Bound<>(GenericRecord.class).withoutValidation(); + } + /** * A PTransform that reads from an Avro file (or multiple Avro * files matching a pattern) and returns a bounded PCollection containing @@ -190,16 +202,19 @@ public static class Bound extends PTransform> { /** The schema of the input file. */ @Nullable final Schema schema; + /** An option to indicate if input validation is desired. Default is true. */ + final boolean validate; Bound(Class type) { - this(null, null, type, null); + this(null, null, type, null, true); } - Bound(String name, String filepattern, Class type, Schema schema) { + Bound(String name, String filepattern, Class type, Schema schema, boolean validate) { super(name); this.filepattern = filepattern; this.type = type; this.schema = schema; + this.validate = validate; } /** @@ -207,7 +222,7 @@ public static class Bound extends PTransform> { * with the given step name. Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, filepattern, type, schema); + return new Bound<>(name, filepattern, type, schema, validate); } /** @@ -217,7 +232,7 @@ public Bound named(String name) { * filepatterns.) Does not modify this object. */ public Bound from(String filepattern) { - return new Bound<>(name, filepattern, type, schema); + return new Bound<>(name, filepattern, type, schema, validate); } /** @@ -229,7 +244,7 @@ public Bound from(String filepattern) { * the resulting PCollection */ public Bound withSchema(Class type) { - return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type)); + return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type), validate); } /** @@ -238,7 +253,7 @@ public Bound withSchema(Class type) { * Does not modify this object. */ public Bound withSchema(Schema schema) { - return new Bound<>(name, filepattern, GenericRecord.class, schema); + return new Bound<>(name, filepattern, GenericRecord.class, schema, validate); } /** @@ -250,6 +265,19 @@ public Bound withSchema(String schema) { return withSchema((new Schema.Parser()).parse(schema)); } + /** + * Returns a new TextIO.Read PTransform that's like this one but + * that has GCS input path validation on pipeline creation disabled. + * Does not modify this object. + * + *
<p>
    This can be useful in the case where the GCS input location does + * not exist at the pipeline creation time, but is expected to be + * available at execution time. + */ + public Bound withoutValidation() { + return new Bound<>(name, filepattern, type, schema, false); + } + @Override public PCollection apply(PInput input) { if (filepattern == null) { @@ -285,6 +313,10 @@ public Schema getSchema() { return schema; } + public boolean needsValidation() { + return validate; + } + static { DirectPipelineRunner.registerDefaultTransformEvaluator( Bound.class, new DirectPipelineRunner.TransformEvaluator() { @@ -398,6 +430,18 @@ public static Bound withSchema(String schema) { return withSchema((new Schema.Parser()).parse(schema)); } + /** + * Returns a AvroIO.Write PTransform that has GCS path validation on + * pipeline creation disabled. + * + *
<p>
    This can be useful in the case where the GCS output location does + * not exist at the pipeline creation time, but is expected to be available + * at execution time. + */ + public static Bound withoutValidation() { + return new Bound<>(GenericRecord.class).withoutValidation(); + } + /** * A PTransform that writes a bounded PCollection to an Avro file (or * multiple Avro files matching a sharding pattern). @@ -421,13 +465,15 @@ public static class Bound extends PTransform, PDone> { /** The schema of the output file. */ @Nullable final Schema schema; + /** An option to indicate if output validation is desired. Default is true. */ + final boolean validate; Bound(Class type) { - this(null, null, "", 0, ShardNameTemplate.INDEX_OF_MAX, type, null); + this(null, null, "", 0, ShardNameTemplate.INDEX_OF_MAX, type, null, true); } Bound(String name, String filenamePrefix, String filenameSuffix, int numShards, - String shardTemplate, Class type, Schema schema) { + String shardTemplate, Class type, Schema schema, boolean validate) { super(name); this.filenamePrefix = filenamePrefix; this.filenameSuffix = filenameSuffix; @@ -435,6 +481,7 @@ public static class Bound extends PTransform, PDone> { this.shardTemplate = shardTemplate; this.type = type; this.schema = schema; + this.validate = validate; } /** @@ -443,7 +490,7 @@ public static class Bound extends PTransform, PDone> { */ public Bound named(String name) { return new Bound<>( - name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate); } /** @@ -457,7 +504,7 @@ public Bound named(String name) { public Bound to(String filenamePrefix) { validateOutputComponent(filenamePrefix); return new Bound<>( - name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate); } /** @@ -471,7 +518,7 @@ public Bound to(String filenamePrefix) { public Bound withSuffix(String filenameSuffix) { validateOutputComponent(filenameSuffix); return new Bound<>( - name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate); } /** @@ -491,7 +538,7 @@ public Bound withSuffix(String filenameSuffix) { public Bound withNumShards(int numShards) { Preconditions.checkArgument(numShards >= 0); return new Bound<>( - name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate); } /** @@ -504,7 +551,7 @@ public Bound withNumShards(int numShards) { */ public Bound withShardNameTemplate(String shardTemplate) { return new Bound<>( - name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema); + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate); } /** @@ -517,7 +564,7 @@ public Bound withShardNameTemplate(String shardTemplate) { *
<p>
    Does not modify this object. */ public Bound withoutSharding() { - return new Bound<>(name, filenamePrefix, filenameSuffix, 1, "", type, schema); + return new Bound<>(name, filenamePrefix, filenameSuffix, 1, "", type, schema, validate); } /** @@ -529,7 +576,7 @@ public Bound withoutSharding() { */ public Bound withSchema(Class type) { return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, - ReflectData.get().getSchema(type)); + ReflectData.get().getSchema(type), validate); } /** @@ -539,7 +586,7 @@ public Bound withSchema(Class type) { */ public Bound withSchema(Schema schema) { return new Bound<>(name, filenamePrefix, filenameSuffix, numShards, shardTemplate, - GenericRecord.class, schema); + GenericRecord.class, schema, validate); } /** @@ -551,6 +598,20 @@ public Bound withSchema(String schema) { return withSchema((new Schema.Parser()).parse(schema)); } + /** + * Returns a new TextIO.Write PTransform that's like this one but + * that has GCS output path validation on pipeline creation disabled. + * Does not modify this object. + * + *
<p>
    This can be useful in the case where the GCS output location does + * not exist at the pipeline creation time, but is expected to be + * available at execution time. + */ + public Bound withoutValidation() { + return new Bound<>( + name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, false); + } + @Override public PDone apply(PCollection input) { if (filenamePrefix == null) { @@ -605,6 +666,10 @@ public Schema getSchema() { return schema; } + public boolean needsValidation() { + return validate; + } + static { DirectPipelineRunner.registerDefaultTransformEvaluator( Bound.class, new DirectPipelineRunner.TransformEvaluator() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java index e23afad8357b9..d4c2355d4e402 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java @@ -57,6 +57,7 @@ private void translateReadHelper( context.addInput(PropertyNames.FORMAT, "avro"); context.addInput(PropertyNames.FILEPATTERN, filepattern); context.addValueOnlyOutput(PropertyNames.OUTPUT, transform.getOutput()); + context.addInput(PropertyNames.VALIDATE_SOURCE, transform.needsValidation()); // TODO: Orderedness? } } @@ -102,6 +103,7 @@ private void translateWriteHelper( context.addInput(PropertyNames.FILENAME_PREFIX, filenamePrefix); context.addInput(PropertyNames.SHARD_NAME_TEMPLATE, transform.getShardTemplate()); context.addInput(PropertyNames.FILENAME_SUFFIX, transform.getFilenameSuffix()); + context.addInput(PropertyNames.VALIDATE_SINK, transform.needsValidation()); long numShards = transform.getNumShards(); if (numShards > 0) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java index 1d45f4ab9386c..e211eb5941f83 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java @@ -18,7 +18,9 @@ import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -361,6 +363,20 @@ public void testWriteFromSchemaString() throws Exception { "HerWrite"); } + @Test + public void testReadWithoutValidationFlag() throws Exception { + AvroIO.Read.Bound read = AvroIO.Read.from("gs://bucket/foo*/baz"); + assertTrue(read.needsValidation()); + assertFalse(read.withoutValidation().needsValidation()); + } + + @Test + public void testWriteWithoutValidationFlag() throws Exception { + AvroIO.Write.Bound write = AvroIO.Write.to("gs://bucket/foo/baz"); + assertTrue(write.needsValidation()); + assertFalse(write.withoutValidation().needsValidation()); + } + // TODO: for Write only, test withSuffix, withNumShards, // withShardNameTemplate and withoutSharding. } From a3b6e1d8b9595325e64f991206ca2ef88c277b74 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 25 Feb 2015 09:26:35 -0800 Subject: [PATCH 0185/1541] Make the full set of PipelineOptions specified by the user during construction of their Dataflow available on the worker. 
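A condensed sketch of the round trip this commit sets up, mirroring the worker-side code in the diff below (the JSON string and property value here are examples, matching the test rather than any fixed contract):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;

import java.io.IOException;

class SdkPipelineOptionsRoundTrip {
  // The submitter embeds the serialized options in the job; the worker harness
  // receives them back through the "sdk_pipeline_options" system property.
  static DataflowWorkerHarnessOptions readWorkerOptions() throws IOException {
    System.setProperty("sdk_pipeline_options", "{\"options\":{\"numWorkers\":999}}");
    ObjectMapper mapper = new ObjectMapper();
    return mapper
        .readValue(System.getProperty("sdk_pipeline_options"), PipelineOptions.class)
        .as(DataflowWorkerHarnessOptions.class);
  }
}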
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87162112 --- sdk/pom.xml | 2 +- .../options/DataflowPipelineDebugOptions.java | 4 + .../sdk/options/PipelineOptionsFactory.java | 32 +++-- .../runners/DataflowPipelineTranslator.java | 10 ++ .../worker/StreamingDataflowWorker.java | 113 ++++++++---------- .../options/PipelineOptionsFactoryTest.java | 6 +- .../DataflowPipelineTranslatorTest.java | 28 +++++ .../DataflowWorkProgressUpdaterTest.java | 2 +- 8 files changed, 119 insertions(+), 78 deletions(-) diff --git a/sdk/pom.xml b/sdk/pom.xml index b3537af67fd96..abe71557a5c96 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -189,7 +189,7 @@ com.google.apis google-api-services-dataflow - v1beta3-rev5-1.19.1 + v1beta3-rev7-1.19.1 diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java index ce536e69a93d9..0ec0bbf044375 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java @@ -22,6 +22,8 @@ import com.google.cloud.dataflow.sdk.util.PathValidator; import com.google.cloud.dataflow.sdk.util.Stager; +import com.fasterxml.jackson.annotation.JsonIgnore; + import java.util.List; /** @@ -82,6 +84,7 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { /** * The validator class used to validate path names. */ + @JsonIgnore @Description("The validator class used to validate path names.") @Default.InstanceFactory(PathValidatorFactory.class) PathValidator getPathValidator(); @@ -98,6 +101,7 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { /** * The stager instance used to stage files. */ + @JsonIgnore @Description("The class use to stage packages.") @Default.InstanceFactory(StagerFactory.class) Stager getStager(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 24f8e15069108..3885b61de09a0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -50,6 +50,7 @@ import java.beans.IntrospectionException; import java.beans.Introspector; import java.beans.PropertyDescriptor; +import java.io.IOException; import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.lang.reflect.Proxy; @@ -495,9 +496,28 @@ static List getPropertyDescriptors( * {@link DataflowWorkerHarness}. 
*/ @Deprecated - public static DataflowWorkerHarnessOptions createFromSystemProperties() { - DataflowWorkerHarnessOptions options = as(DataflowWorkerHarnessOptions.class); - options.setRunner(null); + public static DataflowWorkerHarnessOptions createFromSystemProperties() throws IOException { + ObjectMapper objectMapper = new ObjectMapper(); + DataflowWorkerHarnessOptions options; + if (System.getProperties().containsKey("sdk_pipeline_options")) { + String serializedOptions = System.getProperty("sdk_pipeline_options"); + LOG.info("Worker harness starting with: " + serializedOptions); + options = objectMapper.readValue(serializedOptions, PipelineOptions.class) + .as(DataflowWorkerHarnessOptions.class); + } else { + options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); + } + + // These values will not be known at job submission time and must be provided. + if (System.getProperties().containsKey("worker_id")) { + options.setWorkerId(System.getProperty("worker_id")); + } + if (System.getProperties().containsKey("job_id")) { + options.setJobId(System.getProperty("job_id")); + } + + // TODO: Remove setting these options once we have migrated to passing + // through the pipeline options. if (System.getProperties().containsKey("root_url")) { options.setApiRootUrl(System.getProperty("root_url")); } @@ -513,15 +533,9 @@ public static DataflowWorkerHarnessOptions createFromSystemProperties() { if (System.getProperties().containsKey("service_account_keyfile")) { options.setServiceAccountKeyfile(System.getProperty("service_account_keyfile")); } - if (System.getProperties().containsKey("worker_id")) { - options.setWorkerId(System.getProperty("worker_id")); - } if (System.getProperties().containsKey("project_id")) { options.setProject(System.getProperty("project_id")); } - if (System.getProperties().containsKey("job_id")) { - options.setJobId(System.getProperty("job_id")); - } if (System.getProperties().containsKey("path_validator_class")) { try { options.setPathValidatorClass((Class) Class.forName( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index b072f64fd576b..1a3d27498c892 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -89,6 +89,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -108,6 +109,7 @@ public class DataflowPipelineTranslator { // Must be kept in sync with their internal counterparts. 
public static final String HARNESS_WORKER_POOL = "harness"; private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineTranslator.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); /** * A map from PTransform class to the corresponding @@ -340,6 +342,14 @@ public Job translate(List packages) { Environment environment = new Environment(); job.setEnvironment(environment); + try { + environment.setSdkPipelineOptions( + MAPPER.readValue(MAPPER.writeValueAsBytes(options), Map.class)); + } catch (IOException e) { + throw new IllegalArgumentException( + "PipelineOptions specified failed to serialize to JSON.", e); + } + WorkerPool workerPool = new WorkerPool(); workerPool.setKind(HARNESS_WORKER_POOL); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 0f75fb9cbb42a..25eaa4aa1b100 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -112,8 +112,15 @@ public static void main(String[] args) throws Exception { (WindmillServerStub) Class.forName(WINDMILL_SERVER_CLASS_NAME) .getDeclaredConstructor(String.class).newInstance(hostport); + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.createFromSystemProperties(); + // TODO: Remove setting these options once we have migrated to passing + // through the pipeline options. + options.setAppName("StreamingWorkerHarness"); + options.setStreaming(true); + StreamingDataflowWorker worker = - new StreamingDataflowWorker(mapTasks, windmillServer); + new StreamingDataflowWorker(mapTasks, windmillServer, options); worker.start(); worker.runStatusServer(statusPort); @@ -134,43 +141,50 @@ public static void main(String[] args) throws Exception { private Server statusServer; private AtomicReference lastException; - /** Regular constructor. */ public StreamingDataflowWorker( - List mapTasks, WindmillServerStub server) { - options = PipelineOptionsFactory.createFromSystemProperties(); - options.setAppName("StreamingWorkerHarness"); - options.setStreaming(true); - - initialize(mapTasks, server); - - if (System.getProperties().containsKey("path_validator_class")) { - try { - options.setPathValidatorClass((Class) Class.forName( - System.getProperty("path_validator_class"))); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Unable to find validator class", e); - } - } - if (System.getProperties().containsKey("credential_factory_class")) { - try { - options.setCredentialFactoryClass((Class) Class.forName( - System.getProperty("credential_factory_class"))); - } catch (ClassNotFoundException e) { - throw new RuntimeException("Unable to find credential factory class", e); - } - } - } - - /** The constructor that takes PipelineOptions. Should be used only by unit tests. 
*/ - StreamingDataflowWorker( List mapTasks, WindmillServerStub server, DataflowWorkerHarnessOptions options) { this.options = options; - initialize(mapTasks, server); + this.instructionMap = new ConcurrentHashMap<>(); + this.outputMap = new ConcurrentHashMap<>(); + this.mapTaskExecutors = new ConcurrentHashMap<>(); + for (MapTask mapTask : mapTasks) { + addComputation(mapTask); + } + this.threadFactory = new ThreadFactory() { + private final Thread.UncaughtExceptionHandler handler = + new Thread.UncaughtExceptionHandler() { + @Override + public void uncaughtException(Thread thread, Throwable e) { + LOG.error("Uncaught exception: ", e); + System.exit(1); + } + }; + + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(r); + t.setUncaughtExceptionHandler(handler); + t.setDaemon(true); + return t; + } + }; + this.executor = new BoundedQueueExecutor( + MAX_THREAD_POOL_SIZE, THREAD_EXPIRATION_TIME_SEC, TimeUnit.SECONDS, + MAX_THREAD_POOL_QUEUE_SIZE, threadFactory); + this.windmillServer = server; + this.running = new AtomicBoolean(); + this.stateFetcher = new StateFetcher(server); + this.clientId = new Random().nextLong(); + this.lastException = new AtomicReference<>(); + + DataflowWorkerLoggingFormatter.setJobId(options.getJobId()); + DataflowWorkerLoggingFormatter.setWorkerId(options.getWorkerId()); } public void start() { running.set(true); dispatchThread = threadFactory.newThread(new Runnable() { + @Override public void run() { dispatchLoop(); } @@ -180,6 +194,7 @@ public void run() { dispatchThread.start(); commitThread = threadFactory.newThread(new Runnable() { + @Override public void run() { commitLoop(); } @@ -212,42 +227,6 @@ public void stop() { } } - /** Initializes the execution harness. */ - private void initialize(List mapTasks, WindmillServerStub server) { - this.instructionMap = new ConcurrentHashMap<>(); - this.outputMap = new ConcurrentHashMap<>(); - this.mapTaskExecutors = new ConcurrentHashMap<>(); - for (MapTask mapTask : mapTasks) { - addComputation(mapTask); - } - this.threadFactory = new ThreadFactory() { - private final Thread.UncaughtExceptionHandler handler = - new Thread.UncaughtExceptionHandler() { - public void uncaughtException(Thread thread, Throwable e) { - LOG.error("Uncaught exception: ", e); - System.exit(1); - } - }; - public Thread newThread(Runnable r) { - Thread t = new Thread(r); - t.setUncaughtExceptionHandler(handler); - t.setDaemon(true); - return t; - } - }; - this.executor = new BoundedQueueExecutor( - MAX_THREAD_POOL_SIZE, THREAD_EXPIRATION_TIME_SEC, TimeUnit.SECONDS, - MAX_THREAD_POOL_QUEUE_SIZE, threadFactory); - this.windmillServer = server; - this.running = new AtomicBoolean(); - this.stateFetcher = new StateFetcher(server); - this.clientId = new Random().nextLong(); - this.lastException = new AtomicReference<>(); - - DataflowWorkerLoggingFormatter.setJobId(options.getJobId()); - DataflowWorkerLoggingFormatter.setWorkerId(options.getWorkerId()); - } - public void runStatusServer(int statusPort) { statusServer = new Server(statusPort); statusServer.setHandler(new StatusHandler()); @@ -320,6 +299,7 @@ private void dispatchLoop() { getConfig(computation); } executor.execute(new Runnable() { + @Override public void run() { process(computation, work); } @@ -402,6 +382,7 @@ private void process( // Try again, after some delay and at the end of the queue to avoid a tight loop. 
sleep(10000); executor.forceExecute(new Runnable() { + @Override public void run() { process(computation, work); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index 8ec5eb25e125b..3cf4e1258f628 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -59,7 +59,7 @@ public void testAutomaticRegistrationOfRunners() { } @Test - public void testCreationFromSystemProperties() { + public void testCreationFromSystemProperties() throws Exception { System.getProperties().putAll(ImmutableMap .builder() .put("root_url", "test_root_url") @@ -71,7 +71,10 @@ public void testCreationFromSystemProperties() { .put("worker_id", "test_worker_id") .put("project_id", "test_project_id") .put("job_id", "test_job_id") + // Set a non-default value for testing + .put("sdk_pipeline_options", "{\"options\":{\"numWorkers\":999}}") .build()); + DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); assertEquals("test_root_url", options.getApiRootUrl()); assertEquals("test_service_path", options.getDataflowEndpoint()); @@ -82,6 +85,7 @@ public void testCreationFromSystemProperties() { assertEquals("test_worker_id", options.getWorkerId()); assertEquals("test_project_id", options.getProject()); assertEquals("test_job_id", options.getJobId()); + assertEquals(999, options.getNumWorkers()); } @Test diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index 21a4a02a770a2..24509b8dd374c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -56,6 +56,8 @@ import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.PDone; import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import org.junit.Assert; @@ -133,6 +135,7 @@ private static Dataflow buildMockDataflow( private static DataflowPipelineOptions buildPipelineOptions() throws IOException { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setGcpCredential(new TestCredential()); + options.setJobName("some-job-name"); options.setProject("some-project"); options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString()); options.setFilesToStage(new LinkedList()); @@ -140,12 +143,37 @@ private static DataflowPipelineOptions buildPipelineOptions() throws IOException return options; } + @Test + public void testSettingOfSdkPipelineOptions() throws IOException { + DataflowPipelineOptions options = buildPipelineOptions(); + options.setRunner(DataflowPipelineRunner.class); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = DataflowPipelineTranslator.fromOptions(options).translate( + p, Collections.emptyList()); + + assertEquals(ImmutableMap.of("options", + ImmutableMap.builder() + .put("appName", "DataflowPipelineTranslatorTest") + .put("project", "some-project") + 
.put("pathValidatorClass", "com.google.cloud.dataflow.sdk.util.DataflowPathValidator") + .put("runner", "com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner") + .put("jobName", "some-job-name") + .put("tempLocation", "gs://somebucket/some/path") + .put("filesToStage", ImmutableList.of()) + .put("stagingLocation", "gs://somebucket/some/path/staging") + .build()), + job.getEnvironment().getSdkPipelineOptions()); + } + @Test public void testZoneConfig() throws IOException { final String testZone = "test-zone-1"; DataflowPipelineOptions options = buildPipelineOptions(); options.setZone(testZone); + options.setRunner(DataflowPipelineRunner.class); Pipeline p = buildPipeline(options); p.traverseTopologically(new RecordingPipelineVisitor()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index 3146c0e72c379..4d11659508985 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -144,7 +144,7 @@ public void setWorkerProgress(ApproximateProgress progress) { public void initMocksAndWorkflowServiceAndWorkerAndWork() throws IOException { MockitoAnnotations.initMocks(this); - options = PipelineOptionsFactory.createFromSystemProperties(); + options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class); options.setProject(PROJECT_ID); options.setJobId(JOB_ID); options.setWorkerId(WORKER_ID); From 177b0d19f65b26ab0dd4e51e8eee817405de09e4 Mon Sep 17 00:00:00 2001 From: dhuo Date: Wed, 25 Feb 2015 13:46:36 -0800 Subject: [PATCH 0186/1541] Add unittest for premature end-of-stream, add entry to CHANGES.txt. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87187120 --- .../sdk/util/gcsio/GoogleCloudStorageReadChannel.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java index 13c3dc427eaab..ab9c747b32228 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java @@ -262,7 +262,9 @@ public int read(ByteBuffer buffer) // actual size of the data stream when stream compression is used, so we can only ignore // this case here. Preconditions.checkState(isCompressedStream || currentPosition == size, - "Received end of stream result before all the file data has been received"); + "Received end of stream result before all the file data has been received; " + + "totalBytesRead: %s, currentPosition: %s, size: %s", + totalBytesRead, currentPosition, size); break; } totalBytesRead += numBytesRead; @@ -360,7 +362,8 @@ public int read(ByteBuffer buffer) // read against the stream size. Unfortunately we don't have information about the actual size // of the data stream when stream compression is used, so we can only ignore this case here. 
Preconditions.checkState(isCompressedStream || currentPosition == size, - "Failed to read any data before all the file data has been received"); + "Failed to read any data before all the file data has been received; " + + "currentPosition: %s, size: %s", currentPosition, size); return -1; } else { return totalBytesRead; From 734d441825cdb9fe9e96f2691e43d41e0ece5a9a Mon Sep 17 00:00:00 2001 From: vanya Date: Wed, 25 Feb 2015 13:58:32 -0800 Subject: [PATCH 0187/1541] Improve error message on exception in StandardCoder.getEncodedElementByteSize(). [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87188532 --- .../com/google/cloud/dataflow/sdk/coders/StandardCoder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java index 2df352757a142..790e96fcf6d53 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -126,7 +126,7 @@ protected long getEncodedElementByteSize(T value, Context context) return os.size(); } catch (Exception exn) { throw new IllegalArgumentException( - "Unable to encode element " + value + " with coder " + this, exn); + "Unable to encode element '" + value + "' with coder '" + this + "'.", exn); } } From ac6bbdd034902855d04df5d857c8f0e7ebc73e5a Mon Sep 17 00:00:00 2001 From: vanya Date: Wed, 25 Feb 2015 15:07:29 -0800 Subject: [PATCH 0188/1541] Slightly improve invalid bucket name error message. This gets the specific problem description to the beginning, and more information is given after that. Also gets rid of an inconsistent double space at the beginning of one of the sentences. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87195785 --- .../cloud/dataflow/sdk/util/gcsfs/GcsPath.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java index f1da8b767ef2e..9b0c75c09c3d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java @@ -203,14 +203,13 @@ public GcsPath(@Nullable FileSystem fs, } Preconditions.checkArgument(!bucket.contains("/"), "GCS bucket may not contain a slash"); - Preconditions - .checkArgument(bucket.isEmpty() - || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"), - "GCS bucket names must contain only lowercase letters, numbers, " - + "dashes (-), underscores (_), and dots (.). Bucket names " - + "must start and end with a number or letter. " - + "See https://developers.google.com/storage/docs/bucketnaming " - + "for more details. Bucket name: " + bucket); + Preconditions.checkArgument( + bucket.isEmpty() || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"), "" + + "Invalid bucket name: '" + bucket + "'. GCS bucket names must contain only " + + "lowercase letters, numbers, dashes (-), underscores (_), and dots (.). " + + "Bucket names must start and end with a number or letter. 
" + + "See https://developers.google.com/storage/docs/bucketnaming " + + "for more details."); if (object == null) { object = ""; From 1e0afad75dd3f762460493da6a9be6d282763207 Mon Sep 17 00:00:00 2001 From: vanya Date: Wed, 25 Feb 2015 15:51:43 -0800 Subject: [PATCH 0189/1541] Improve error message on CoderException. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87200225 --- .../dataflow/sdk/runners/DataflowPipelineTranslator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 1a3d27498c892..622de9a4cbd7d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -805,8 +805,8 @@ private void createHelper( // TODO: Put in better element printing: // truncate if too long. throw new IllegalArgumentException( - "unable to encode element " + elem + " of " + transform - + " using " + coder, + "Unable to encode element '" + elem + "' of transform '" + transform + + "' using coder '" + coder + "'.", exn); } String encodedJson = byteArrayToJsonString(encodedBytes); From 48dfb0cecd2a1a2e1fed25d7c047225c4a2127e2 Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 25 Feb 2015 15:54:38 -0800 Subject: [PATCH 0190/1541] Fix the types in ExtractOutputDoFn java doc. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87200523 --- .../cloud/dataflow/sdk/runners/worker/CombineValuesFn.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 31e87649a5d27..b32588081828c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -203,7 +203,7 @@ public void processElement(ProcessContext c) { } /* - * EXTRACT phase: KV> -> KV. + * EXTRACT phase: KV -> KV. */ private static class ExtractOutputDoFn extends DoFn, KV>{ From b76455fcd29842b6c562de7587cc39d1e87cae13 Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 25 Feb 2015 17:06:02 -0800 Subject: [PATCH 0191/1541] Mark PartialGroupByKeyOperation can restart. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87206956 --- .../sdk/util/common/worker/PartialGroupByKeyOperation.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java index a4afa5b2820d2..97072a5a18879 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java @@ -194,6 +194,12 @@ public void finish() throws Exception { } } + @Override + public boolean supportsRestart() { + // SizeEstimators are safe to be reused. + return true; + } + /** * Sets the maximum amount of memory the grouping table is allowed to * consume before it has to be flushed. 
From 19f9f05cd5c9d1dbb215eddd8dbff5d394f94de8 Mon Sep 17 00:00:00 2001 From: earhart Date: Wed, 25 Feb 2015 21:16:26 -0800 Subject: [PATCH 0192/1541] Switch the Dataflow SDK to use the new Dataflow service endpoint. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87220492 --- .../dataflow/sdk/options/DataflowPipelineDebugOptions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java index 0ec0bbf044375..aba60e1ef5e7e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java @@ -41,7 +41,7 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { * url. */ @Description("Cloud Dataflow Endpoint") - @Default.String("dataflow/v1b3/projects/") + @Default.String("v1b3/projects/") String getDataflowEndpoint(); void setDataflowEndpoint(String value); @@ -61,7 +61,7 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { * The API endpoint to use when communicating with the Dataflow service. */ @Description("Google Cloud root API") - @Default.String("https://www.googleapis.com/") + @Default.String("https://dataflow.googleapis.com/") String getApiRootUrl(); void setApiRootUrl(String value); From 92d06ebc0c61bfd3aa9fd6f487d848abe7e9ed79 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 26 Feb 2015 11:39:30 -0800 Subject: [PATCH 0193/1541] Rollback the change to slightly improve invalid bucket name error message. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87270734 --- .../cloud/dataflow/sdk/util/gcsfs/GcsPath.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java index 9b0c75c09c3d7..f1da8b767ef2e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java @@ -203,13 +203,14 @@ public GcsPath(@Nullable FileSystem fs, } Preconditions.checkArgument(!bucket.contains("/"), "GCS bucket may not contain a slash"); - Preconditions.checkArgument( - bucket.isEmpty() || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"), "" - + "Invalid bucket name: '" + bucket + "'. GCS bucket names must contain only " - + "lowercase letters, numbers, dashes (-), underscores (_), and dots (.). " - + "Bucket names must start and end with a number or letter. " - + "See https://developers.google.com/storage/docs/bucketnaming " - + "for more details."); + Preconditions + .checkArgument(bucket.isEmpty() + || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"), + "GCS bucket names must contain only lowercase letters, numbers, " + + "dashes (-), underscores (_), and dots (.). Bucket names " + + "must start and end with a number or letter. " + + "See https://developers.google.com/storage/docs/bucketnaming " + + "for more details. 
Bucket name: " + bucket); if (object == null) { object = ""; From 0a97da58291c946be8a0beb60aafa71ccc68d89d Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 26 Feb 2015 11:48:37 -0800 Subject: [PATCH 0194/1541] Pick a sensible default period for SlidingWindows based on the given size of the windows [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87271369 --- .../transforms/windowing/SlidingWindows.java | 18 +++++++++++++-- .../windowing/SlidingWindowsTest.java | 22 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java index d29062e31ef2e..2ab1737ac9975 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -61,10 +61,11 @@ public class SlidingWindows extends NonMergingWindowFn { * [N * period, N * period + size), where 0 is the epoch. * *
<p>
    If {@link SlidingWindows#every} is not called, the period defaults - * to one millisecond. + * to the largest time unit smaller than the given duration. For example, + * specifying a size of 5 seconds will result in a default period of 1 second. */ public static SlidingWindows of(Duration size) { - return new SlidingWindows(new Duration(1), size, Duration.ZERO); + return new SlidingWindows(getDefaultPeriod(size), size, Duration.ZERO); } /** @@ -129,4 +130,17 @@ public boolean isCompatible(WindowFn other) { return false; } } + + static Duration getDefaultPeriod(Duration size) { + if (size.isLongerThan(Duration.standardHours(1))) { + return Duration.standardHours(1); + } + if (size.isLongerThan(Duration.standardMinutes(1))) { + return Duration.standardMinutes(1); + } + if (size.isLongerThan(Duration.standardSeconds(1))) { + return Duration.standardSeconds(1); + } + return Duration.millis(1); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java index 8af9782fc382c..fbad9e790e0ea 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindowsTest.java @@ -110,6 +110,28 @@ public void testTimeUnit() throws Exception { Arrays.asList(1L, 2L, 1000L, 5000L, 5001L, 10000L))); } + @Test + public void testDefaultPeriods() throws Exception { + assertEquals(Duration.standardHours(1), + SlidingWindows.getDefaultPeriod(Duration.standardDays(1))); + assertEquals(Duration.standardHours(1), + SlidingWindows.getDefaultPeriod(Duration.standardHours(2))); + assertEquals(Duration.standardMinutes(1), + SlidingWindows.getDefaultPeriod(Duration.standardHours(1))); + assertEquals(Duration.standardMinutes(1), + SlidingWindows.getDefaultPeriod(Duration.standardMinutes(10))); + assertEquals(Duration.standardSeconds(1), + SlidingWindows.getDefaultPeriod(Duration.standardMinutes(1))); + assertEquals(Duration.standardSeconds(1), + SlidingWindows.getDefaultPeriod(Duration.standardSeconds(10))); + assertEquals(Duration.millis(1), + SlidingWindows.getDefaultPeriod(Duration.standardSeconds(1))); + assertEquals(Duration.millis(1), + SlidingWindows.getDefaultPeriod(Duration.millis(10))); + assertEquals(Duration.millis(1), + SlidingWindows.getDefaultPeriod(Duration.millis(1))); + } + @Test public void testEquality() { assertTrue( From cd845a9594eb6dc87c697c948561bd19dfd28e9e Mon Sep 17 00:00:00 2001 From: relax Date: Fri, 27 Feb 2015 01:12:45 -0800 Subject: [PATCH 0195/1541] Reduce the number of calls to BigQuery table creation to reduce quota issues. Previously every thread tried to create the table at startup. Synchronizing this call prevents this. 
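The synchronization pattern described here, reduced to a standalone sketch (the set, lock, and create call below are stand-ins for the BigQueryIO internals shown in the diff that follows):

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

class CreateOnceGuard {
  // Synchronized set so the cheap membership check is safe outside the lock.
  private static final Set<String> createdTables =
      Collections.synchronizedSet(new HashSet<String>());

  static void ensureCreated(String table) {
    if (!createdTables.contains(table)) {
      // Re-check under the lock: another thread may have just created the
      // table, and this keeps the rest from issuing their own create calls.
      synchronized (createdTables) {
        if (!createdTables.contains(table)) {
          create(table); // stand-in for the BigQuery table-creation request
          createdTables.add(table);
        }
      }
    }
  }

  private static void create(String table) {
    // hypothetical expensive call
  }
}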
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87323976 --- .../cloud/dataflow/sdk/io/BigQueryIO.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index 63e29aaf07162..afee01f2fe040 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -682,11 +682,18 @@ public void startBundle(Context context) { JSON_FACTORY.fromString(jsonTableReference, TableReference.class); if (!createdTables.contains(jsonTableSchema)) { - TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class); - Bigquery client = Transport.newBigQueryClient(options).build(); - BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); - inserter.tryCreateTable(tableSchema); - createdTables.add(jsonTableSchema); + synchronized (createdTables) { + // Another thread may have succeeded in creating the table in the meanwhile, so + // check again. This check isn't needed for correctness, but we add it to prevent + // every thread from attempting a create and overwhelming our BigQuery quota. + if (!createdTables.contains(jsonTableSchema)) { + TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class); + Bigquery client = Transport.newBigQueryClient(options).build(); + BigQueryTableInserter inserter = new BigQueryTableInserter(client, tableReference); + inserter.tryCreateTable(tableSchema); + createdTables.add(jsonTableSchema); + } + } } } catch (IOException e) { throw new RuntimeException(e); From 0aa4d9d539a23d765a6a0d4f7fa9d82626d960ae Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 27 Feb 2015 10:45:54 -0800 Subject: [PATCH 0196/1541] Add support for setting the default log level and also custom log level overrides on the Dataflow worker. 
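As a usage sketch of the options introduced below (the package name and levels are examples only):

import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level;
import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.WorkerLogLevelOverride;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

class WorkerLoggingExample {
  static DataflowPipelineOptions configureLogging() {
    DataflowPipelineOptions options =
        PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    // Quiet the workers by default, but keep detailed logs for one package.
    options.setDefaultWorkerLogLevel(Level.WARN);
    options.setWorkerLogLevelOverrides(new WorkerLogLevelOverride[] {
        WorkerLogLevelOverride.forName("com.google.cloud.dataflow.examples", Level.DEBUG)});
    return options;
  }
}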
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87355253 --- .../sdk/options/DataflowPipelineOptions.java | 2 +- .../options/DataflowWorkerLoggingOptions.java | 154 ++++++++++++++++++ .../runners/worker/DataflowWorkerHarness.java | 4 +- .../worker/StreamingDataflowWorker.java | 16 +- .../DataflowWorkerLoggingInitializer.java | 103 +++++++++--- .../DataflowWorkerLoggingOptionsTest.java | 70 ++++++++ .../DataflowWorkerLoggingInitializerTest.java | 106 ++++++------ 7 files changed, 371 insertions(+), 84 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptionsTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index b71949ec9947b..6028326788806 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -34,7 +34,7 @@ public interface DataflowPipelineOptions extends PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions, DataflowPipelineWorkerPoolOptions, BigQueryOptions, - GcsOptions, StreamingOptions, CloudDebuggerOptions { + GcsOptions, StreamingOptions, CloudDebuggerOptions, DataflowWorkerLoggingOptions { /** * GCS path for temporary files. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java new file mode 100644 index 0000000000000..e14b97e718d24 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.options; + +import com.google.common.base.Preconditions; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +import java.util.Arrays; + +/** + * Options which are used to control logging configuration on the Dataflow worker. + */ +public interface DataflowWorkerLoggingOptions extends PipelineOptions { + /** + * The set of log levels which can be used on the Dataflow worker. + */ + public enum Level { + DEBUG, ERROR, INFO, TRACE, WARN + } + + /** + * This option controls the default log level of all loggers without a + * log level override. + */ + @Default.Enum("INFO") + Level getDefaultWorkerLogLevel(); + void setDefaultWorkerLogLevel(Level level); + + /** + * This option controls the log levels for specifically named loggers. + *

+ * Later options with equivalent names override earlier options.
+ *
    + * See {@link WorkerLogLevelOverride} for more information on how to configure logging + * on a per {@link Class}, {@link Package}, or name basis. + */ + WorkerLogLevelOverride[] getWorkerLogLevelOverrides(); + void setWorkerLogLevelOverrides(WorkerLogLevelOverride[] string); + + /** + * Defines a log level override for a specific class, package, or name. + *

    + * {@link java.util.logging} is used on the Dataflow worker harness and supports + * a logging hierarchy based off of names which are "." separated. It is a common + * pattern to have the logger for a given class share the same name as the class itself. + * Given the classes {@code a.b.c.Foo}, {@code a.b.c.Xyz}, and {@code a.b.Bar}, with + * loggers named {@code "a.b.c.Foo"}, {@code "a.b.c.Xyz"}, and {@code "a.b.Bar"} respectively, + * we can override the log levels: + *

+ * <ul>
+ *   <li>for {@code Foo} by specifying the name {@code "a.b.c.Foo"} or the {@link Class}
+ *       representing {@code a.b.c.Foo}.
+ *   <li>for {@code Foo}, {@code Xyz}, and {@code Bar} by specifying the name {@code "a.b"} or
+ *       the {@link Package} representing {@code a.b}.
+ *   <li>for {@code Foo} and {@code Bar} by specifying both of their names or classes.
+ * </ul>
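+ * For example (an illustrative sketch only; {@code options} is assumed to be a
+ * {@code DataflowWorkerLoggingOptions} instance and the names are those from the
+ * scenario above):
+ * <pre>
+ * {@code
+ * options.setWorkerLogLevelOverrides(new WorkerLogLevelOverride[] {
+ *     WorkerLogLevelOverride.forName("a.b", Level.WARN),
+ *     WorkerLogLevelOverride.forName("a.b.c.Foo", Level.DEBUG),
+ * });
+ * }
+ * </pre>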
    + * Note that by specifying multiple overrides, the exact name followed by the closest parent + * takes precedence. + */ + public static class WorkerLogLevelOverride { + private static final String SEPARATOR = "#"; + + /** + * Overrides the default log level for the passed in class. + *

    + * This is equivalent to calling {@link #forName(String, Level)} and + * passing in the {@link Class#getName() class name}. + */ + public static WorkerLogLevelOverride forClass(Class klass, Level level) { + Preconditions.checkNotNull(klass, "Expected class to be not null."); + return forName(klass.getName(), level); + } + + /** + * Overrides the default log level for the passed in package. + *

    + * This is equivalent to calling {@link #forName(String, Level)} and + * passing in the {@link Package#getName() package name}. + */ + public static WorkerLogLevelOverride forPackage(Package pkg, Level level) { + Preconditions.checkNotNull(pkg, "Expected package to be not null."); + return forName(pkg.getName(), level); + } + + /** + * Overrides the default log level for the passed in name. + *

    + * Note that because of the hierarchical nature of logger names, this will + * override the log level of all loggers which have the passed in name or + * a parent logger which has the passed in name. + */ + public static WorkerLogLevelOverride forName(String name, Level level) { + Preconditions.checkNotNull(name, "Expected name to be not null."); + Preconditions.checkNotNull(level, + "Expected level to be one of %s.", Arrays.toString(Level.values())); + return new WorkerLogLevelOverride(name, level); + } + + /** + * Expects a value of the form {@code Name#Level}. + */ + @JsonCreator + public static WorkerLogLevelOverride create(String value) { + Preconditions.checkNotNull(value, "Expected value to be not null."); + Preconditions.checkArgument(value.contains(SEPARATOR), + "Expected '#' separator but none found within '%s'.", value); + String[] parts = value.split(SEPARATOR, 2); + Level level; + try { + level = Level.valueOf(parts[1]); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(String.format( + "Unsupported log level '%s' requested. Must be one of %s.", + parts[1], Arrays.toString(Level.values()))); + } + return forName(parts[0], level); + } + + private final String name; + private final Level level; + private WorkerLogLevelOverride(String name, Level level) { + this.name = name; + this.level = level; + } + + public String getName() { + return name; + } + + public Level getLevel() { + return level; + } + + @JsonValue + @Override + public String toString() { + return name + SEPARATOR + level; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index 3ac2895afcba4..36f603f7d4127 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -93,10 +93,12 @@ public void uncaughtException(Thread t, Throwable e) { */ public static void main(String[] args) throws Exception { Thread.currentThread().setUncaughtExceptionHandler(WorkerUncaughtExceptionHandler.INSTANCE); - new DataflowWorkerLoggingInitializer().initialize(); + DataflowWorkerLoggingInitializer.initialize(); DataflowWorkerHarnessOptions pipelineOptions = PipelineOptionsFactory.createFromSystemProperties(); + DataflowWorkerLoggingInitializer.configure(pipelineOptions); + final DataflowWorker worker = create(pipelineOptions); processWork(pipelineOptions, worker); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 25eaa4aa1b100..f92edbb0cffa5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -91,8 +91,15 @@ static MapTask parseMapTask(String input) throws IOException { } public static void main(String[] args) throws Exception { - new DataflowWorkerLoggingInitializer().initialize(); + DataflowWorkerLoggingInitializer.initialize(); + DataflowWorkerHarnessOptions options = + PipelineOptionsFactory.createFromSystemProperties(); + // TODO: Remove setting these options once we have migrated to passing + // through the pipeline options. 
+ options.setAppName("StreamingWorkerHarness"); + options.setStreaming(true); + DataflowWorkerLoggingInitializer.configure(options); String hostport = System.getProperty("windmill.hostport"); if (hostport == null) { throw new Exception("-Dwindmill.hostport must be set to the location of the windmill server"); @@ -112,13 +119,6 @@ public static void main(String[] args) throws Exception { (WindmillServerStub) Class.forName(WINDMILL_SERVER_CLASS_NAME) .getDeclaredConstructor(String.class).newInstance(hostport); - DataflowWorkerHarnessOptions options = - PipelineOptionsFactory.createFromSystemProperties(); - // TODO: Remove setting these options once we have migrated to passing - // through the pipeline options. - options.setAppName("StreamingWorkerHarness"); - options.setStreaming(true); - StreamingDataflowWorker worker = new StreamingDataflowWorker(mapTasks, windmillServer, options); worker.start(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java index a513dc75ee359..dbe8c6e1d468d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -16,10 +16,20 @@ package com.google.cloud.dataflow.sdk.runners.worker.logging; +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.DEBUG; +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.ERROR; +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.INFO; +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.TRACE; +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.WARN; + +import com.google.api.client.util.Lists; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.WorkerLogLevelOverride; import com.google.common.collect.ImmutableBiMap; import java.io.File; import java.io.IOException; +import java.util.List; import java.util.logging.FileHandler; import java.util.logging.Formatter; import java.util.logging.Handler; @@ -28,47 +38,60 @@ import java.util.logging.Logger; /** - * Sets up java.util.Logging configuration on the Dataflow Worker Harness with a - * console and file logger. The console and file loggers use the - * {@link DataflowWorkerLoggingFormatter} format. A user can override - * the logging level and location by specifying the Java system properties - * "dataflow.worker.logging.level" and "dataflow.worker.logging.location" respectively. - * The default log level is INFO and the default location is a file named dataflow-worker.log - * within the systems temporary directory. + * Sets up {@link java.util.logging} configuration on the Dataflow worker with a + * file logger. The file logger uses the {@link DataflowWorkerLoggingFormatter} format. + * A user can override the logging level by customizing the options found within + * {@link DataflowWorkerLoggingOptions}. A user can override the location by specifying the + * Java system property "dataflow.worker.logging.location". The default log level is INFO + * and the default location is a file named dataflow-worker.log within the systems temporary + * directory. 
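+ *
+ * <p>For example, the log location could be overridden when launching the worker
+ * (the path shown is purely illustrative):
+ * <pre>-Ddataflow.worker.logging.location=/var/log/dataflow/worker.log</pre>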
*/ public class DataflowWorkerLoggingInitializer { private static final String DEFAULT_LOGGING_LOCATION = new File(System.getProperty("java.io.tmpdir"), "dataflow-worker.log").getPath(); private static final String ROOT_LOGGER_NAME = ""; - public static final String DATAFLOW_WORKER_LOGGING_LEVEL = "dataflow.worker.logging.level"; - public static final String DATAFLOW_WORKER_LOGGING_LOCATION = "dataflow.worker.logging.location"; - public static final ImmutableBiMap LEVELS = - ImmutableBiMap.builder() - .put(Level.SEVERE, "ERROR") - .put(Level.WARNING, "WARNING") - .put(Level.INFO, "INFO") - .put(Level.FINE, "DEBUG") - .put(Level.FINEST, "TRACE") + private static final String DATAFLOW_WORKER_LOGGING_LOCATION = "dataflow.worker.logging.location"; + static final ImmutableBiMap LEVELS = + ImmutableBiMap.builder() + .put(Level.SEVERE, ERROR) + .put(Level.WARNING, WARN) + .put(Level.INFO, INFO) + .put(Level.FINE, DEBUG) + .put(Level.FINEST, TRACE) .build(); - private static final String DEFAULT_LOG_LEVEL = LEVELS.get(Level.INFO); - public void initialize() { - initialize(LogManager.getLogManager()); - } + /** + * This default log level is overridden by the log level found at + * {@code DataflowWorkerLoggingOptions#getDefaultWorkerLogLevel()}. + */ + private static final DataflowWorkerLoggingOptions.Level DEFAULT_LOG_LEVEL = + LEVELS.get(Level.INFO); + + /* We need to store a reference to the configured loggers so that they are not + * garbage collected. java.util.logging only has weak references to the loggers + * so if they are garbage collection, our hierarchical configuration will be lost. */ + private static List configuredLoggers = Lists.newArrayList(); + private static FileHandler fileHandler; - void initialize(LogManager logManager) { + /** + * Sets up the initial logging configuration. + */ + public static synchronized void initialize() { + if (fileHandler != null) { + return; + } try { - Level logLevel = LEVELS.inverse().get( - System.getProperty(DATAFLOW_WORKER_LOGGING_LEVEL, DEFAULT_LOG_LEVEL)); + Level logLevel = LEVELS.inverse().get(DEFAULT_LOG_LEVEL); Formatter formatter = new DataflowWorkerLoggingFormatter(); - FileHandler fileHandler = new FileHandler( + fileHandler = new FileHandler( System.getProperty(DATAFLOW_WORKER_LOGGING_LOCATION, DEFAULT_LOGGING_LOCATION), true /* Append so that we don't squash existing logs */); fileHandler.setFormatter(formatter); - fileHandler.setLevel(logLevel); + fileHandler.setLevel(Level.ALL); // Reset the global log manager, get the root logger and remove the default log handlers. + LogManager logManager = LogManager.getLogManager(); logManager.reset(); Logger rootLogger = logManager.getLogger(ROOT_LOGGER_NAME); for (Handler handler : rootLogger.getHandlers()) { @@ -81,4 +104,34 @@ void initialize(LogManager logManager) { throw new ExceptionInInitializerError(e); } } + + /** + * Reconfigures logging with the passed in options. + */ + public static synchronized void configure(DataflowWorkerLoggingOptions options) { + initialize(); + if (options.getDefaultWorkerLogLevel() != null) { + LogManager.getLogManager().getLogger(ROOT_LOGGER_NAME).setLevel( + LEVELS.inverse().get(options.getDefaultWorkerLogLevel())); + } + /* We store a reference to all the custom loggers the user configured. + * To make sure that these custom levels override the default logger level, + * we break the parent chain and have the logger directly pass log records + * to the file handler. 
*/ + if (options.getWorkerLogLevelOverrides() != null) { + for (WorkerLogLevelOverride loggerOverride : options.getWorkerLogLevelOverrides()) { + Logger logger = Logger.getLogger(loggerOverride.getName()); + logger.setUseParentHandlers(false); + logger.setLevel(LEVELS.inverse().get(loggerOverride.getLevel())); + logger.addHandler(fileHandler); + configuredLoggers.add(logger); + } + } + } + + // Visible for testing + static void reset() { + configuredLoggers = Lists.newArrayList(); + fileHandler = null; + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptionsTest.java new file mode 100644 index 0000000000000..fffef0e888f28 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptionsTest.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.options; + +import static com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.Level.WARN; +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.WorkerLogLevelOverride; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link DataflowWorkerLoggingOptions}. 
*/ +@RunWith(JUnit4.class) +public class DataflowWorkerLoggingOptionsTest { + private static final ObjectMapper MAPPER = new ObjectMapper(); + @Rule public ExpectedException expectedException = ExpectedException.none(); + + @Test + public void testWorkerLogLevelOverrideWithInvalidLogLevel() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Unsupported log level"); + WorkerLogLevelOverride.create("Name#FakeLevel"); + } + + @Test + public void testWorkerLogLevelOverrideForClass() { + assertEquals("org.junit.Test#WARN", + MAPPER.convertValue(WorkerLogLevelOverride.forClass(Test.class, WARN), String.class)); + } + + @Test + public void testWorkerLogLevelOverrideForPackage() { + assertEquals("org.junit#WARN", + MAPPER.convertValue( + WorkerLogLevelOverride.forPackage(Test.class.getPackage(), WARN), String.class)); + } + + @Test + public void testWorkerLogLevelOverrideForName() { + assertEquals("A#WARN", + MAPPER.convertValue(WorkerLogLevelOverride.forName("A", WARN), String.class)); + } + + @Test + public void testSerializationAndDeserializationOf() { + String testValue = "A#WARN"; + assertEquals(testValue, + MAPPER.convertValue( + MAPPER.convertValue(testValue, WorkerLogLevelOverride.class), String.class)); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java index 50cc1e2d3ed18..8cc25a9e0e7df 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java @@ -16,24 +16,19 @@ package com.google.cloud.dataflow.sdk.runners.worker.logging; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.when; -import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions; +import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions.WorkerLogLevelOverride; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import org.junit.Before; -import org.junit.Rule; +import org.junit.After; import org.junit.Test; -import org.junit.rules.TestRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.mockito.ArgumentCaptor; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; -import java.util.List; import java.util.logging.FileHandler; import java.util.logging.Handler; import java.util.logging.Level; @@ -43,52 +38,65 @@ /** Unit tests for {@link DataflowWorkerLoggingInitializer}. 
*/ @RunWith(JUnit4.class) public class DataflowWorkerLoggingInitializerTest { - @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); + @After + public void tearDown() { + LogManager.getLogManager().reset(); + DataflowWorkerLoggingInitializer.reset(); + } - @Mock LogManager mockLogManager; - @Mock Logger mockRootLogger; - @Mock Handler mockHandler; + @Test + public void testWithDefaults() { + DataflowWorkerLoggingOptions options = + PipelineOptionsFactory.as(DataflowWorkerLoggingOptions.class); - @Before - public void setUp() { - MockitoAnnotations.initMocks(this); - when(mockLogManager.getLogger("")).thenReturn(mockRootLogger); - when(mockRootLogger.getHandlers()).thenReturn(new Handler[]{ mockHandler }); + DataflowWorkerLoggingInitializer.initialize(); + DataflowWorkerLoggingInitializer.configure(options); + + Logger rootLogger = LogManager.getLogManager().getLogger(""); + assertEquals(1, rootLogger.getHandlers().length); + assertEquals(Level.INFO, rootLogger.getLevel()); + assertTrue(isFileHandler(rootLogger.getHandlers()[0], Level.ALL)); } @Test - public void testWithDefaults() { - ArgumentCaptor argument = ArgumentCaptor.forClass(Handler.class); - - new DataflowWorkerLoggingInitializer().initialize(mockLogManager); - verify(mockLogManager).getLogger(""); - verify(mockLogManager).reset(); - verify(mockRootLogger).getHandlers(); - verify(mockRootLogger).removeHandler(mockHandler); - verify(mockRootLogger).setLevel(Level.INFO); - verify(mockRootLogger).addHandler(argument.capture()); - verifyNoMoreInteractions(mockLogManager, mockRootLogger); - - List handlers = argument.getAllValues(); - assertTrue(isFileHandler(handlers.get(0), Level.INFO)); + public void testWithConfigurationOverride() { + DataflowWorkerLoggingOptions options = + PipelineOptionsFactory.as(DataflowWorkerLoggingOptions.class); + options.setDefaultWorkerLogLevel(DataflowWorkerLoggingOptions.Level.WARN); + + DataflowWorkerLoggingInitializer.initialize(); + DataflowWorkerLoggingInitializer.configure(options); + + Logger rootLogger = LogManager.getLogManager().getLogger(""); + assertEquals(1, rootLogger.getHandlers().length); + assertEquals(Level.WARNING, rootLogger.getLevel()); + assertTrue(isFileHandler(rootLogger.getHandlers()[0], Level.ALL)); } @Test - public void testWithOverrides() { - ArgumentCaptor argument = ArgumentCaptor.forClass(Handler.class); - System.setProperty("dataflow.worker.logging.level", "WARNING"); - - new DataflowWorkerLoggingInitializer().initialize(mockLogManager); - verify(mockLogManager).getLogger(""); - verify(mockLogManager).reset(); - verify(mockRootLogger).getHandlers(); - verify(mockRootLogger).removeHandler(mockHandler); - verify(mockRootLogger).setLevel(Level.WARNING); - verify(mockRootLogger).addHandler(argument.capture()); - verifyNoMoreInteractions(mockLogManager, mockRootLogger); - - List handlers = argument.getAllValues(); - assertTrue(isFileHandler(handlers.get(0), Level.WARNING)); + public void testWithCustomLogLevels() { + DataflowWorkerLoggingOptions options = + PipelineOptionsFactory.as(DataflowWorkerLoggingOptions.class); + options.setWorkerLogLevelOverrides( + new WorkerLogLevelOverride[] { + WorkerLogLevelOverride.forName("A", DataflowWorkerLoggingOptions.Level.DEBUG), + WorkerLogLevelOverride.forName("B", DataflowWorkerLoggingOptions.Level.ERROR), + }); + + DataflowWorkerLoggingInitializer.initialize(); + DataflowWorkerLoggingInitializer.configure(options); + + Logger aLogger = LogManager.getLogManager().getLogger("A"); + assertEquals(1, 
aLogger.getHandlers().length); + assertEquals(Level.FINE, aLogger.getLevel()); + assertFalse(aLogger.getUseParentHandlers()); + assertTrue(isFileHandler(aLogger.getHandlers()[0], Level.ALL)); + + Logger bLogger = LogManager.getLogManager().getLogger("B"); + assertEquals(1, bLogger.getHandlers().length); + assertEquals(Level.SEVERE, bLogger.getLevel()); + assertFalse(bLogger.getUseParentHandlers()); + assertTrue(isFileHandler(bLogger.getHandlers()[0], Level.ALL)); } private boolean isFileHandler(Handler handler, Level level) { From 6fffedec103721965c42a17d42b2f3701c4cb9fa Mon Sep 17 00:00:00 2001 From: Derek Perez Date: Wed, 28 Jan 2015 15:08:25 -0800 Subject: [PATCH 0197/1541] Proto2 Coder support. --- .../dataflow/sdk/coders/Proto2Coder.java | 139 ++++++++++++++++++ .../dataflow/sdk/coders/Proto2CoderTest.java | 64 ++++++++ 2 files changed, 203 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java new file mode 100644 index 0000000000000..6b89ec140fc20 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.coders; + +import com.google.protobuf.ExtensionRegistry; +import com.google.protobuf.Message; +import com.google.protobuf.Parser; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; + +/** + * An encoder using Google Protocol Buffers 2 binary format. + *

    + * To learn more about Protocol Buffers, visit: + * https://developers.google.com/protocol-buffers + *

    + * To use, specify the {@code Coder} type on a PCollection: + *

+ * <pre>
+ * {@code
+ * PCollection<MyProto.Message> records =
+ *     input.apply(...)
+ *          .setCoder(Proto2Coder.of(MyProto.Message.class));
+ * }
+ * </pre>
+ *
    + * Custom message extensions are also supported, but the coder must be made + * aware of them explicitly: + *

+ * <pre>
+ * {@code
+ * PCollection<MyProto.Message> records =
+ *     input.apply(...)
+ *          .setCoder(Proto2Coder.of(MyProto.Message.class)
+ *          .withExtensionsFrom(MyProto.class));
+ * }
+ * </pre>
    + * + * @param the type of elements handled by this coder, must extend {@code Message} + */ +public class Proto2Coder extends CustomCoder { + + /** + * Produces a new Proto2Coder instance, for a given Protobuf message class. + */ + public static Proto2Coder of(Class protoMessageClass) { + return new Proto2Coder<>(protoMessageClass); + } + + private final Class protoMessageClass; + private final List> extensionClassList = new ArrayList<>(); + private transient Parser parser; + private transient ExtensionRegistry extensionRegistry; + + Proto2Coder(Class protoMessageClass) { + this.protoMessageClass = protoMessageClass; + } + + /** + * Adds custom Protobuf extensions to the coder. + * + * @param extensionHosts must be a class that defines a static + * method name {@code registerAllExtensions} + */ + public Proto2Coder withExtensionsFrom(Class... extensionHosts) { + for (Class extensionHost : extensionHosts) { + try { + // Attempt to access the declared method, to make sure its present. + extensionHost + .getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException(e); + } + extensionClassList.add(extensionHost); + } + return this; + } + + @Override + public void encode(T value, OutputStream outStream, Context context) throws IOException { + value.writeTo(outStream); + } + + @Override + public T decode(InputStream inStream, Context context) throws IOException { + return (T) getParser().parseFrom(inStream, getExtensionRegistry()); + } + + private Parser getParser() { + if (parser != null) { + return parser; + } + try { + T protoMessageInstance = (T) protoMessageClass + .getMethod("getDefaultInstance").invoke(null); + parser = (Parser) protoMessageInstance.getParserForType(); + } catch (IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new IllegalArgumentException(e); + } + return parser; + } + + private ExtensionRegistry getExtensionRegistry() { + if (extensionRegistry != null) { + return extensionRegistry; + } + extensionRegistry = ExtensionRegistry.newInstance(); + for (Class extensionHost : extensionClassList) { + try { + extensionHost + .getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class) + .invoke(null, extensionRegistry); + } catch (IllegalAccessException + | InvocationTargetException + | NoSuchMethodException e) { + throw new IllegalStateException(e); + } + } + return extensionRegistry; + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java new file mode 100644 index 0000000000000..11b5f6e85b7c9 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageA; +import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageB; +import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageC; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Tests for Proto2Coder. + */ +@RunWith(JUnit4.class) +public class Proto2CoderTest { + + @Test + public void testCoderEncodeDecodeEqual() throws Exception { + MessageA value = MessageA.newBuilder() + .setField1("hello") + .addField2(MessageB.newBuilder() + .setField1(true).build()) + .addField2(MessageB.newBuilder() + .setField1(false).build()) + .build(); + CoderProperties.coderDecodeEncodeEqual(Proto2Coder.of(MessageA.class), value); + } + + @Test + public void testCoderEncodeDecodeExtensionsEqual() throws Exception { + MessageC value = MessageC.newBuilder() + .setExtension(Proto2CoderTestMessages.field1, + MessageA.newBuilder() + .setField1("hello") + .addField2(MessageB.newBuilder() + .setField1(true) + .build()) + .build()) + .setExtension(Proto2CoderTestMessages.field2, + MessageB.newBuilder() + .setField1(false) + .build()) + .build(); + CoderProperties.coderDecodeEncodeEqual( + Proto2Coder.of(MessageC.class) + .withExtensionsFrom(Proto2CoderTestMessages.class), value); + } +} From 3f38c967fdb59395c183e7b94d8f1711b335744f Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 27 Feb 2015 13:52:08 -0800 Subject: [PATCH 0198/1541] Enable Google API tracing on the worker. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87370988 --- .../dataflow/sdk/options/GoogleApiDebugOptions.java | 7 +++++++ .../sdk/options/GoogleApiDebugOptionsTest.java | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java index 72e4bae0032d8..6675de8b6f335 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; import java.io.IOException; import java.util.regex.Matcher; @@ -99,5 +100,11 @@ public void initialize(AbstractGoogleClientRequest request) throws IOExceptio request.set("trace", token); } } + + @JsonValue + @Override + public String toString() { + return clientRequestName + "#" + token; + } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java index 147021a710e12..4df9fe9aaed00 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java @@ -27,6 +27,8 @@ import com.google.cloud.dataflow.sdk.util.TestCredential; import com.google.cloud.dataflow.sdk.util.Transport; +import com.fasterxml.jackson.databind.ObjectMapper; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -131,4 +133,13 @@ public void testMatchingAgainstRequestType() throws Exception { 
options.getDataflowClient().v1b3().projects().jobs().create("testProjectId", null); assertNull(createRequest.get("trace")); } + + @Test + public void testDeserializationAndSerializationOfGoogleApiTracer() { + String serializedValue = "Api#Token"; + ObjectMapper objectMapper = new ObjectMapper(); + assertEquals(serializedValue, + objectMapper.convertValue( + objectMapper.convertValue(serializedValue, GoogleApiTracer.class), String.class)); + } } From 4f14cb6a5a155d69733f5168f8862a93f0e17447 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 27 Feb 2015 15:02:24 -0800 Subject: [PATCH 0199/1541] Update Copyright notice to 2015. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87377460 --- checkstyle.xml | 4 ++-- examples/pom.xml | 2 +- .../com/google/cloud/dataflow/examples/BigQueryTornadoes.java | 2 +- .../google/cloud/dataflow/examples/CombinePerKeyExamples.java | 2 +- .../google/cloud/dataflow/examples/DatastoreWordCount.java | 2 +- .../java/com/google/cloud/dataflow/examples/DeDupExample.java | 2 +- .../com/google/cloud/dataflow/examples/FilterExamples.java | 2 +- .../java/com/google/cloud/dataflow/examples/JoinExamples.java | 2 +- .../com/google/cloud/dataflow/examples/MaxPerKeyExamples.java | 2 +- .../google/cloud/dataflow/examples/PubsubFileInjector.java | 2 +- .../google/cloud/dataflow/examples/StreamingWordExtract.java | 2 +- .../main/java/com/google/cloud/dataflow/examples/TfIdf.java | 2 +- .../google/cloud/dataflow/examples/TopWikipediaSessions.java | 2 +- .../google/cloud/dataflow/examples/WindowingWordCount.java | 2 +- .../java/com/google/cloud/dataflow/examples/WordCount.java | 2 +- .../google/cloud/dataflow/examples/BigQueryTornadoesTest.java | 2 +- .../cloud/dataflow/examples/CombinePerKeyExamplesTest.java | 2 +- .../com/google/cloud/dataflow/examples/DeDupExampleTest.java | 2 +- .../google/cloud/dataflow/examples/FilterExamplesTest.java | 2 +- .../com/google/cloud/dataflow/examples/JoinExamplesTest.java | 2 +- .../google/cloud/dataflow/examples/MaxPerKeyExamplesTest.java | 2 +- .../java/com/google/cloud/dataflow/examples/TfIdfTest.java | 2 +- .../cloud/dataflow/examples/TopWikipediaSessionsTest.java | 2 +- .../com/google/cloud/dataflow/examples/WordCountTest.java | 2 +- pom.xml | 2 +- sdk/pom.xml | 2 +- sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java | 2 +- .../java/com/google/cloud/dataflow/sdk/PipelineResult.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/AtomicCoder.java | 2 +- .../java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java | 2 +- .../cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java | 2 +- .../google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/coders/Coder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CoderException.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CoderRegistry.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CollectionCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CustomCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/DefaultCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/DelegateCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/DoubleCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/EntityCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/InstantCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/IterableCoder.java | 2 +- .../google/cloud/dataflow/sdk/coders/IterableLikeCoder.java | 
2 +- .../java/com/google/cloud/dataflow/sdk/coders/KvCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/KvCoderBase.java | 2 +- .../java/com/google/cloud/dataflow/sdk/coders/ListCoder.java | 2 +- .../java/com/google/cloud/dataflow/sdk/coders/MapCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/MapCoderBase.java | 2 +- .../google/cloud/dataflow/sdk/coders/SerializableCoder.java | 2 +- .../java/com/google/cloud/dataflow/sdk/coders/SetCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/StandardCoder.java | 2 +- .../google/cloud/dataflow/sdk/coders/StringDelegateCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java | 2 +- .../google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java | 2 +- .../google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/VarIntCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/VarLongCoder.java | 2 +- .../java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/package-info.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/ReadSource.java | 2 +- .../com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/io/Source.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/io/TextIO.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/package-info.java | 2 +- .../cloud/dataflow/sdk/options/ApplicationNameOptions.java | 2 +- .../google/cloud/dataflow/sdk/options/BigQueryOptions.java | 2 +- .../dataflow/sdk/options/BlockingDataflowPipelineOptions.java | 2 +- .../cloud/dataflow/sdk/options/CloudDebuggerOptions.java | 2 +- .../dataflow/sdk/options/DataflowPipelineDebugOptions.java | 2 +- .../cloud/dataflow/sdk/options/DataflowPipelineOptions.java | 2 +- .../sdk/options/DataflowPipelineWorkerPoolOptions.java | 2 +- .../dataflow/sdk/options/DataflowWorkerHarnessOptions.java | 2 +- .../java/com/google/cloud/dataflow/sdk/options/Default.java | 2 +- .../cloud/dataflow/sdk/options/DefaultValueFactory.java | 2 +- .../com/google/cloud/dataflow/sdk/options/Description.java | 2 +- .../cloud/dataflow/sdk/options/DirectPipelineOptions.java | 2 +- .../com/google/cloud/dataflow/sdk/options/GcpOptions.java | 2 +- .../com/google/cloud/dataflow/sdk/options/GcsOptions.java | 2 +- .../cloud/dataflow/sdk/options/GoogleApiDebugOptions.java | 2 +- .../google/cloud/dataflow/sdk/options/PipelineOptions.java | 2 +- .../cloud/dataflow/sdk/options/PipelineOptionsFactory.java | 2 +- .../cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java | 2 +- .../cloud/dataflow/sdk/options/PipelineOptionsValidator.java | 2 +- .../cloud/dataflow/sdk/options/ProxyInvocationHandler.java | 2 +- .../google/cloud/dataflow/sdk/options/StreamingOptions.java | 2 +- .../com/google/cloud/dataflow/sdk/options/Validation.java | 2 +- .../com/google/cloud/dataflow/sdk/options/package-info.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/package-info.java | 2 +- .../dataflow/sdk/runners/BlockingDataflowPipelineRunner.java | 2 +- .../google/cloud/dataflow/sdk/runners/DataflowPipeline.java | 2 +- .../cloud/dataflow/sdk/runners/DataflowPipelineJob.java | 2 +- .../cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java | 2 +- 
.../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 2 +- .../dataflow/sdk/runners/DataflowPipelineRunnerHooks.java | 2 +- .../dataflow/sdk/runners/DataflowPipelineTranslator.java | 2 +- .../com/google/cloud/dataflow/sdk/runners/DirectPipeline.java | 2 +- .../cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java | 2 +- .../cloud/dataflow/sdk/runners/DirectPipelineRunner.java | 2 +- .../com/google/cloud/dataflow/sdk/runners/PipelineRunner.java | 2 +- .../cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java | 2 +- .../cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java | 2 +- .../google/cloud/dataflow/sdk/runners/TransformHierarchy.java | 2 +- .../google/cloud/dataflow/sdk/runners/TransformTreeNode.java | 2 +- .../cloud/dataflow/sdk/runners/dataflow/AvroIOTranslator.java | 2 +- .../sdk/runners/dataflow/BasicSerializableSourceFormat.java | 2 +- .../dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java | 2 +- .../dataflow/sdk/runners/dataflow/DatastoreIOTranslator.java | 2 +- .../dataflow/sdk/runners/dataflow/PubsubIOTranslator.java | 2 +- .../dataflow/sdk/runners/dataflow/ReadSourceTranslator.java | 2 +- .../cloud/dataflow/sdk/runners/dataflow/TextIOTranslator.java | 2 +- .../cloud/dataflow/sdk/runners/dataflow/package-info.java | 2 +- .../com/google/cloud/dataflow/sdk/runners/package-info.java | 2 +- .../dataflow/sdk/runners/worker/ApplianceShuffleReader.java | 2 +- .../dataflow/sdk/runners/worker/ApplianceShuffleWriter.java | 2 +- .../dataflow/sdk/runners/worker/AssignWindowsParDoFn.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroByteReader.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroByteSink.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/AvroReader.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/AvroSink.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/BigQueryReader.java | 2 +- .../dataflow/sdk/runners/worker/BigQueryReaderFactory.java | 2 +- .../dataflow/sdk/runners/worker/ByteArrayShufflePosition.java | 2 +- .../sdk/runners/worker/ChunkingShuffleBatchReader.java | 2 +- .../sdk/runners/worker/ChunkingShuffleEntryWriter.java | 2 +- .../cloud/dataflow/sdk/runners/worker/CombineValuesFn.java | 2 +- .../sdk/runners/worker/CopyableSeekableByteChannel.java | 2 +- .../sdk/runners/worker/DataflowWorkProgressUpdater.java | 2 +- .../cloud/dataflow/sdk/runners/worker/DataflowWorker.java | 2 +- .../dataflow/sdk/runners/worker/DataflowWorkerHarness.java | 2 +- .../cloud/dataflow/sdk/runners/worker/FileBasedReader.java | 2 +- .../sdk/runners/worker/GroupAlsoByWindowsParDoFn.java | 2 +- .../dataflow/sdk/runners/worker/GroupingShuffleReader.java | 2 +- .../sdk/runners/worker/GroupingShuffleReaderFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/InMemoryReader.java | 2 +- .../dataflow/sdk/runners/worker/InMemoryReaderFactory.java | 2 +- .../dataflow/sdk/runners/worker/LazyMultiReaderIterator.java | 2 +- .../dataflow/sdk/runners/worker/MapTaskExecutorFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/NormalParDoFn.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/OrderedCode.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java | 2 +- .../sdk/runners/worker/PartitioningShuffleReader.java | 2 +- .../sdk/runners/worker/PartitioningShuffleReaderFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/PubsubReader.java | 2 +- 
.../google/cloud/dataflow/sdk/runners/worker/PubsubSink.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ReaderFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleEntryWriter.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleLibrary.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleReader.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleSinkFactory.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleWriter.java | 2 +- .../cloud/dataflow/sdk/runners/worker/SideInputUtils.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/SinkFactory.java | 2 +- .../dataflow/sdk/runners/worker/SourceFormatFactory.java | 2 +- .../dataflow/sdk/runners/worker/SourceOperationExecutor.java | 2 +- .../sdk/runners/worker/SourceOperationExecutorFactory.java | 2 +- .../dataflow/sdk/runners/worker/SourceTranslationUtils.java | 2 +- .../dataflow/sdk/runners/worker/StreamingDataflowWorker.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/TextReader.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TextReaderFactory.java | 2 +- .../google/cloud/dataflow/sdk/runners/worker/TextSink.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TextSinkFactory.java | 2 +- .../dataflow/sdk/runners/worker/UngroupedShuffleReader.java | 2 +- .../sdk/runners/worker/UngroupedShuffleReaderFactory.java | 2 +- .../dataflow/sdk/runners/worker/UngroupedWindmillReader.java | 2 +- .../cloud/dataflow/sdk/runners/worker/WindmillSink.java | 2 +- .../dataflow/sdk/runners/worker/WindowingWindmillReader.java | 2 +- .../worker/logging/DataflowWorkerLoggingFormatter.java | 2 +- .../worker/logging/DataflowWorkerLoggingInitializer.java | 2 +- .../cloud/dataflow/sdk/runners/worker/package-info.java | 2 +- .../sdk/runners/worker/windmill/WindmillServerStub.java | 2 +- .../com/google/cloud/dataflow/sdk/testing/DataflowAssert.java | 2 +- .../google/cloud/dataflow/sdk/testing/RunnableOnService.java | 2 +- .../dataflow/sdk/testing/TestDataflowPipelineOptions.java | 2 +- .../dataflow/sdk/testing/TestDataflowPipelineRunner.java | 2 +- .../com/google/cloud/dataflow/sdk/testing/TestPipeline.java | 2 +- .../google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/testing/package-info.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/Aggregator.java | 2 +- .../cloud/dataflow/sdk/transforms/ApproximateQuantiles.java | 2 +- .../cloud/dataflow/sdk/transforms/ApproximateUnique.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/Combine.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Count.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Create.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/DoFn.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/DoFnTester.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Filter.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/First.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/Flatten.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/GroupByKey.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Keys.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Max.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Mean.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Min.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/PTransform.java | 2 +- 
.../java/com/google/cloud/dataflow/sdk/transforms/ParDo.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/Partition.java | 2 +- .../google/cloud/dataflow/sdk/transforms/RateLimiting.java | 2 +- .../cloud/dataflow/sdk/transforms/RemoveDuplicates.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Sample.java | 2 +- .../cloud/dataflow/sdk/transforms/SerializableComparator.java | 2 +- .../cloud/dataflow/sdk/transforms/SerializableFunction.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Sum.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Top.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/Values.java | 2 +- .../java/com/google/cloud/dataflow/sdk/transforms/View.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/WithKeys.java | 2 +- .../cloud/dataflow/sdk/transforms/join/CoGbkResult.java | 2 +- .../cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java | 2 +- .../cloud/dataflow/sdk/transforms/join/CoGroupByKey.java | 2 +- .../dataflow/sdk/transforms/join/KeyedPCollectionTuple.java | 2 +- .../cloud/dataflow/sdk/transforms/join/RawUnionValue.java | 2 +- .../google/cloud/dataflow/sdk/transforms/join/UnionCoder.java | 2 +- .../cloud/dataflow/sdk/transforms/join/package-info.java | 2 +- .../google/cloud/dataflow/sdk/transforms/package-info.java | 2 +- .../dataflow/sdk/transforms/windowing/BoundedWindow.java | 2 +- .../dataflow/sdk/transforms/windowing/CalendarWindows.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/FixedWindows.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java | 2 +- .../dataflow/sdk/transforms/windowing/GlobalWindows.java | 2 +- .../dataflow/sdk/transforms/windowing/IntervalWindow.java | 2 +- .../dataflow/sdk/transforms/windowing/InvalidWindows.java | 2 +- .../transforms/windowing/MergeOverlappingIntervalWindows.java | 2 +- .../dataflow/sdk/transforms/windowing/NonMergingWindowFn.java | 2 +- .../sdk/transforms/windowing/PartitioningWindowFn.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/Sessions.java | 2 +- .../dataflow/sdk/transforms/windowing/SlidingWindows.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/Window.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/WindowFn.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/package-info.java | 2 +- .../com/google/cloud/dataflow/sdk/util/AbstractWindowSet.java | 2 +- .../com/google/cloud/dataflow/sdk/util/AggregatorImpl.java | 2 +- .../com/google/cloud/dataflow/sdk/util/ApiErrorExtractor.java | 2 +- .../google/cloud/dataflow/sdk/util/AppEngineEnvironment.java | 2 +- .../com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java | 2 +- .../dataflow/sdk/util/AttemptBoundedExponentialBackOff.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/Base64Utils.java | 2 +- .../cloud/dataflow/sdk/util/BatchModeExecutionContext.java | 2 +- .../google/cloud/dataflow/sdk/util/BigQueryTableInserter.java | 2 +- .../cloud/dataflow/sdk/util/BigQueryTableRowIterator.java | 2 +- .../google/cloud/dataflow/sdk/util/BoundedQueueExecutor.java | 2 +- .../google/cloud/dataflow/sdk/util/BufferingWindowSet.java | 2 +- .../com/google/cloud/dataflow/sdk/util/CloudCounterUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/util/CloudKnownType.java | 2 +- .../com/google/cloud/dataflow/sdk/util/CloudMetricUtils.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/CloudObject.java | 2 +- .../com/google/cloud/dataflow/sdk/util/CloudSourceUtils.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/CoderUtils.java | 2 +- 
.../com/google/cloud/dataflow/sdk/util/CredentialFactory.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/Credentials.java | 2 +- .../google/cloud/dataflow/sdk/util/DataflowPathValidator.java | 2 +- .../google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java | 2 +- .../cloud/dataflow/sdk/util/DirectModeExecutionContext.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/DoFnContext.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java | 2 +- .../google/cloud/dataflow/sdk/util/DoFnProcessContext.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java | 2 +- .../com/google/cloud/dataflow/sdk/util/ExecutionContext.java | 2 +- .../google/cloud/dataflow/sdk/util/FileIOChannelFactory.java | 2 +- .../com/google/cloud/dataflow/sdk/util/GCloudCredential.java | 2 +- .../google/cloud/dataflow/sdk/util/GcpCredentialFactory.java | 2 +- .../google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/GcsStager.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java | 2 +- .../cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java | 2 +- .../com/google/cloud/dataflow/sdk/util/IOChannelFactory.java | 2 +- .../com/google/cloud/dataflow/sdk/util/IOChannelUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/util/InstanceBuilder.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/MimeTypes.java | 2 +- .../com/google/cloud/dataflow/sdk/util/MonitoringUtil.java | 2 +- .../google/cloud/dataflow/sdk/util/NoopCredentialFactory.java | 2 +- .../com/google/cloud/dataflow/sdk/util/NoopPathValidator.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/NoopStager.java | 2 +- .../com/google/cloud/dataflow/sdk/util/OutputReference.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/PTuple.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/PackageUtil.java | 2 +- .../cloud/dataflow/sdk/util/PartitionBufferingWindowSet.java | 2 +- .../com/google/cloud/dataflow/sdk/util/PathValidator.java | 2 +- .../com/google/cloud/dataflow/sdk/util/PropertyNames.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/ReaderUtils.java | 2 +- .../cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java | 2 +- .../com/google/cloud/dataflow/sdk/util/SerializableUtils.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/Serializer.java | 2 +- .../cloud/dataflow/sdk/util/ShardingWritableByteChannel.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/Stager.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/StateFetcher.java | 2 +- .../dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java | 2 +- .../dataflow/sdk/util/StreamingModeExecutionContext.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/StringUtils.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/Structs.java | 2 +- .../com/google/cloud/dataflow/sdk/util/TestCredential.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/TimeUtil.java | 2 +- .../com/google/cloud/dataflow/sdk/util/TimerOrElement.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/Transport.java | 2 +- .../com/google/cloud/dataflow/sdk/util/UserCodeException.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/Values.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/util/VarInt.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/WindowUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/util/WindowedValue.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/Counter.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/CounterSet.java 
| 2 +- .../dataflow/sdk/util/common/ElementByteSizeObservable.java | 2 +- .../sdk/util/common/ElementByteSizeObservableIterable.java | 2 +- .../sdk/util/common/ElementByteSizeObservableIterator.java | 2 +- .../dataflow/sdk/util/common/ElementByteSizeObserver.java | 2 +- .../cloud/dataflow/sdk/util/common/ForwardingReiterator.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/Metric.java | 2 +- .../cloud/dataflow/sdk/util/common/PeekingReiterator.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/Reiterable.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/Reiterator.java | 2 +- .../google/cloud/dataflow/sdk/util/common/package-info.java | 2 +- .../sdk/util/common/worker/BatchingShuffleEntryReader.java | 2 +- .../sdk/util/common/worker/CachingShuffleBatchReader.java | 2 +- .../dataflow/sdk/util/common/worker/FlattenOperation.java | 2 +- .../sdk/util/common/worker/GroupingShuffleEntryIterator.java | 2 +- .../sdk/util/common/worker/KeyGroupedShuffleEntries.java | 2 +- .../dataflow/sdk/util/common/worker/MapTaskExecutor.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/Operation.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/OutputReceiver.java | 2 +- .../google/cloud/dataflow/sdk/util/common/worker/ParDoFn.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/ParDoOperation.java | 2 +- .../sdk/util/common/worker/PartialGroupByKeyOperation.java | 2 +- .../dataflow/sdk/util/common/worker/ProgressTracker.java | 2 +- .../dataflow/sdk/util/common/worker/ProgressTrackerGroup.java | 2 +- .../sdk/util/common/worker/ProgressTrackingReiterator.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/ReadOperation.java | 2 +- .../google/cloud/dataflow/sdk/util/common/worker/Reader.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/Receiver.java | 2 +- .../dataflow/sdk/util/common/worker/ReceivingOperation.java | 2 +- .../dataflow/sdk/util/common/worker/ShuffleBatchReader.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/ShuffleEntry.java | 2 +- .../dataflow/sdk/util/common/worker/ShuffleEntryReader.java | 2 +- .../dataflow/sdk/util/common/worker/ShufflePosition.java | 2 +- .../google/cloud/dataflow/sdk/util/common/worker/Sink.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/SourceFormat.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/StateSampler.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/WorkExecutor.java | 2 +- .../dataflow/sdk/util/common/worker/WorkProgressUpdater.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/WriteOperation.java | 2 +- .../cloud/dataflow/sdk/util/common/worker/package-info.java | 2 +- .../com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java | 2 +- .../google/cloud/dataflow/sdk/util/gcsfs/package-info.java | 2 +- .../cloud/dataflow/sdk/util/gcsio/ClientRequestHelper.java | 2 +- .../dataflow/sdk/util/gcsio/GoogleCloudStorageExceptions.java | 2 +- .../sdk/util/gcsio/GoogleCloudStorageReadChannel.java | 2 +- .../sdk/util/gcsio/GoogleCloudStorageWriteChannel.java | 2 +- .../util/gcsio/LoggingMediaHttpUploaderProgressListener.java | 2 +- .../cloud/dataflow/sdk/util/gcsio/StorageResourceId.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/package-info.java | 2 +- .../com/google/cloud/dataflow/sdk/values/CodedTupleTag.java | 2 +- .../google/cloud/dataflow/sdk/values/CodedTupleTagMap.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/values/KV.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/PBegin.java | 2 +- .../com/google/cloud/dataflow/sdk/values/PCollection.java | 2 +- 
.../com/google/cloud/dataflow/sdk/values/PCollectionList.java | 2 +- .../google/cloud/dataflow/sdk/values/PCollectionTuple.java | 2 +- .../com/google/cloud/dataflow/sdk/values/PCollectionView.java | 2 +- .../main/java/com/google/cloud/dataflow/sdk/values/PDone.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/PInput.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/POutput.java | 2 +- .../google/cloud/dataflow/sdk/values/POutputValueBase.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/PValue.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/PValueBase.java | 2 +- .../google/cloud/dataflow/sdk/values/TimestampedValue.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/TupleTag.java | 2 +- .../com/google/cloud/dataflow/sdk/values/TupleTagList.java | 2 +- .../com/google/cloud/dataflow/sdk/values/TypedPValue.java | 2 +- .../com/google/cloud/dataflow/sdk/values/package-info.java | 2 +- sdk/src/main/proto/windmill.proto | 2 +- .../test/java/com/google/cloud/dataflow/sdk/PipelineTest.java | 2 +- .../test/java/com/google/cloud/dataflow/sdk/TestUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CoderProperties.java | 2 +- .../google/cloud/dataflow/sdk/coders/CoderRegistryTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/CollectionCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/CustomCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/DefaultCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/DelegateCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/EntityCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/InstantCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/IterableCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/KvCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/ListCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/MapCoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/SerializableCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/SetCoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/StringDelegateCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/StringUtf8CoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/TableRowJsonCoderTest.java | 2 +- .../cloud/dataflow/sdk/coders/TextualIntegerCoderTest.java | 2 +- .../com/google/cloud/dataflow/sdk/coders/VarIntCoderTest.java | 2 +- .../google/cloud/dataflow/sdk/coders/VarLongCoderTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/AvroIOTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/BigQueryIOTest.java | 2 +- .../com/google/cloud/dataflow/sdk/io/DatastoreIOTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/io/TextIOTest.java | 2 +- .../dataflow/sdk/options/DataflowPipelineOptionsTest.java | 2 +- .../cloud/dataflow/sdk/options/GoogleApiDebugOptionsTest.java | 2 +- .../dataflow/sdk/options/PipelineOptionsFactoryTest.java | 2 +- .../cloud/dataflow/sdk/options/PipelineOptionsTest.java | 2 +- .../dataflow/sdk/options/PipelineOptionsValidatorTest.java | 2 +- .../dataflow/sdk/options/ProxyInvocationHandlerTest.java | 2 +- .../sdk/runners/BlockingDataflowPipelineRunnerTest.java | 2 +- .../cloud/dataflow/sdk/runners/DataflowPipelineJobTest.java | 2 +- 
.../dataflow/sdk/runners/DataflowPipelineRegistrarTest.java | 2 +- .../dataflow/sdk/runners/DataflowPipelineRunnerTest.java | 2 +- .../dataflow/sdk/runners/DataflowPipelineTranslatorTest.java | 2 +- .../dataflow/sdk/runners/DirectPipelineRegistrarTest.java | 2 +- .../google/cloud/dataflow/sdk/runners/PipelineRunnerTest.java | 2 +- .../google/cloud/dataflow/sdk/runners/TransformTreeTest.java | 2 +- .../runners/dataflow/BasicSerializableSourceFormatTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroByteReaderTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroByteSinkTest.java | 2 +- .../dataflow/sdk/runners/worker/AvroReaderFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroReaderTest.java | 2 +- .../dataflow/sdk/runners/worker/AvroSinkFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/AvroSinkTest.java | 2 +- .../sdk/runners/worker/BigQueryReaderFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java | 2 +- .../dataflow/sdk/runners/worker/CombineValuesFnTest.java | 2 +- .../sdk/runners/worker/CopyableSeekableByteChannelTest.java | 2 +- .../sdk/runners/worker/DataflowWorkProgressUpdaterTest.java | 2 +- .../sdk/runners/worker/DataflowWorkerHarnessTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java | 2 +- .../dataflow/sdk/runners/worker/DatastoreReaderTest.java | 2 +- .../sdk/runners/worker/GroupingShuffleReaderTest.java | 2 +- .../sdk/runners/worker/InMemoryReaderFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java | 2 +- .../sdk/runners/worker/MapTaskExecutorFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/NormalParDoFnTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/OrderedCodeTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ParDoFnFactoryTest.java | 2 +- .../sdk/runners/worker/PartitioningShuffleReaderTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ReaderFactoryTest.java | 2 +- .../dataflow/sdk/runners/worker/ShuffleReaderFactoryTest.java | 2 +- .../dataflow/sdk/runners/worker/ShuffleSinkFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/ShuffleSinkTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/SideInputUtilsTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/SinkFactoryTest.java | 2 +- .../sdk/runners/worker/StreamingDataflowWorkerTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TestShuffleReader.java | 2 +- .../dataflow/sdk/runners/worker/TestShuffleReaderTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TestShuffleWriter.java | 2 +- .../dataflow/sdk/runners/worker/TextReaderFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TextReaderTest.java | 2 +- .../dataflow/sdk/runners/worker/TextSinkFactoryTest.java | 2 +- .../cloud/dataflow/sdk/runners/worker/TextSinkTest.java | 2 +- .../sdk/runners/worker/UngroupedShuffleReaderTest.java | 2 +- .../worker/logging/DataflowWorkerLoggingFormatterTest.java | 2 +- .../worker/logging/DataflowWorkerLoggingInitializerTest.java | 2 +- .../google/cloud/dataflow/sdk/testing/DataflowAssertTest.java | 2 +- .../com/google/cloud/dataflow/sdk/testing/ExpectedLogs.java | 2 +- .../google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java | 2 +- .../cloud/dataflow/sdk/testing/FastNanoClockAndSleeper.java | 2 +- .../dataflow/sdk/testing/FastNanoClockAndSleeperTest.java | 2 +- .../cloud/dataflow/sdk/testing/ResetDateTimeProvider.java | 2 +- .../cloud/dataflow/sdk/testing/ResetDateTimeProviderTest.java | 2 +- 
.../dataflow/sdk/testing/RestoreDataflowLoggingFormatter.java | 2 +- .../sdk/testing/RestoreDataflowLoggingFormatterTest.java | 2 +- .../cloud/dataflow/sdk/testing/RestoreSystemProperties.java | 2 +- .../dataflow/sdk/testing/RestoreSystemPropertiesTest.java | 2 +- .../google/cloud/dataflow/sdk/testing/TestPipelineTest.java | 2 +- .../dataflow/sdk/transforms/ApproximateQuantilesTest.java | 2 +- .../cloud/dataflow/sdk/transforms/ApproximateUniqueTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/CombineTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/CountTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/CreateTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/FilterTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/FirstTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/FlattenTest.java | 2 +- .../google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/KeysTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/KvSwapTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/ParDoTest.java | 2 +- .../google/cloud/dataflow/sdk/transforms/PartitionTest.java | 2 +- .../cloud/dataflow/sdk/transforms/RateLimitingTest.java | 2 +- .../cloud/dataflow/sdk/transforms/RemoveDuplicatesTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/SampleTest.java | 2 +- .../cloud/dataflow/sdk/transforms/SimpleStatsFnsTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/TopTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/ValuesTest.java | 2 +- .../com/google/cloud/dataflow/sdk/transforms/ViewTest.java | 2 +- .../google/cloud/dataflow/sdk/transforms/WithKeysTest.java | 2 +- .../dataflow/sdk/transforms/join/CoGbkResultCoderTest.java | 2 +- .../cloud/dataflow/sdk/transforms/join/CoGroupByKeyTest.java | 2 +- .../cloud/dataflow/sdk/transforms/join/UnionCoderTest.java | 2 +- .../sdk/transforms/windowing/CalendarWindowsTest.java | 2 +- .../dataflow/sdk/transforms/windowing/FixedWindowsTest.java | 2 +- .../cloud/dataflow/sdk/transforms/windowing/SessionsTest.java | 2 +- .../dataflow/sdk/transforms/windowing/SlidingWindowsTest.java | 2 +- .../dataflow/sdk/transforms/windowing/WindowingTest.java | 2 +- .../google/cloud/dataflow/sdk/util/AggregatorImplTest.java | 2 +- .../sdk/util/AttemptBoundedExponentialBackOffTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/Base64UtilsTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/BigQueryUtilTest.java | 2 +- .../google/cloud/dataflow/sdk/util/CloudMetricUtilsTest.java | 2 +- .../google/cloud/dataflow/sdk/util/CloudSourceUtilsTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/GcsUtilTest.java | 2 +- .../cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java | 2 +- .../google/cloud/dataflow/sdk/util/IOChannelUtilsTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/IOFactoryTest.java | 2 +- .../google/cloud/dataflow/sdk/util/InstanceBuilderTest.java | 2 +- .../google/cloud/dataflow/sdk/util/MonitoringUtilTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/PTupleTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/PackageUtilTest.java | 2 +- .../dataflow/sdk/util/RetryHttpRequestInitializerTest.java | 2 +- .../google/cloud/dataflow/sdk/util/SerializableUtilsTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/SerializerTest.java | 2 +- .../sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java | 2 +- 
.../com/google/cloud/dataflow/sdk/util/StringUtilsTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/StructsTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/TimeUtilTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/util/VarIntTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/WindowedValueTest.java | 2 +- .../google/cloud/dataflow/sdk/util/common/CounterSetTest.java | 2 +- .../google/cloud/dataflow/sdk/util/common/CounterTest.java | 2 +- .../cloud/dataflow/sdk/util/common/CounterTestUtils.java | 2 +- .../com/google/cloud/dataflow/sdk/util/common/MetricTest.java | 2 +- .../dataflow/sdk/util/common/TaggedReiteratorListTest.java | 2 +- .../util/common/worker/BatchingShuffleEntryReaderTest.java | 2 +- .../sdk/util/common/worker/CachingShuffleBatchReaderTest.java | 2 +- .../dataflow/sdk/util/common/worker/ExecutorTestUtils.java | 2 +- .../dataflow/sdk/util/common/worker/FlattenOperationTest.java | 2 +- .../dataflow/sdk/util/common/worker/MapTaskExecutorTest.java | 2 +- .../dataflow/sdk/util/common/worker/OutputReceiverTest.java | 2 +- .../dataflow/sdk/util/common/worker/ParDoOperationTest.java | 2 +- .../util/common/worker/PartialGroupByKeyOperationTest.java | 2 +- .../dataflow/sdk/util/common/worker/ReadOperationTest.java | 2 +- .../dataflow/sdk/util/common/worker/ShuffleEntryTest.java | 2 +- .../dataflow/sdk/util/common/worker/StateSamplerTest.java | 2 +- .../dataflow/sdk/util/common/worker/WorkExecutorTest.java | 2 +- .../dataflow/sdk/util/common/worker/WriteOperationTest.java | 2 +- .../com/google/cloud/dataflow/sdk/util/gcsfs/GcsPathTest.java | 2 +- .../gcsio/LoggingMediaHttpUploaderProgressListenerTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/KVTest.java | 2 +- .../google/cloud/dataflow/sdk/values/PCollectionListTest.java | 2 +- .../java/com/google/cloud/dataflow/sdk/values/PDoneTest.java | 2 +- .../com/google/cloud/dataflow/sdk/values/TupleTagTest.java | 2 +- 551 files changed, 552 insertions(+), 552 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index 062f1346d9583..3e79c3d48028c 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -1,6 +1,6 @@ notice, so that this required text appears on the second line:
             /*
    -         * Copyright 2008 Google Inc.
    +         * Copyright 2015 Google Inc.
              *
              * (details of open-source license...)
           
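For reference, the file header that this check enforces, as it appears on sources added later in this series (for example the new CombiningWindowSet.java), looks roughly like the sketch below; the Apache 2.0 text is abbreviated here and the package line is only a hypothetical placeholder, not part of this patch:

    /*
     * Copyright (C) 2015 Google Inc.
     *
     * Licensed under the Apache License, Version 2.0 (the "License"); you may not
     * use this file except in compliance with the License. You may obtain a copy of
     * the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * (remainder of the standard Apache 2.0 notice...)
     */
    package com.google.cloud.dataflow.sdk.examples;  // hypothetical package, for illustration only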
    diff --git a/examples/pom.xml b/examples/pom.xml index 81de0a61629b3..863bf76459e2d 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -1,6 +1,6 @@ + -Xlint:-cast + -Xlint:-deprecation + -Xlint:-processing + -Xlint:-rawtypes + -Xlint:-try + -Xlint:-unchecked + -Xlint:-varargs + + + + true - true + + false From db05c447a439a40a542c0d08f9bd9241e9dfee60 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 3 Mar 2015 14:18:24 -0800 Subject: [PATCH 0210/1541] Do not transport options which control credential generation to the worker since the worker does not understand them. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87647672 --- .../com/google/cloud/dataflow/sdk/options/GcpOptions.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index a51336ae8986a..a884d3ec7ebb4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -70,6 +70,7 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { * This option controls which file to use when attempting to create the credentials using the * OAuth 2 webflow. */ + @JsonIgnore @Description("Path to a file containing Google API secret") String getSecretsFile(); void setSecretsFile(String value); @@ -81,6 +82,7 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { * This option if specified, needs be combined with the * {@link GcpOptions#getServiceAccountName() serviceAccountName}. */ + @JsonIgnore @Description("Path to a file containing the P12 service credentials") String getServiceAccountKeyfile(); void setServiceAccountKeyfile(String value); @@ -92,10 +94,12 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { * This option if specified, needs be combined with the * {@link GcpOptions#getServiceAccountKeyfile() serviceAccountKeyfile}. */ + @JsonIgnore @Description("Name of the service account for Google APIs") String getServiceAccountName(); void setServiceAccountName(String value); + @JsonIgnore @Description("The path to the gcloud binary. " + " Default is to search the system path.") String getGCloudPath(); From e63dd15137d8348aff6d7629163402de08876267 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 3 Mar 2015 17:29:33 -0800 Subject: [PATCH 0211/1541] Add additional documentation about worker logger configuration from the command line. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87666318 --- .../sdk/options/DataflowWorkerLoggingOptions.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java index e14b97e718d24..a38cd44ab7e02 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java @@ -47,10 +47,12 @@ public enum Level { * Later options with equivalent names override earlier options. *

    * See {@link WorkerLogLevelOverride} for more information on how to configure logging - * on a per {@link Class}, {@link Package}, or name basis. + * on a per {@link Class}, {@link Package}, or name basis. If used from the command line, + * the expected format is {@code Name#Level}, further details on + * {@link WorkerLogLevelOverride#create(String)}. */ WorkerLogLevelOverride[] getWorkerLogLevelOverrides(); - void setWorkerLogLevelOverrides(WorkerLogLevelOverride[] string); + void setWorkerLogLevelOverrides(WorkerLogLevelOverride[] workerLogLevelOverrides); /** * Defines a log level override for a specific class, package, or name. @@ -111,7 +113,11 @@ public static WorkerLogLevelOverride forName(String name, Level level) { } /** - * Expects a value of the form {@code Name#Level}. + * Expects a value of the form {@code Name#Level}. The {@code Name} generally + * represents the fully qualified Java {@link Class#getName() class name}, + * or fully qualified Java {@link Package#getName() package name}, or custom + * logger name. The {@code Level} represents the log level and must be one + * of {@link Level}. */ @JsonCreator public static WorkerLogLevelOverride create(String value) { From 95c496626e69e53d56a9f7af01ec70513f828d95 Mon Sep 17 00:00:00 2001 From: millsd Date: Tue, 3 Mar 2015 17:42:48 -0800 Subject: [PATCH 0212/1541] Use a custom UncaughtExceptionHandler in main in addition to on worker threads in the StreamingDataflowWorker [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87667368 --- .../sdk/runners/worker/DataflowWorkerHarness.java | 2 +- .../sdk/runners/worker/StreamingDataflowWorker.java | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index 96bce707b6111..a078d754abb1f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -92,7 +92,7 @@ public void uncaughtException(Thread t, Throwable e) { * Fetches and processes work units from the Dataflow service. 
*/ public static void main(String[] args) throws Exception { - Thread.currentThread().setUncaughtExceptionHandler(WorkerUncaughtExceptionHandler.INSTANCE); + Thread.setDefaultUncaughtExceptionHandler(WorkerUncaughtExceptionHandler.INSTANCE); DataflowWorkerLoggingInitializer.initialize(); DataflowWorkerHarnessOptions pipelineOptions = diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 612c5646c9ec1..05c7722aeb58b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -93,6 +93,9 @@ static MapTask parseMapTask(String input) throws IOException { } public static void main(String[] args) throws Exception { + Thread.setDefaultUncaughtExceptionHandler( + DataflowWorkerHarness.WorkerUncaughtExceptionHandler.INSTANCE); + DataflowWorkerLoggingInitializer.initialize(); DataflowWorkerHarnessOptions options = PipelineOptionsFactory.createFromSystemProperties(); @@ -153,19 +156,9 @@ public StreamingDataflowWorker( addComputation(mapTask); } this.threadFactory = new ThreadFactory() { - private final Thread.UncaughtExceptionHandler handler = - new Thread.UncaughtExceptionHandler() { - @Override - public void uncaughtException(Thread thread, Throwable e) { - LOG.error("Uncaught exception: ", e); - System.exit(1); - } - }; - @Override public Thread newThread(Runnable r) { Thread t = new Thread(r); - t.setUncaughtExceptionHandler(handler); t.setDaemon(true); return t; } From 7ddf5b12e4590a995da3be3cf600f708c14f8b7f Mon Sep 17 00:00:00 2001 From: millsd Date: Tue, 3 Mar 2015 17:57:24 -0800 Subject: [PATCH 0213/1541] Fix off-by-one bug in GroupAlsoByWindowsDoFn iterator logic. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87668345 --- .../sdk/util/GroupAlsoByWindowsDoFn.java | 2 +- .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index bd90a7f85690b..86876bfcf5bdc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -261,7 +261,7 @@ public void remove() { private void skipToValidElement() { while (iterator.hasNext()) { WindowedValue peek = iterator.peek(); - if (!peek.getTimestamp().isBefore(window.maxTimestamp())) { + if (peek.getTimestamp().isAfter(window.maxTimestamp())) { // We are past the end of this window, so there can't be any more // elements in this iterator. 
break; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index 4e60e41df72a0..677dac3ff8b0e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -161,6 +161,49 @@ public class GroupAlsoByWindowsDoFnTest { Matchers.contains(window(10, 30))); } + @Test public void testDiscontiguousWindows() throws Exception { + DoFnRunner>>, + KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); + + runner.startBundle(); + + runner.processElement(WindowedValue.valueInEmptyWindows( + KV.of("k", (Iterable>) Arrays.asList( + WindowedValue.of( + "v1", + new Instant(1), + Arrays.asList(window(0, 5))), + WindowedValue.of( + "v2", + new Instant(4), + Arrays.asList(window(1, 5))), + WindowedValue.of( + "v3", + new Instant(4), + Arrays.asList(window(0, 5))))))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder("v1", "v3")); + assertEquals(new Instant(4), item0.getTimestamp()); + assertThat(item0.getWindows(), + Matchers.contains(window(0, 5))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.contains("v2")); + assertEquals(new Instant(4), item1.getTimestamp()); + assertThat(item1.getWindows(), + Matchers.contains(window(1, 5))); + } + @Test public void testSessions() throws Exception { DoFnRunner>>, KV>, List> runner = From 573b31e0819497a1b249dfbabd47823237254fc0 Mon Sep 17 00:00:00 2001 From: zhouyunqing Date: Wed, 4 Mar 2015 01:46:37 -0800 Subject: [PATCH 0214/1541] Change the example in the comment to use non-deprecated pCollections instead of deprecated Flatten.create(). [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87691484 --- .../java/com/google/cloud/dataflow/sdk/transforms/Flatten.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index d0cf77a5a2bdc..c2a10d375b0d3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -40,7 +40,7 @@ * PCollection pc2 = ...; * PCollection pc3 = ...; * PCollectionList pcs = PCollectionList.of(pc1).and(pc2).and(pc3); - * PCollection merged = pcs.apply(Flatten..create()); + * PCollection merged = pcs.apply(Flatten..pCollections()); * } * *

    By default, the {@code Coder} of the output {@code PCollection} From a4c45171e69e061f30fe18fd80e78caaaf91523a Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 4 Mar 2015 10:20:10 -0800 Subject: [PATCH 0215/1541] Simplify the worker logging configuration for overrides. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87721993 --- .../logging/DataflowWorkerLoggingInitializer.java | 7 +------ .../logging/DataflowWorkerLoggingInitializerTest.java | 11 ++++------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java index 23cbc8d44b950..b285586646da6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializer.java @@ -114,16 +114,11 @@ public static synchronized void configure(DataflowWorkerLoggingOptions options) LogManager.getLogManager().getLogger(ROOT_LOGGER_NAME).setLevel( LEVELS.inverse().get(options.getDefaultWorkerLogLevel())); } - /* We store a reference to all the custom loggers the user configured. - * To make sure that these custom levels override the default logger level, - * we break the parent chain and have the logger directly pass log records - * to the file handler. */ + if (options.getWorkerLogLevelOverrides() != null) { for (WorkerLogLevelOverride loggerOverride : options.getWorkerLogLevelOverrides()) { Logger logger = Logger.getLogger(loggerOverride.getName()); - logger.setUseParentHandlers(false); logger.setLevel(LEVELS.inverse().get(loggerOverride.getLevel())); - logger.addHandler(fileHandler); configuredLoggers.add(logger); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java index 5d6fa6687a1d3..665c4f129a421 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingInitializerTest.java @@ -17,7 +17,6 @@ package com.google.cloud.dataflow.sdk.runners.worker.logging; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.options.DataflowWorkerLoggingOptions; @@ -87,16 +86,14 @@ public void testWithCustomLogLevels() { DataflowWorkerLoggingInitializer.configure(options); Logger aLogger = LogManager.getLogManager().getLogger("A"); - assertEquals(1, aLogger.getHandlers().length); + assertEquals(0, aLogger.getHandlers().length); assertEquals(Level.FINE, aLogger.getLevel()); - assertFalse(aLogger.getUseParentHandlers()); - assertTrue(isFileHandler(aLogger.getHandlers()[0], Level.ALL)); + assertTrue(aLogger.getUseParentHandlers()); Logger bLogger = LogManager.getLogManager().getLogger("B"); - assertEquals(1, bLogger.getHandlers().length); assertEquals(Level.SEVERE, bLogger.getLevel()); - assertFalse(bLogger.getUseParentHandlers()); - assertTrue(isFileHandler(bLogger.getHandlers()[0], Level.ALL)); + assertEquals(0, bLogger.getHandlers().length); + 
assertTrue(aLogger.getUseParentHandlers()); } private boolean isFileHandler(Handler handler, Level level) { From 95abb3fc7c36263406cb5ed6c62b5411c87886ae Mon Sep 17 00:00:00 2001 From: millsd Date: Wed, 4 Mar 2015 11:44:01 -0800 Subject: [PATCH 0216/1541] Cap TestPipeline job names at 40 chars; they'll throw an IllegalArgumentException otherwise. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87732028 --- .../cloud/dataflow/sdk/testing/TestPipeline.java | 3 ++- .../dataflow/sdk/testing/TestPipelineTest.java | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java index dd8eb8866a470..7545e7ee10521 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java @@ -136,7 +136,8 @@ private static String getAppName() { private static String getJobName() { Optional stackTraceElement = findCallersStackTrace(); if (stackTraceElement.isPresent()) { - return stackTraceElement.get().getMethodName(); + String name = stackTraceElement.get().getMethodName(); + return name.substring(0, Math.min(40, name.length())); } return "unittestjob"; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java index 730efdd677fa3..e01b8b5c2b54d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java @@ -71,4 +71,17 @@ public void testCreationOfPipelineOptions() throws Exception { assertEquals("testZone", options.getZone()); assertEquals(2, options.getDiskSizeGb()); } + + @Test + public void testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + String stringOptions = mapper.writeValueAsString( + ImmutableMap.of("options", + ImmutableMap.builder() + .build())); + System.getProperties().put("dataflowOptions", stringOptions); + TestDataflowPipelineOptions options = TestPipeline.getPipelineOptions(); + assertEquals("TestPipelineTest", options.getAppName()); + assertEquals("testCreationOfPipelineOptionsFromReallyV", options.getJobName()); + } } From f5c63011acac19a77f7ae5d8483eefebff9af59b Mon Sep 17 00:00:00 2001 From: sisk Date: Wed, 4 Mar 2015 16:36:28 -0800 Subject: [PATCH 0217/1541] Add code to one of the examples demonstrating how to properly log from a dataflow job. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87766081 --- .../com/google/cloud/dataflow/examples/TfIdf.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java index 422ffdf7dbbf2..79033eb9eede2 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/TfIdf.java @@ -51,6 +51,9 @@ import com.google.cloud.dataflow.sdk.values.PInput; import com.google.cloud.dataflow.sdk.values.TupleTag; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.File; import java.io.IOException; import java.net.URI; @@ -61,7 +64,7 @@ /** * An example that computes a basic TF-IDF search table for a directory or GCS prefix. * - *

 * <p> Concepts: joining data; side inputs
+ * <p> Concepts: joining data; side inputs; logging
 *
 * <p>

    To execute this pipeline locally, specify general pipeline configuration: * --project= @@ -227,6 +230,11 @@ public void processElement(ProcessContext c) { URI uri = c.element().getKey(); String line = c.element().getValue(); for (String word : line.split("\\W+")) { + // Log INFO messages when the word “love” is found. + if (word.toLowerCase().equals("love")) { + LOG.info("Found {}", word.toLowerCase()); + } + if (!word.isEmpty()) { c.output(KV.of(uri, word.toLowerCase())); } @@ -373,6 +381,11 @@ public void processElement(ProcessContext c) { return wordToUriAndTfIdf; } + + // Instantiate Logger. + // It is suggested that the user specify the class name of the containing class + // (in this case ComputeTfIdf). + private static final Logger LOG = LoggerFactory.getLogger(ComputeTfIdf.class); } /** From 474eb6ba5b63e2e1c2fdc2a71fe626132a9755f9 Mon Sep 17 00:00:00 2001 From: vanya Date: Wed, 4 Mar 2015 17:12:55 -0800 Subject: [PATCH 0218/1541] Do not stop ignoring HTTP 307/308 when extra ignored status codes are provided. [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87769846 --- .../cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index 1301d6c010fc7..c0278e1ba5f0f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -137,11 +137,11 @@ public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) { } public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, - NanoClock nanoClock, Sleeper sleeper, Collection ignoredResponseCodes) { + NanoClock nanoClock, Sleeper sleeper, Collection additionalIgnoredResponseCodes) { this.chained = chained; this.nanoClock = nanoClock; this.sleeper = sleeper; - this.ignoredResponseCodes = new HashSet<>(ignoredResponseCodes); + this.ignoredResponseCodes.addAll(additionalIgnoredResponseCodes); } @Override From 636b38795326ffd9e6fcab63786536ac5697d314 Mon Sep 17 00:00:00 2001 From: peihe Date: Wed, 4 Mar 2015 21:07:37 -0800 Subject: [PATCH 0219/1541] CL resubmit: Windowing: java worker code to handle CombineFn in StreamingGroupAlsoByWindowsDoFn [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87786512 --- .../worker/GroupAlsoByWindowsParDoFn.java | 21 ++- .../dataflow/sdk/transforms/Combine.java | 2 +- .../cloud/dataflow/sdk/transforms/DoFn.java | 5 + .../sdk/util/BatchModeExecutionContext.java | 8 + .../dataflow/sdk/util/CombiningWindowSet.java | 140 ++++++++++++++++++ .../dataflow/sdk/util/DoFnProcessContext.java | 14 +- .../util/StreamingGroupAlsoByWindowsDoFn.java | 15 +- .../util/StreamingModeExecutionContext.java | 6 + .../StreamingGroupAlsoByWindowsDoFnTest.java | 84 +++++++++-- 9 files changed, 274 insertions(+), 21 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index c3434b75d9834..eb05e336bfb9c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java 
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -19,6 +19,7 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getBytes; import static com.google.cloud.dataflow.sdk.util.Structs.getObject; +import com.google.api.client.util.Preconditions; import com.google.api.services.dataflow.model.MultiOutputInfo; import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -73,13 +74,15 @@ public static GroupAlsoByWindowsParDoFn create( } byte[] serializedCombineFn = getBytes(cloudUserFn, PropertyNames.COMBINE_FN, null); - Object combineFn = null; + final Object combineFn; if (serializedCombineFn != null) { combineFn = SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn"); if (!(combineFn instanceof KeyedCombineFn)) { throw new Exception("unexpected kind of KeyedCombineFn: " + combineFn.getClass().getName()); } + } else { + combineFn = null; } Map inputCoderObject = getObject(cloudUserFn, PropertyNames.INPUT_CODER); @@ -106,18 +109,26 @@ public static GroupAlsoByWindowsParDoFn create( fnFactory = new DoFnInfoFactory() { @Override public DoFnInfo createDoFnInfo() { - return new DoFnInfo(StreamingGroupAlsoByWindowsDoFn.create( - (WindowFn) windowFn, - ((KvCoder) elemCoder).getValueCoder()), + return new DoFnInfo( + StreamingGroupAlsoByWindowsDoFn.create( + (WindowFn) windowFn, + (KeyedCombineFn) combineFn, + ((KvCoder) elemCoder).getValueCoder()), null); } }; } else { + //TODO: handle CombineFn in batch GroupAlsoByWindowsDoFn. + Preconditions.checkArgument( + combineFn == null, + "combineFn is expected to be null in batch, but it is " + combineFn); fnFactory = new DoFnInfoFactory() { @Override public DoFnInfo createDoFnInfo() { return new DoFnInfo( - new GroupAlsoByWindowsDoFn((WindowFn) windowFn, elemCoder), + new GroupAlsoByWindowsDoFn( + (WindowFn) windowFn, + elemCoder), null); } }; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 14ff00c49f810..3e7f2895631df 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -818,7 +818,7 @@ protected String getKindString() { * {@link #perKey(SerializableFunction)}, and * {@link #groupedValues(SerializableFunction)}. */ - static class SimpleCombineFn extends CombineFn, V> { + public static class SimpleCombineFn extends CombineFn, V> { /** * Returns a {@code CombineFn} that uses the given * {@code SerializableFunction} to combine values. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index fa5fc4ed1ed30..3b71738ad7970 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -281,6 +281,11 @@ public interface KeyedState { */ public void store(CodedTupleTag tag, T value) throws IOException; + /** + * Removes the data associated with the given tag from {@code KeyedState}. 
+ */ + public void remove(CodedTupleTag tag); + /** * Returns the value associated with the given tag in this * {@code KeyedState}, or {@code null} if the tag has no asssociated diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java index f524b23ad1c78..051fe792db9cd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java @@ -106,6 +106,14 @@ public void store(CodedTupleTag tag, T value) { perKeyState.put(tag, value); } + @Override + public void remove(CodedTupleTag tag) { + Map, Object> perKeyState = state.get(getKey()); + if (perKeyState != null) { + perKeyState.remove(tag); + } + } + @Override public CodedTupleTagMap lookup(List> tags) { Map, Object> perKeyState = state.get(getKey()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java new file mode 100644 index 0000000000000..fe754bcb7cd96 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static com.google.cloud.dataflow.sdk.util.WindowUtils.bufferTag; + +import com.google.api.client.util.Lists; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.Iterators; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * A WindowSet for combine accumulators. + * It merges accumulators when windows are added or merged. 
+ * + * @param key tyep + * @param accumulator type + * @param window type + */ +public class CombiningWindowSet + extends AbstractWindowSet { + + private final CodedTupleTag> windowListTag = + CodedTupleTag.of("liveWindowsList", IterableCoder.of(windowFn.windowCoder())); + + private final KeyedCombineFn combineFn; + private final Set liveWindows; + private boolean liveWindowsModified; + + protected CombiningWindowSet( + K key, + WindowFn windowFn, + KeyedCombineFn combineFn, + Coder inputCoder, + DoFnProcessContext> context, + ActiveWindowManager activeWindowManager) throws Exception { + super(key, windowFn, inputCoder, context, activeWindowManager); + this.combineFn = combineFn; + liveWindows = new HashSet(); + Iterators.addAll(liveWindows, + emptyIfNull(context.keyedState().lookup(windowListTag)).iterator()); + liveWindowsModified = false; + } + + @Override + protected Collection windows() { + return Collections.unmodifiableSet(liveWindows); + } + + @Override + protected VA finalValue(W window) throws Exception { + return context.keyedState().lookup( + bufferTag(window, windowFn.windowCoder(), inputCoder)); + } + + @Override + protected void put(W window, VA value) throws Exception { + CodedTupleTag tag = bufferTag(window, windowFn.windowCoder(), inputCoder); + VA va = context.keyedState().lookup(tag); + VA newValue; + if (va == null) { + newValue = value; + } else { + newValue = combineFn.mergeAccumulators(key, Arrays.asList(value, va)); + } + context.keyedState().store(tag, newValue); + activeWindowManager.addWindow(window); + liveWindowsModified = liveWindows.add(window); + } + + @Override + protected void remove(W window) throws Exception { + context.keyedState().remove(bufferTag(window, windowFn.windowCoder(), inputCoder)); + activeWindowManager.addWindow(window); + liveWindowsModified = liveWindows.remove(window); + } + + @Override + protected void merge(Collection toBeMerged, W mergeResult) throws Exception { + List accumulators = Lists.newArrayList(); + for (W w : toBeMerged) { + VA va = context.keyedState().lookup( + bufferTag(w, windowFn.windowCoder(), inputCoder)); + // TODO: determine whether null means no value associated with the tag, b/19201776. 
+ if (va != null) { + accumulators.add(va); + } + remove(w); + } + VA mergedVa = combineFn.mergeAccumulators(key, accumulators); + put(mergeResult, mergedVa); + } + + @Override + protected boolean contains(W window) { + return liveWindows.contains(window); + } + + private static Iterable emptyIfNull(Iterable list) { + if (list == null) { + return Collections.emptyList(); + } else { + return list; + } + } + + @Override + protected void flush() throws Exception { + if (liveWindowsModified) { + context.keyedState().store(windowListTag, liveWindows); + liveWindowsModified = false; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java index dc6ea63faa776..9470e76c5762c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnProcessContext.java @@ -68,7 +68,7 @@ public I element() { @Override public KeyedState keyedState() { if (!(fn instanceof RequiresKeyedState) - || (element() != null && !(element() instanceof KV))) { + || !equivalentToKV(element())) { throw new UnsupportedOperationException( "Keyed state is only available in the context of a keyed DoFn marked as requiring state"); } @@ -139,4 +139,16 @@ private void checkTimestamp(Instant timestamp) { Preconditions.checkArgument( !timestamp.isBefore(windowedValue.getTimestamp().minus(fn.getAllowedTimestampSkew()))); } + + private boolean equivalentToKV(I input) { + if (input == null) { + return true; + } else if (input instanceof KV) { + return true; + } else if (input instanceof TimerOrElement) { + return ((TimerOrElement) input).isTimer() + || ((TimerOrElement) input).element() instanceof KV; + } + return false; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index 01503f783a900..741cd7130bfaf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.PartitioningWindowFn; @@ -39,31 +40,39 @@ public class StreamingGroupAlsoByWindowsDoFn extends DoFn>, KV> implements DoFn.RequiresKeyedState { protected WindowFn windowFn; + protected KeyedCombineFn combineFn; protected Coder inputCoder; protected StreamingGroupAlsoByWindowsDoFn( WindowFn windowFn, + KeyedCombineFn combineFn, Coder inputCoder) { this.windowFn = windowFn; + this.combineFn = combineFn; this.inputCoder = inputCoder; } public static StreamingGroupAlsoByWindowsDoFn create( WindowFn windowFn, + KeyedCombineFn combineFn, Coder inputCoder) { - return new StreamingGroupAlsoByWindowsDoFn<>(windowFn, inputCoder); + return new StreamingGroupAlsoByWindowsDoFn<>(windowFn, combineFn, inputCoder); } private AbstractWindowSet createWindowSet( K key, DoFnProcessContext> context, AbstractWindowSet.ActiveWindowManager activeWindowManager) throws Exception { - if (windowFn instanceof PartitioningWindowFn) { + if (combineFn != 
null) { + return new CombiningWindowSet( + key, windowFn, combineFn, inputCoder, context, activeWindowManager); + } else if (windowFn instanceof PartitioningWindowFn) { return new PartitionBufferingWindowSet( key, windowFn, inputCoder, context, activeWindowManager); } else { - return new BufferingWindowSet(key, windowFn, inputCoder, context, activeWindowManager); + return new BufferingWindowSet( + key, windowFn, inputCoder, context, activeWindowManager); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java index 69f13f4de5709..61e00ac101bb5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingModeExecutionContext.java @@ -118,6 +118,12 @@ public void store(CodedTupleTag tag, T value) throws CoderException, IOEx stateCache.put(tag, KV.of(value, stream.toByteString())); } + @Override + public void remove(CodedTupleTag tag) { + // Write ByteString.EMPTY to indicate the value associated with the tag is removed. + stateCache.put(tag, KV.of(null, ByteString.EMPTY)); + } + @Override public CodedTupleTagMap lookup(List> tags) throws CoderException, IOException { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index c1bb363bad2f8..5d5c03406f29c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -23,6 +23,10 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; +import com.google.cloud.dataflow.sdk.transforms.Sum; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; @@ -63,7 +67,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testEmpty() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(FixedWindows.of(Duration.millis(10))); + makeRunner(FixedWindows.of(Duration.millis(10)), null); runner.startBundle(); @@ -77,7 +81,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testFixedWindows() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(FixedWindows.of(Duration.millis(10))); + makeRunner(FixedWindows.of(Duration.millis(10)), null); Coder windowCoder = FixedWindows.of(Duration.millis(10)).windowCoder(); @@ -135,7 +139,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testSlidingWindows() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); + makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)), null); Coder windowCoder = SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)).windowCoder(); @@ -200,7 +204,7 @@ public class 
StreamingGroupAlsoByWindowsDoFnTest { @Test public void testSessions() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(Sessions.withGapDuration(Duration.millis(10))); + makeRunner(Sessions.withGapDuration(Duration.millis(10)), null); Coder windowCoder = Sessions.withGapDuration(Duration.millis(10)).windowCoder(); @@ -256,16 +260,74 @@ public class StreamingGroupAlsoByWindowsDoFnTest { assertThat(item1.getWindows(), Matchers.contains(window(15, 25))); } + @Test public void testSessionsCombine() throws Exception { + CombineFn combineFn = Combine.SimpleCombineFn.of(new Sum.SumLongFn()); + DoFnRunner>>, + KV>, List> runner = + makeRunner(Sessions.withGapDuration(Duration.millis(10)), + combineFn.asKeyedFn()); - private DoFnRunner>, - KV>, List> makeRunner( - WindowFn windowingStrategy) { + Coder windowCoder = + Sessions.withGapDuration(Duration.millis(10)).windowCoder(); - StreamingGroupAlsoByWindowsDoFn, IntervalWindow> fn = - StreamingGroupAlsoByWindowsDoFn.create(windowingStrategy, StringUtf8Coder.of()); + runner.startBundle(); - DoFnRunner>, - KV>, List> runner = + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(1L))), + new Instant(0), + Arrays.asList(window(0, 10)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(2L))), + new Instant(5), + Arrays.asList(window(5, 15)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(3L))), + new Instant(15), + Arrays.asList(window(15, 25)))); + + runner.processElement(WindowedValue.of( + TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(4L))), + new Instant(3), + Arrays.asList(window(3, 13)))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>>timer( + windowToString((IntervalWindow) window(0, 15), windowCoder), + new Instant(14), "k"))); + + runner.processElement(WindowedValue.valueInEmptyWindows( + TimerOrElement.>>timer( + windowToString((IntervalWindow) window(15, 25), windowCoder), + new Instant(24), "k"))); + + runner.finishBundle(); + + List>>> result = runner.getReceiver(outputTag); + + assertEquals(2, result.size()); + + WindowedValue>> item0 = result.get(0); + assertEquals("k", item0.getValue().getKey()); + assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder(7L)); + assertEquals(new Instant(14), item0.getTimestamp()); + assertThat(item0.getWindows(), Matchers.contains(window(0, 15))); + + WindowedValue>> item1 = result.get(1); + assertEquals("k", item1.getValue().getKey()); + assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder(3L)); + assertEquals(new Instant(24), item1.getTimestamp()); + assertThat(item1.getWindows(), Matchers.contains(window(15, 25))); + } + + private DoFnRunner makeRunner( + WindowFn windowingStrategy, + KeyedCombineFn combineFn) { + StreamingGroupAlsoByWindowsDoFn fn = + StreamingGroupAlsoByWindowsDoFn.create(windowingStrategy, combineFn, StringUtf8Coder.of()); + + DoFnRunner runner = DoFnRunner.createWithListOutputs( PipelineOptionsFactory.create(), fn, From 01d143774ba0988b9afbcacb95a63a69f1ec93e8 Mon Sep 17 00:00:00 2001 From: lcwik Date: Thu, 5 Mar 2015 10:34:32 -0800 Subject: [PATCH 0220/1541] Slight improvement of RetryHttpRequestInitializer interface. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87841386 --- .../sdk/util/RetryHttpRequestInitializer.java | 21 ++++++++++++++++++- .../cloud/dataflow/sdk/util/Transport.java | 4 +--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index c0278e1ba5f0f..f01d5ffd62db6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -133,10 +133,29 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, * applied to HttpRequest initialization. May be null. */ public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) { - this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT, Collections.emptyList()); + this(chained, Collections.emptyList()); } + /** + * @param chained a downstream HttpRequestInitializer, which will also be + * applied to HttpRequest initialization. May be null. + * @param additionalIgnoredResponseCodes a list of HTTP status codes which should not be logged. + */ public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, + Collection additionalIgnoredResponseCodes) { + this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT, additionalIgnoredResponseCodes); + } + + /** + * Visible for testing. + * + * @param chained a downstream HttpRequestInitializer, which will also be + * applied to HttpRequest initialization. May be null. + * @param nanoClock used as a timing source for knowing how much time has elapsed. + * @param sleeper used to sleep between retries. + * @param additionalIgnoredResponseCodes a list of HTTP status codes which should not be logged. + */ + RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, NanoClock nanoClock, Sleeper sleeper, Collection additionalIgnoredResponseCodes) { this.chained = chained; this.nanoClock = nanoClock; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java index a5e9fe8ad6981..57876ae7fa4ca 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java @@ -23,8 +23,6 @@ import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; -import com.google.api.client.util.NanoClock; -import com.google.api.client.util.Sleeper; import com.google.api.services.bigquery.Bigquery; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.pubsub.Pubsub; @@ -153,7 +151,7 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options new RetryHttpRequestInitializer( // Do not log the code 404. Code up the stack will deal with 404's if needed, and // logging it by default clutters the output during file staging. 
- options.getGcpCredential(), NanoClock.SYSTEM, Sleeper.DEFAULT, Arrays.asList(404))) + options.getGcpCredential(), Arrays.asList(404))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer( new ChainedGoogleClientRequestInitializer(options.getGoogleApiTrace())); From ad389cb2869595eb964b6c5999ccc43acaf39df9 Mon Sep 17 00:00:00 2001 From: robertwb Date: Thu, 5 Mar 2015 11:26:27 -0800 Subject: [PATCH 0221/1541] Fix misleading member name (no externally visible change). [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87847659 --- .../util/common/worker/PartialGroupByKeyOperation.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java index a1315d5d1a9f5..dc8c00ffe651c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/PartialGroupByKeyOperation.java @@ -369,18 +369,18 @@ public static class CombiningGroupingTable extends GroupingTable combiner; private final SizeEstimator keySizer; - private final SizeEstimator valueSizer; + private final SizeEstimator accumulatorSizer; public CombiningGroupingTable(long maxSize, GroupingKeyCreator groupingKeyCreator, PairInfo pairInfo, Combiner combineFn, SizeEstimator keySizer, - SizeEstimator valueSizer) { + SizeEstimator accumulatorSizer) { super(maxSize, groupingKeyCreator, pairInfo); this.combiner = combineFn; this.keySizer = keySizer; - this.valueSizer = valueSizer; + this.accumulatorSizer = accumulatorSizer; } @Override @@ -394,7 +394,7 @@ public GroupingTableEntry createTableEntry(final K key) throws Except public long getSize() { return keySize + accumulatorSize; } public void add(VI value) throws Exception { accumulator = combiner.add(key, accumulator, value); - accumulatorSize = valueSizer.estimateSize(accumulator); + accumulatorSize = accumulatorSizer.estimateSize(accumulator); } }; } From e1b3e43ca0789bc1d9d9c496ba53fc8339ec18f3 Mon Sep 17 00:00:00 2001 From: ckuhn Date: Thu, 5 Mar 2015 14:11:42 -0800 Subject: [PATCH 0222/1541] Removing the precondition that job names have 40 characters or less. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87864968 --- .../dataflow/sdk/runners/DataflowPipelineRunner.java | 2 -- .../dataflow/sdk/runners/DataflowPipelineRunnerTest.java | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 896c60bcb7d15..fffec136c5a20 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -123,8 +123,6 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { "JobName invalid; the name must consist of only the characters " + "[-a-z0-9], starting with a letter and ending with a letter " + "or number"); - Preconditions.checkArgument(jobName.length() <= 40, - "JobName too long; must be no more than 40 characters in length"); return new DataflowPipelineRunner(dataflowOptions); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index f31362d92be1b..79e17cec7d595 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -389,13 +389,11 @@ public void testInvalidJobName() throws IOException { List invalidNames = Arrays.asList( "invalid_name", "0invalid", - "invalid-", - "this-one-is-too-long-01234567890123456789"); + "invalid-"); List expectedReason = Arrays.asList( "JobName invalid", "JobName invalid", - "JobName invalid", - "JobName too long"); + "JobName invalid"); for (int i = 0; i < invalidNames.size(); ++i) { ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); @@ -416,7 +414,8 @@ public void testInvalidJobName() throws IOException { @Test public void testValidJobName() throws IOException { - List names = Arrays.asList("ok", "Ok", "A-Ok", "ok-123"); + List names = Arrays.asList("ok", "Ok", "A-Ok", "ok-123", + "this-one-is-fairly-long-01234567890123456789"); for (String name : names) { ArgumentCaptor jobCaptor = ArgumentCaptor.forClass(Job.class); From e8da55c4b38307bedbc2dabd19a2d953120b4284 Mon Sep 17 00:00:00 2001 From: peihe Date: Thu, 5 Mar 2015 14:36:09 -0800 Subject: [PATCH 0223/1541] FIX: sideinput expects WindowedValues, update BigQueryReader to support BigqueryIO directly as sideinput. 
[] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87867783 --- .../cloud/dataflow/sdk/io/BigQueryIO.java | 6 ++-- .../sdk/runners/DirectPipelineRunner.java | 29 +++++++++++++++++-- .../sdk/runners/worker/BigQueryReader.java | 11 +++---- .../runners/worker/BigQueryReaderTest.java | 7 +++-- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index ae017680a1e26..061bf68cd5049 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -35,6 +35,7 @@ import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter; import com.google.cloud.dataflow.sdk.util.ReaderUtils; import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PDone; @@ -841,9 +842,10 @@ private static void evaluateReadHelper( } LOG.info("Reading from BigQuery table {}", toTableSpec(ref)); - List elems = ReaderUtils.readElemsFromReader(new BigQueryReader(client, ref)); + List> elems = + ReaderUtils.readElemsFromReader(new BigQueryReader(client, ref)); LOG.info("Number of records read from BigQuery: {}", elems.size()); - context.setPCollection(transform.getOutput(), elems); + context.setPCollectionWindowedValue(transform.getOutput(), elems); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java index c665e0d3c5227..6c481d624701d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -487,6 +487,13 @@ public interface EvaluationContext extends EvaluationResults { void setPCollectionValuesWithMetadata( PCollection pc, List> elements); + /** + * Sets the value of the given PCollection, where each element also has a timestamp + * and collection of windows. + * Throws an exception if the PCollection's value has already been set. + */ + void setPCollectionWindowedValue(PCollection pc, List> elements); + /** * Shorthand for setting the value of a PCollection where the elements do not have * timestamps or windows. @@ -655,7 +662,7 @@ Object getPValue(PValue pvalue) { * Convert a list of T to a list of {@code ValueWithMetadata}, with a timestamp of 0 * and null windows. */ - List> toValuesWithMetadata(List values) { + List> toValueWithMetadata(List values) { List> result = new ArrayList<>(values.size()); for (T value : values) { result.add(ValueWithMetadata.of(WindowedValue.valueInGlobalWindow(value))); @@ -663,9 +670,27 @@ List> toValuesWithMetadata(List values) { return result; } + /** + * Convert a list of {@code WindowedValue} to a list of {@code ValueWithMetadata}. 
+ */ + List> toValueWithMetadataFromWindowedValue( + List> values) { + List> result = new ArrayList<>(values.size()); + for (WindowedValue value : values) { + result.add(ValueWithMetadata.of(value)); + } + return result; + } + @Override public void setPCollection(PCollection pc, List elements) { - setPCollectionValuesWithMetadata(pc, toValuesWithMetadata(elements)); + setPCollectionValuesWithMetadata(pc, toValueWithMetadata(elements)); + } + + @Override + public void setPCollectionWindowedValue( + PCollection pc, List> elements) { + setPCollectionValuesWithMetadata(pc, toValueWithMetadataFromWindowedValue(elements)); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java index c5a27c71e7fa1..149c33a331e06 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReader.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.options.BigQueryOptions; import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator; import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import java.io.IOException; @@ -35,7 +36,7 @@ * progress reporting because the source is used only in situations where the entire table must be * read by each worker (i.e. the source is used as a side input). */ -public class BigQueryReader extends Reader { +public class BigQueryReader extends Reader> { final TableReference tableRef; final BigQueryOptions bigQueryOptions; final Bigquery bigQueryClient; @@ -57,7 +58,7 @@ public BigQueryReader(Bigquery bigQueryClient, TableReference tableRef) { } @Override - public ReaderIterator iterator() throws IOException { + public ReaderIterator> iterator() throws IOException { return new BigQueryReaderIterator( bigQueryClient != null ? bigQueryClient : Transport.newBigQueryClient(bigQueryOptions).build(), @@ -67,7 +68,7 @@ public ReaderIterator iterator() throws IOException { /** * A ReaderIterator that yields TableRow objects for each row of a BigQuery table. 
*/ - class BigQueryReaderIterator extends AbstractReaderIterator { + class BigQueryReaderIterator extends AbstractReaderIterator> { private BigQueryTableRowIterator rowIterator; public BigQueryReaderIterator(Bigquery bigQueryClient, TableReference tableRef) { @@ -80,11 +81,11 @@ public boolean hasNext() { } @Override - public TableRow next() throws IOException { + public WindowedValue next() throws IOException { if (!hasNext()) { throw new NoSuchElementException(); } - return rowIterator.next(); + return WindowedValue.valueInGlobalWindow(rowIterator.next()); } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java index 41ee8fb83d79d..ee308f42a5707 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/BigQueryReaderTest.java @@ -32,6 +32,7 @@ import com.google.api.services.bigquery.model.TableReference; import com.google.api.services.bigquery.model.TableRow; import com.google.cloud.dataflow.sdk.util.Transport; +import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import com.google.common.collect.Lists; @@ -342,10 +343,10 @@ public void testRead() throws Exception { bigQueryClient, new TableReference().setProjectId(PROJECT_ID).setDatasetId(DATASET).setTableId(TABLE)); - Reader.ReaderIterator iterator = reader.iterator(); + Reader.ReaderIterator> iterator = reader.iterator(); Assert.assertTrue(iterator.hasNext()); - TableRow row = iterator.next(); + TableRow row = iterator.next().getValue(); Assert.assertEquals("Arthur", row.get("name")); Assert.assertEquals("42", row.get("integer")); @@ -360,7 +361,7 @@ public void testRead() throws Exception { Assert.assertTrue(((List) row.get("repeatedFloat")).isEmpty()); Assert.assertTrue(((List) row.get("repeatedRecord")).isEmpty()); - row = iterator.next(); + row = iterator.next().getValue(); Assert.assertEquals("Allison", row.get("name")); Assert.assertEquals("79", row.get("integer")); From 0e165fa1af3f48fe954725c8100875121570bb9c Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 5 Mar 2015 16:43:27 -0800 Subject: [PATCH 0224/1541] Fix bug in selection of AvroSink vs AvroByteSink and AvroReader vs AvroByteReader ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87881403 --- .../dataflow/sdk/runners/worker/AvroReader.java | 5 +---- .../sdk/runners/worker/AvroReaderFactory.java | 16 +++++----------- .../dataflow/sdk/runners/worker/AvroSink.java | 9 ++------- .../sdk/runners/worker/AvroSinkFactory.java | 15 ++++----------- .../cloud/dataflow/sdk/util/WindowedValue.java | 4 ++-- .../runners/worker/AvroReaderFactoryTest.java | 7 +++++-- .../sdk/runners/worker/AvroSinkFactoryTest.java | 3 ++- 7 files changed, 21 insertions(+), 38 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java index ac0c250e7d4f7..cf51cc7344034 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java @@ -54,10 +54,7 @@ public class AvroReader extends Reader> { private final Schema schema; public AvroReader(String filename, @Nullable Long startPosition, 
@Nullable Long endPosition, - WindowedValue.WindowedValueCoder coder) { - if (!(coder instanceof WindowedValue.ValueOnlyWindowedValueCoder)) { - throw new IllegalArgumentException("Expected ValueOnlyWindowedValueCoder"); - } + WindowedValue.ValueOnlyWindowedValueCoder coder) { if (!(coder.getValueCoder() instanceof AvroCoder)) { throw new IllegalArgumentException("AvroReader requires an AvroCoder"); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java index 7d9f2277dd84f..1f3d320e16c5d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java @@ -19,13 +19,12 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getLong; import static com.google.cloud.dataflow.sdk.util.Structs.getString; -import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; /** @@ -46,16 +45,11 @@ static Reader create(CloudObject spec, Coder coder) throws Exception { Long startOffset = getLong(spec, PropertyNames.START_OFFSET, null); Long endOffset = getLong(spec, PropertyNames.END_OFFSET, null); - if (!(coder instanceof WindowedValueCoder)) { - return new AvroByteReader<>(filename, startOffset, endOffset, coder); - //throw new IllegalArgumentException("Expected WindowedValueCoder"); - } - - WindowedValueCoder windowedCoder = (WindowedValueCoder) coder; - if (windowedCoder.getValueCoder() instanceof AvroCoder) { - return new AvroReader(filename, startOffset, endOffset, windowedCoder); + if (coder instanceof ValueOnlyWindowedValueCoder) { + return (Reader) new AvroReader( + filename, startOffset, endOffset, (ValueOnlyWindowedValueCoder) coder); } else { - return new AvroByteReader<>(filename, startOffset, endOffset, windowedCoder); + return new AvroByteReader<>(filename, startOffset, endOffset, coder); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java index bae4cd843be48..9ecf1c3cdebe2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java @@ -17,7 +17,6 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; -import static com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.util.CoderUtils; @@ -51,16 +50,12 @@ public class AvroSink extends Sink> { final AvroCoder avroCoder; final Schema schema; - public AvroSink(String filename, WindowedValueCoder coder) { + public AvroSink(String filename, ValueOnlyWindowedValueCoder coder) { this(filename, "", "", 1, coder); } public AvroSink(String filenamePrefix, String 
shardFormat, String filenameSuffix, int shardCount, - WindowedValueCoder coder) { - if (!(coder instanceof ValueOnlyWindowedValueCoder)) { - throw new IllegalArgumentException("Expected ValueOnlyWindowedValueCoder"); - } - + ValueOnlyWindowedValueCoder coder) { if (!(coder.getValueCoder() instanceof AvroCoder)) { throw new IllegalArgumentException("AvroSink requires an AvroCoder"); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java index 82dbed23d06fe..001d624223b37 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java @@ -18,13 +18,12 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getString; -import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; +import com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; /** @@ -47,16 +46,10 @@ static Sink create(CloudObject spec, Coder coder) throws Exception { String filename = getString(spec, PropertyNames.FILENAME); - if (!(coder instanceof WindowedValueCoder)) { - return new AvroByteSink<>(filename, coder); - //throw new IllegalArgumentException("Expected WindowedValueCoder"); - } - - WindowedValueCoder windowedCoder = (WindowedValueCoder) coder; - if (windowedCoder.getValueCoder() instanceof AvroCoder) { - return new AvroSink(filename, windowedCoder); + if (coder instanceof ValueOnlyWindowedValueCoder) { + return (Sink) new AvroSink(filename, (ValueOnlyWindowedValueCoder) coder); } else { - return new AvroByteSink<>(filename, windowedCoder); + return new AvroByteSink<>(filename, coder); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index c031b63c61fa8..595d1b0985795 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -127,7 +127,7 @@ public Collection getWindows() { * Returns the {@code Coder} to use for a {@code WindowedValue}, * using the given valueCoder and windowCoder. */ - public static WindowedValueCoder getFullCoder( + public static FullWindowedValueCoder getFullCoder( Coder valueCoder, Coder windowCoder) { return FullWindowedValueCoder.of(valueCoder, windowCoder); @@ -136,7 +136,7 @@ public static WindowedValueCoder getFullCoder( /** * Returns the {@code ValueOnlyCoder} from the given valueCoder. 
*/ - public static WindowedValueCoder getValueOnlyCoder(Coder valueCoder) { + public static ValueOnlyWindowedValueCoder getValueOnlyCoder(Coder valueCoder) { return ValueOnlyWindowedValueCoder.of(valueCoder); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java index 25b3c01072910..06486696e4050 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactoryTest.java @@ -24,6 +24,7 @@ import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.WindowedValue; @@ -67,7 +68,8 @@ Reader runTestCreateAvroReader(String filename, @Nullable Long start, @Nullab @Test public void testCreatePlainAvroByteReader() throws Exception { - Coder coder = WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Coder coder = WindowedValue.getFullCoder( + BigEndianIntegerCoder.of(), GlobalWindow.Coder.INSTANCE); Reader reader = runTestCreateAvroReader(pathToAvroFile, null, null, coder.asCloudObject()); Assert.assertThat(reader, new IsInstanceOf(AvroByteReader.class)); @@ -80,7 +82,8 @@ public void testCreatePlainAvroByteReader() throws Exception { @Test public void testCreateRichAvroByteReader() throws Exception { - Coder coder = WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + Coder coder = WindowedValue.getFullCoder( + BigEndianIntegerCoder.of(), GlobalWindow.Coder.INSTANCE); Reader reader = runTestCreateAvroReader(pathToAvroFile, 200L, 500L, coder.asCloudObject()); Assert.assertThat(reader, new IsInstanceOf(AvroByteReader.class)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java index cb064c8371226..50309f3bb6ff8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactoryTest.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.WindowedValue; @@ -59,7 +60,7 @@ Sink runTestCreateAvroSink(String filename, @Test public void testCreateAvroByteSink() throws Exception { Coder coder = - WindowedValue.getValueOnlyCoder(BigEndianIntegerCoder.of()); + WindowedValue.getFullCoder(BigEndianIntegerCoder.of(), GlobalWindow.Coder.INSTANCE); Sink sink = runTestCreateAvroSink( pathToAvroFile, coder.asCloudObject()); From bc188f03996bf4ca177a431b2adcc57557b37b88 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 6 Mar 2015 02:48:20 -0800 Subject: [PATCH 0225/1541] Windowing: java worker code to handle CombineFn in batch 
GroupAlsoByWindowsDoFn. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87922554 --- .../worker/GroupAlsoByWindowsParDoFn.java | 1 + .../dataflow/sdk/transforms/GroupByKey.java | 4 +- .../sdk/util/GroupAlsoByWindowsDoFn.java | 51 +++++++++++-------- .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 6 +-- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index eb05e336bfb9c..8b74253bd7fa6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -128,6 +128,7 @@ public DoFnInfo createDoFnInfo() { return new DoFnInfo( new GroupAlsoByWindowsDoFn( (WindowFn) windowFn, + (KeyedCombineFn) combineFn, elemCoder), null); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 63e98a58dd5e8..93d657a800cf2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -255,8 +255,8 @@ public PCollection>> apply( KvCoder.of(keyCoder, outputValueCoder); return input.apply(ParDo.of( - new GroupAlsoByWindowsDoFn( - (WindowFn) windowFn, inputIterableElementValueCoder))) + new GroupAlsoByWindowsDoFn, BoundedWindow>( + (WindowFn) windowFn, null, inputIterableElementValueCoder))) .setCoder(outputKvCoder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 86876bfcf5bdc..b493e75d2dda6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; @@ -44,30 +45,34 @@ * combining values. * * @param key type - * @param input value element type + * @param input value element type + * @param output value element type * @param window type */ @SuppressWarnings("serial") -public class GroupAlsoByWindowsDoFn - extends DoFn>>, KV>> { +public class GroupAlsoByWindowsDoFn + extends DoFn>>, KV> { // TODO: Add back RequiresKeyed state once that is supported. 
protected WindowFn windowFn; - protected Coder inputCoder; + protected KeyedCombineFn combineFn; + protected Coder inputCoder; public GroupAlsoByWindowsDoFn( WindowFn windowFn, - Coder inputCoder) { + KeyedCombineFn combineFn, + Coder inputCoder) { this.windowFn = windowFn; + this.combineFn = combineFn; this.inputCoder = inputCoder; } @Override public void processElement(ProcessContext processContext) throws Exception { - DoFnProcessContext>>, KV>> context = - (DoFnProcessContext>>, KV>>) processContext; + DoFnProcessContext>>, KV> context = + (DoFnProcessContext>>, KV>) processContext; - if (windowFn instanceof NonMergingWindowFn) { + if (windowFn instanceof NonMergingWindowFn && combineFn == null) { processElementViaIterators(context); } else { processElementViaWindowSet(context); @@ -75,15 +80,21 @@ public void processElement(ProcessContext processContext) throws Exception { } private void processElementViaWindowSet( - DoFnProcessContext>>, KV>> context) + DoFnProcessContext>>, KV> context) throws Exception { K key = context.element().getKey(); BatchActiveWindowManager activeWindowManager = new BatchActiveWindowManager<>(); - AbstractWindowSet, W> windowSet = - new BufferingWindowSet(key, windowFn, inputCoder, context, activeWindowManager); + AbstractWindowSet windowSet; + if (combineFn == null) { + windowSet = new BufferingWindowSet( + key, windowFn, inputCoder, context, activeWindowManager); + } else { + windowSet = new CombiningWindowSet( + key, windowFn, combineFn, inputCoder, context, activeWindowManager); + } - for (WindowedValue e : context.element().getValue()) { + for (WindowedValue e : context.element().getValue()) { for (BoundedWindow window : e.getWindows()) { windowSet.put((W) window, e.getValue()); } @@ -127,17 +138,17 @@ private void maybeOutputWindows( } private void processElementViaIterators( - DoFnProcessContext>>, KV>> context) + DoFnProcessContext>>, KV> context) throws Exception { K key = context.element().getKey(); - Iterable> value = context.element().getValue(); - PeekingReiterator> iterator; + Iterable> value = context.element().getValue(); + PeekingReiterator> iterator; if (value instanceof Collection) { - iterator = new PeekingReiterator<>(new ListReiterator>( - new ArrayList>((Collection>) value), 0)); + iterator = new PeekingReiterator<>(new ListReiterator>( + new ArrayList>((Collection>) value), 0)); } else if (value instanceof Reiterable) { - iterator = new PeekingReiterator(((Reiterable>) value).iterator()); + iterator = new PeekingReiterator(((Reiterable>) value).iterator()); } else { throw new IllegalArgumentException( "Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable"); @@ -148,7 +159,7 @@ private void processElementViaIterators( ListMultimap windows = ArrayListMultimap.create(); while (iterator.hasNext()) { - WindowedValue e = iterator.peek(); + WindowedValue e = iterator.peek(); for (BoundedWindow window : e.getWindows()) { // If this window is not already in the active set, emit a new WindowReiterable // corresponding to this window, starting at this element in the input Reiterable. @@ -157,7 +168,7 @@ private void processElementViaIterators( // for as long as it detects that there are no new windows. 
windows.put(window.maxTimestamp(), window); context.outputWindowedValue( - KV.of(key, (Iterable) new WindowReiterable(iterator, window)), + KV.of(key, (VO) new WindowReiterable(iterator, window)), window.maxTimestamp(), Arrays.asList((W) window)); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index 677dac3ff8b0e..464fcf9b868a7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -252,9 +252,9 @@ public class GroupAlsoByWindowsDoFnTest { KV>, List> makeRunner( WindowFn windowFn) { - GroupAlsoByWindowsDoFn fn = - new GroupAlsoByWindowsDoFn( - windowFn, StringUtf8Coder.of()); + GroupAlsoByWindowsDoFn, IntervalWindow> fn = + new GroupAlsoByWindowsDoFn, IntervalWindow>( + windowFn, null, StringUtf8Coder.of()); DoFnRunner>>, KV>, List> runner = From 8a3e55d2b8fdf6b7f800976cf20610bd400e7965 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 6 Mar 2015 10:50:21 -0800 Subject: [PATCH 0226/1541] Setting up 'contrib' directory and rules. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87958147 --- CONTRIBUTING.md | 7 ++++++- contrib/README.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 contrib/README.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 26147ec273b71..8b0fcba196f81 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,7 +19,12 @@ frustration later on. ### Code reviews All submissions, including submissions by project members, require review. We -use Github pull requests for this purpose. +use GitHub pull requests for this purpose. + +### Organization +During our review and triage of incoming pull requests, we'll advise whether to +include your contribution into the mainline SDK, or to maintain it within the +separate group of [community-contributed modules](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master/contrib). ### The small print Contributions made by corporations are covered by a different agreement than diff --git a/contrib/README.md b/contrib/README.md new file mode 100644 index 0000000000000..e3381b5e41d4f --- /dev/null +++ b/contrib/README.md @@ -0,0 +1,53 @@ +# Community contributions + +This directory hosts a wide variety of community contributions that may be +useful to other users of +[Google Cloud Dataflow](https://cloud.google.com/dataflow/), +but may not be appropriate or ready yet for inclusion into the +[mainline SDK](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/) or a +separate Google-maintained artifact. + +## Organization + +Each subdirectory represents a logically separate and independent module. +Preferably, the code is hosted directly in this repository. When appropriate, we +are also open to linking external repositories via +[`submodule`](http://git-scm.com/docs/git-submodule/) functionality within Git. + +While we are happy to host individual modules to provide additional value to all +Cloud Dataflow users, the modules are _maintanted solely by their respective +authors_. We will make sure that modules are related to Cloud Dataflow, that +they are distributed under the same license as the mainline SDK, and provide +some guidance to the authors to make the quality as high as possible. 
+ +We __cannot__, however, provide _any_ guarantees about correctness, +compatibility, performance, support, test coverage, maintenance or future +availability of individual modules hosted here. + +## Process + +In general, we recommend to get in touch with us through the issue tracker +first. That way we can help out and possibly guide you. Coordinating up front +makes it much easier to avoid frustration later on. + +We welcome pull requests with a new module from everyone. Every module must be +related to Cloud Dataflow and must have an informative README.md file. We will +provide general guidance, but usually won't be reviewing the module in detail. +We reserve the right to refuse acceptance to any module, or remove it at any +time in the future. + +We also welcome improvements to an existing module from everyone. We'll often +wait for comments from the primary author of the module before merging a pull +request from a non-primary author. + +As the module matures, we may choose to pull it directly into the mainline SDK +or promote it to a Google-managed artifact. + +## Licensing + +We require all contributors to sign the Contributor License Agreement, exactly +as we require for any contributions to the mainline SDK. More information is +available in our [CONTRIBUTING.md](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/CONTRIBUTING.md) +file. + +_Thank you for your contribution to the Cloud Dataflow community!_ From fa9ecb9b95b0c6462ee2d97ea38f44253ac6f4d8 Mon Sep 17 00:00:00 2001 From: earhart Date: Fri, 6 Mar 2015 14:06:59 -0800 Subject: [PATCH 0227/1541] Update Proto2Coder.java Add serialVersionUID for this serializable class. --- .../java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java index 6b89ec140fc20..7e19cce9eb44a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java @@ -55,6 +55,7 @@ * @param the type of elements handled by this coder, must extend {@code Message} */ public class Proto2Coder extends CustomCoder { + private static final long serialVersionUID = 0; /** * Produces a new Proto2Coder instance, for a given Protobuf message class. From e1926f3fe5394dae0287bb15abe23331a65241a1 Mon Sep 17 00:00:00 2001 From: earhart Date: Fri, 6 Mar 2015 14:28:27 -0800 Subject: [PATCH 0228/1541] Add -Xlint:-options to pom.xml Cross-compiling with JDK 8 causes an innocuous warning; this flag disables that warning. --- pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pom.xml b/pom.xml index e8d531f600c93..e5da472cd1257 100644 --- a/pom.xml +++ b/pom.xml @@ -84,6 +84,8 @@ -Xlint:all -Werror + + -Xlint:-options -Xlint:-cast -Xlint:-deprecation From 2f7daac4fd8553d26c3fbbe6d65a9786fa7ff106 Mon Sep 17 00:00:00 2001 From: Rob Earhart Date: Fri, 6 Mar 2015 14:45:24 -0800 Subject: [PATCH 0229/1541] Add serialVersionUID to serializable abstract base classes. 
--- .../java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java | 2 ++ .../java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java | 1 + .../com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java | 1 + .../java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java | 2 ++ .../java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java | 2 ++ .../com/google/cloud/dataflow/sdk/coders/StandardCoder.java | 1 + sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java | 2 ++ .../com/google/cloud/dataflow/sdk/transforms/PTransform.java | 1 + .../java/com/google/cloud/dataflow/sdk/transforms/View.java | 1 + .../dataflow/sdk/transforms/windowing/NonMergingWindowFn.java | 1 + .../dataflow/sdk/transforms/windowing/PartitioningWindowFn.java | 2 ++ .../cloud/dataflow/sdk/transforms/windowing/WindowFn.java | 1 + .../java/com/google/cloud/dataflow/sdk/util/WindowedValue.java | 2 ++ 13 files changed, 19 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java index ae72091737bbf..15380ca380d94 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java @@ -26,6 +26,8 @@ * @param the type of the values being transcoded */ public abstract class AtomicCoder extends StandardCoder { + private static final long serialVersionUID = 0; + protected AtomicCoder() {} @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java index f9adaf96c3242..5ad93521f172e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java @@ -39,6 +39,7 @@ */ public abstract class CustomCoder extends AtomicCoder implements Serializable { + private static final long serialVersionUID = 0; @JsonCreator public static CustomCoder of( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index c5202b22b286e..0aa24b6725742 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -39,6 +39,7 @@ */ public abstract class IterableLikeCoder> extends StandardCoder { + private static final long serialVersionUID = 0; public Coder getElemCoder() { return elementCoder; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java index 212c6339a7010..f418e574c592b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java @@ -31,6 +31,8 @@ * @param the type of values being transcoded */ public abstract class KvCoderBase extends StandardCoder { + private static final long serialVersionUID = 0; + @JsonCreator public static KvCoderBase of( // N.B. 
typeId is a required parameter here, since a field named "@type" diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java index 7a6a99790e906..d187fa3b04fe7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java @@ -31,6 +31,8 @@ * @param the type of values being transcoded */ public abstract class MapCoderBase extends StandardCoder { + private static final long serialVersionUID = 0; + @JsonCreator public static MapCoderBase of( // N.B. typeId is a required parameter here, since a field named "@type" diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java index ff27fd2be6dc8..8d7229168e94a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java @@ -35,6 +35,7 @@ * @param the type of the values being transcoded */ public abstract class StandardCoder implements Coder { + private static final long serialVersionUID = 0; protected StandardCoder() {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java index a4bf54f40651f..6ddf2a6c8642a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java @@ -52,6 +52,8 @@ * @param Type of elements read by the source. */ public abstract class Source implements Serializable { + private static final long serialVersionUID = 0; + /** * Splits the source into shards. 
* diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java index bf2f4bd4d1ff6..8f927c6e37639 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java @@ -169,6 +169,7 @@ */ public abstract class PTransform implements Serializable /* See the note above */ { + private static final long serialVersionUID = 0; /** * Applies this {@code PTransform} on the given {@code Input}, and returns its diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java index 760bccf0229f6..4f7fcd543628e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java @@ -335,6 +335,7 @@ public Map fromIterableInternal(Iterable> contents) { private abstract static class PCollectionViewBase extends PValueBase implements PCollectionView { + private static final long serialVersionUID = 0; @Override public TupleTag>> getTagInternal() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java index 0caaabe72281e..c42c4ff09a28b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java @@ -25,6 +25,7 @@ */ public abstract class NonMergingWindowFn extends WindowFn { + private static final long serialVersionUID = 0; @Override public final void mergeWindows(MergeContext c) { } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java index 7ec97bc40d5e8..85282084bfb72 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java @@ -30,6 +30,8 @@ */ public abstract class PartitioningWindowFn extends NonMergingWindowFn { + private static final long serialVersionUID = 0; + /** * Returns the single window to which elements with this timestamp belong. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java index 9eab380dba208..fdfa136ba5251 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java @@ -40,6 +40,7 @@ */ public abstract class WindowFn implements Serializable { + private static final long serialVersionUID = 0; /** * Information available when running {@link #assignWindows}. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 595d1b0985795..2147ef4867364 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -178,6 +178,8 @@ public String toString() { */ public abstract static class WindowedValueCoder extends StandardCoder> { + private static final long serialVersionUID = 0; + final Coder valueCoder; WindowedValueCoder(Coder valueCoder) { From 45b973d0ed9fc772ac3176b2699b6597e68e174b Mon Sep 17 00:00:00 2001 From: Rob Earhart Date: Fri, 6 Mar 2015 14:59:09 -0800 Subject: [PATCH 0230/1541] Add trivial hashCode() functions for tests which override equals() --- .../cloud/dataflow/sdk/coders/AvroCoderTest.java | 5 +++++ .../sdk/options/ProxyInvocationHandlerTest.java | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java index 513cf8ba0dc3c..2bd506cdf3549 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java @@ -112,6 +112,11 @@ public boolean equals(Object o) { return true; } + @Override + public int hashCode() { + return 0; + } + @Override public String toString() { return "Pojo{" diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java index 8d0beb7f11f31..e7f9d0e483978 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandlerTest.java @@ -525,6 +525,11 @@ static InnerType of(double value) { return rval; } + @Override + public int hashCode() { + return 0; + } + @Override public boolean equals(Object obj) { return obj != null @@ -540,6 +545,11 @@ private static class ComplexType { public List genericType; public InnerType innerType; + @Override + public int hashCode() { + return 0; + } + @Override public boolean equals(Object obj) { return obj != null From d1271e608f85fd2c01502ac00b955e93a89674e9 Mon Sep 17 00:00:00 2001 From: Rob Earhart Date: Fri, 6 Mar 2015 15:08:13 -0800 Subject: [PATCH 0231/1541] Add trivial hashCode() functions for tests which override equals() --- .../com/google/cloud/dataflow/examples/AutoComplete.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java b/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java index 79d5515e35668..19d6984a66b56 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/AutoComplete.java @@ -324,6 +324,11 @@ public boolean equals(Object other) { } } + @Override + public int hashCode() { + return Long.valueOf(count).hashCode() ^ value.hashCode(); + } + @Override public String toString() { return "CompletionCandidate[" + value + ", " + count + "]"; From 73a03a29ee272af23dca09da869843adfa7ddcac Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Mon, 9 Mar 2015 09:12:12 +0100 Subject: [PATCH 0232/1541] Ignore files created by Intellij. 
--- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 2f7896d1d1365..42ce01d2ce26b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ target/ + +# Ignore Intellij files +.idea +*.iml From f8765aa576d60bc2a01c95a36bcf69495501c00a Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Tue, 3 Mar 2015 08:52:51 +0100 Subject: [PATCH 0233/1541] Generic implementation of inner and outer left/right join. This is a patch to make it easier for user of DataFlow to do joins in the most common circumstances. It implements inner join and outer left and right join. In the cases of outer join. Since null cannot be serialized the user have to provide a value that represent null for that particular usecase. --- .gitignore | 2 + .../dataflow/sdk/transforms/join/Join.java | 176 ++++++++++++++++++ .../sdk/transforms/join/InnerJoinTest.java | 134 +++++++++++++ .../transforms/join/OuterLeftJoinTest.java | 141 ++++++++++++++ .../transforms/join/OuterRightJoinTest.java | 141 ++++++++++++++ 5 files changed, 594 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java diff --git a/.gitignore b/.gitignore index 2f7896d1d1365..e02456b1ee1f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ target/ +.idea +*.iml diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java new file mode 100644 index 0000000000000..b3e3196105487 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.common.base.Preconditions; + +/** + * Utility class with different versions of joins. All methods join two collections of + * key/value pairs (KV). + */ +public class Join { + + /** + * Inner join of two collections of KV elements. + * @param leftCollection Left side collection to join. + * @param rightCollection Right side collection to join. + * @param Type of the key for both collections + * @param Type of the values for the left collection. + * @param Type of the values for the right collection. 
+ * @return A joined collection of KV where Key is of type V1 and Value is type V2. + */ + public static PCollection> innerJoin( + final PCollection> leftCollection, final PCollection> rightCollection) { + Preconditions.checkNotNull(leftCollection); + Preconditions.checkNotNull(rightCollection); + + final TupleTag v1Tuple = new TupleTag<>(); + final TupleTag v2Tuple = new TupleTag<>(); + + PCollection> coGbkResultCollection = + KeyedPCollectionTuple.of(v1Tuple, leftCollection) + .and(v2Tuple, rightCollection) + .apply(CoGroupByKey.create()); + + return coGbkResultCollection.apply(ParDo.of( + new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + + Iterable leftValuesIterable = e.getValue().getAll(v1Tuple); + Iterable rightValuesIterable = e.getValue().getAll(v2Tuple); + + for (V1 leftValue : leftValuesIterable) { + for (V2 rightValue : rightValuesIterable) { + c.output(KV.of(leftValue, rightValue)); + } + } + } + })) + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder())); + } + + /** + * Left Outer Join of two collections of KV elements. + * @param leftCollection Left side collection to join. + * @param rightCollection Right side collection to join. + * @param nullValue Value to use as null value when right side do not match left side. + * @param Type of the key for both collections + * @param Type of the values for the left collection. + * @param Type of the values for the right collection. + * @return A joined collection of KV where Key is of type V1 and Value is type V2. Values that + * should be null or empty is replaced with nullValue. + */ + public static PCollection> leftOuterJoin( + final PCollection> leftCollection, + final PCollection> rightCollection, + final V2 nullValue) { + Preconditions.checkNotNull(leftCollection); + Preconditions.checkNotNull(rightCollection); + Preconditions.checkNotNull(nullValue); + + final TupleTag v1Tuple = new TupleTag<>(); + final TupleTag v2Tuple = new TupleTag<>(); + + PCollection> coGbkResultCollection = + KeyedPCollectionTuple.of(v1Tuple, leftCollection) + .and(v2Tuple, rightCollection) + .apply(CoGroupByKey.create()); + + return coGbkResultCollection.apply(ParDo.of( + new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + + Iterable leftValuesIterable = e.getValue().getAll(v1Tuple); + Iterable rightValuesIterable = e.getValue().getAll(v2Tuple); + + for (V1 leftValue : leftValuesIterable) { + if (rightValuesIterable.iterator().hasNext()) { + for (V2 rightValue : rightValuesIterable) { + c.output(KV.of(leftValue, rightValue)); + } + } else { + c.output(KV.of(leftValue, nullValue)); + } + } + } + })) + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder())); + } + + /** + * Right Outer Join of two collections of KV elements. + * @param leftCollection Left side collection to join. + * @param rightCollection Right side collection to join. + * @param nullValue Value to use as null value when left side do not match right side. + * @param Type of the key for both collections + * @param Type of the values for the left collection. + * @param Type of the values for the right collection. + * @return A joined collection of KV where Key is of type V1 and Value is type V2. Keys that + * should be null or empty is replaced with nullValue. 
+ */ + public static PCollection> rightOuterJoin( + final PCollection> leftCollection, + final PCollection> rightCollection, + final V1 nullValue) { + Preconditions.checkNotNull(leftCollection); + Preconditions.checkNotNull(rightCollection); + Preconditions.checkNotNull(nullValue); + + final TupleTag v1Tuple = new TupleTag<>(); + final TupleTag v2Tuple = new TupleTag<>(); + + PCollection> coGbkResultCollection = + KeyedPCollectionTuple.of(v1Tuple, leftCollection) + .and(v2Tuple, rightCollection) + .apply(CoGroupByKey.create()); + + return coGbkResultCollection.apply(ParDo.of( + new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) { + KV e = c.element(); + + Iterable leftValuesIterable = e.getValue().getAll(v1Tuple); + Iterable rightValuesIterable = e.getValue().getAll(v2Tuple); + + for (V2 rightValue : rightValuesIterable) { + if (leftValuesIterable.iterator().hasNext()) { + for (V1 leftValue : leftValuesIterable) { + c.output(KV.of(leftValue, rightValue)); + } + } else { + c.output(KV.of(nullValue, rightValue)); + } + } + } + })) + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder())); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java new file mode 100644 index 0000000000000..900835f172cf4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test Inner Join functionality. 
+ */ +public class InnerJoinTest { + + Pipeline p; + List> leftListOfKv; + List> listRightOfKv; + List> expectedResult; + + @Before + public void setup() { + + p = TestPipeline.create(); + leftListOfKv = new ArrayList<>(); + listRightOfKv = new ArrayList<>(); + + expectedResult = new ArrayList<>(); + } + + @Test + public void testJoinOneToOneMapping() { + leftListOfKv.add(KV.of("Key1", 5L)); + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key1", "foo")); + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.innerJoin( + leftCollection, rightCollection); + + expectedResult.add(KV.of(5L, "foo")); + expectedResult.add(KV.of(4L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinOneToManyMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + listRightOfKv.add(KV.of("Key2", "gazonk")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.innerJoin( + leftCollection, rightCollection); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(4L, "gazonk")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinManyToOneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + leftListOfKv.add(KV.of("Key2", 6L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.innerJoin( + leftCollection, rightCollection); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(6L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinNoneToNoneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key3", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.innerJoin( + leftCollection, rightCollection); + + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + p.run(); + } + + @Test(expected = NullPointerException.class) + public void testJoinLeftCollectionNull() { + Join.innerJoin(null, p.apply(Create.of(listRightOfKv))); + } + + @Test(expected = NullPointerException.class) + public void testJoinRightCollectionNull() { + Join.innerJoin(p.apply(Create.of(leftListOfKv)), null); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java new file mode 100644 index 0000000000000..81c27157cf97b --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + + +/** + * This test Outer Left Join functionality. + */ +public class OuterLeftJoinTest { + + Pipeline p; + List> leftListOfKv; + List> listRightOfKv; + List> expectedResult; + + @Before + public void setup() { + + p = TestPipeline.create(); + leftListOfKv = new ArrayList<>(); + listRightOfKv = new ArrayList<>(); + + expectedResult = new ArrayList<>(); + } + + @Test + public void testJoinOneToOneMapping() { + leftListOfKv.add(KV.of("Key1", 5L)); + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key1", "foo")); + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.leftOuterJoin( + leftCollection, rightCollection, ""); + + expectedResult.add(KV.of(5L, "foo")); + expectedResult.add(KV.of(4L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinOneToManyMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + listRightOfKv.add(KV.of("Key2", "gazonk")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.leftOuterJoin( + leftCollection, rightCollection, ""); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(4L, "gazonk")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinManyToOneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + leftListOfKv.add(KV.of("Key2", 6L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.leftOuterJoin( + leftCollection, rightCollection, ""); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(6L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinOneToNoneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key3", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.leftOuterJoin( + leftCollection, rightCollection, ""); + + expectedResult.add(KV.of(4L, "")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + p.run(); + } + + @Test(expected = NullPointerException.class) 
+ public void testJoinLeftCollectionNull() { + Join.leftOuterJoin(null, p.apply(Create.of(listRightOfKv)), ""); + } + + @Test(expected = NullPointerException.class) + public void testJoinRightCollectionNull() { + Join.leftOuterJoin(p.apply(Create.of(leftListOfKv)), null, ""); + } + + @Test(expected = NullPointerException.class) + public void testJoinNullValueIsNull() { + Join.leftOuterJoin(p.apply(Create.of(leftListOfKv)), p.apply(Create.of(listRightOfKv)), null); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java new file mode 100644 index 0000000000000..5d668aaed4d5a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; + +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + + +/** + * This test Outer Right Join functionality. 
+ */ +public class OuterRightJoinTest { + + Pipeline p; + List> leftListOfKv; + List> listRightOfKv; + List> expectedResult; + + @Before + public void setup() { + + p = TestPipeline.create(); + leftListOfKv = new ArrayList<>(); + listRightOfKv = new ArrayList<>(); + + expectedResult = new ArrayList<>(); + } + + @Test + public void testJoinOneToOneMapping() { + leftListOfKv.add(KV.of("Key1", 5L)); + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key1", "foo")); + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.rightOuterJoin( + leftCollection, rightCollection, -1L); + + expectedResult.add(KV.of(5L, "foo")); + expectedResult.add(KV.of(4L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinOneToManyMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + listRightOfKv.add(KV.of("Key2", "gazonk")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.rightOuterJoin( + leftCollection, rightCollection, -1L); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(4L, "gazonk")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinManyToOneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + leftListOfKv.add(KV.of("Key2", 6L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key2", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.rightOuterJoin( + leftCollection, rightCollection, -1L); + + expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of(6L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + + p.run(); + } + + @Test + public void testJoinNoneToOneMapping() { + leftListOfKv.add(KV.of("Key2", 4L)); + PCollection> leftCollection = p.apply(Create.of(leftListOfKv)); + + listRightOfKv.add(KV.of("Key3", "bar")); + PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); + + PCollection> output = Join.rightOuterJoin( + leftCollection, rightCollection, -1L); + + expectedResult.add(KV.of(-1L, "bar")); + DataflowAssert.that(output).containsInAnyOrder(expectedResult); + p.run(); + } + + @Test(expected = NullPointerException.class) + public void testJoinLeftCollectionNull() { + Join.rightOuterJoin(null, p.apply(Create.of(listRightOfKv)), ""); + } + + @Test(expected = NullPointerException.class) + public void testJoinRightCollectionNull() { + Join.rightOuterJoin(p.apply(Create.of(leftListOfKv)), null, -1L); + } + + @Test(expected = NullPointerException.class) + public void testJoinNullValueIsNull() { + Join.rightOuterJoin(p.apply(Create.of(leftListOfKv)), p.apply(Create.of(listRightOfKv)), null); + } +} From 314596aebc956fc6ba60c89f833e7cea71b58524 Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Wed, 4 Mar 2015 13:19:48 +0100 Subject: [PATCH 0234/1541] Join key is keept Decided to keep the key in after the join is done. Original usecase did not require the key. It is a breaking change but worth doing. 
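For illustration only (not part of the patch): a minimal sketch of the new result shape, using the same element types and inputs as the updated tests below. Previously the join key was dropped and each result element was a KV<V1, V2>; after this change it is a KV<K, KV<V1, V2>>.

    // Hedged sketch; mirrors the test data in this patch.
    Pipeline p = TestPipeline.create();
    PCollection<KV<String, Long>> left =
        p.apply(Create.of(Arrays.asList(KV.of("Key1", 5L))));
    PCollection<KV<String, String>> right =
        p.apply(Create.of(Arrays.asList(KV.of("Key1", "foo"))));
    // Before: PCollection<KV<Long, String>> containing KV.of(5L, "foo").
    // After: the join key is kept.
    PCollection<KV<String, KV<Long, String>>> joined =
        Join.innerJoin(left, right);   // contains KV.of("Key1", KV.of(5L, "foo"))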
--- .../dataflow/sdk/transforms/join/Join.java | 49 +++++++++++-------- .../sdk/transforms/join/InnerJoinTest.java | 22 ++++----- .../transforms/join/OuterLeftJoinTest.java | 24 ++++----- .../transforms/join/OuterRightJoinTest.java | 24 ++++----- 4 files changed, 63 insertions(+), 56 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java index b3e3196105487..2ada8c9b0f8b5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java @@ -37,10 +37,12 @@ public class Join { * @param Type of the key for both collections * @param Type of the values for the left collection. * @param Type of the values for the right collection. - * @return A joined collection of KV where Key is of type V1 and Value is type V2. + * @return A joined collection of KV where Key is the key and value is a + * KV where Key is of type V1 and Value is type V2. */ - public static PCollection> innerJoin( - final PCollection> leftCollection, final PCollection> rightCollection) { + public static PCollection>> innerJoin( + final PCollection> leftCollection, final PCollection> rightCollection) + { Preconditions.checkNotNull(leftCollection); Preconditions.checkNotNull(rightCollection); @@ -53,7 +55,7 @@ public static PCollection> innerJoin( .apply(CoGroupByKey.create()); return coGbkResultCollection.apply(ParDo.of( - new DoFn, KV>() { + new DoFn, KV>>() { @Override public void processElement(ProcessContext c) { KV e = c.element(); @@ -63,13 +65,14 @@ public void processElement(ProcessContext c) { for (V1 leftValue : leftValuesIterable) { for (V2 rightValue : rightValuesIterable) { - c.output(KV.of(leftValue, rightValue)); + c.output(KV.of(e.getKey(), KV.of(leftValue, rightValue))); } } } })) - .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), - ((KvCoder) rightCollection.getCoder()).getValueCoder())); + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getKeyCoder(), + KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder()))); } /** @@ -80,10 +83,11 @@ public void processElement(ProcessContext c) { * @param Type of the key for both collections * @param Type of the values for the left collection. * @param Type of the values for the right collection. - * @return A joined collection of KV where Key is of type V1 and Value is type V2. Values that + * @return A joined collection of KV where Key is the key and value is a + * KV where Key is of type V1 and Value is type V2. Values that * should be null or empty is replaced with nullValue. 
*/ - public static PCollection> leftOuterJoin( + public static PCollection>> leftOuterJoin( final PCollection> leftCollection, final PCollection> rightCollection, final V2 nullValue) { @@ -100,7 +104,7 @@ public static PCollection> leftOuterJoin( .apply(CoGroupByKey.create()); return coGbkResultCollection.apply(ParDo.of( - new DoFn, KV>() { + new DoFn, KV>>() { @Override public void processElement(ProcessContext c) { KV e = c.element(); @@ -111,16 +115,17 @@ public void processElement(ProcessContext c) { for (V1 leftValue : leftValuesIterable) { if (rightValuesIterable.iterator().hasNext()) { for (V2 rightValue : rightValuesIterable) { - c.output(KV.of(leftValue, rightValue)); + c.output(KV.of(e.getKey(), KV.of(leftValue, rightValue))); } } else { - c.output(KV.of(leftValue, nullValue)); + c.output(KV.of(e.getKey(), KV.of(leftValue, nullValue))); } } } })) - .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), - ((KvCoder) rightCollection.getCoder()).getValueCoder())); + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getKeyCoder(), + KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder()))); } /** @@ -131,10 +136,11 @@ public void processElement(ProcessContext c) { * @param Type of the key for both collections * @param Type of the values for the left collection. * @param Type of the values for the right collection. - * @return A joined collection of KV where Key is of type V1 and Value is type V2. Keys that + * @return A joined collection of KV where Key is the key and value is a + * KV where Key is of type V1 and Value is type V2. Keys that * should be null or empty is replaced with nullValue. */ - public static PCollection> rightOuterJoin( + public static PCollection>> rightOuterJoin( final PCollection> leftCollection, final PCollection> rightCollection, final V1 nullValue) { @@ -151,7 +157,7 @@ public static PCollection> rightOuterJoin( .apply(CoGroupByKey.create()); return coGbkResultCollection.apply(ParDo.of( - new DoFn, KV>() { + new DoFn, KV>>() { @Override public void processElement(ProcessContext c) { KV e = c.element(); @@ -162,15 +168,16 @@ public void processElement(ProcessContext c) { for (V2 rightValue : rightValuesIterable) { if (leftValuesIterable.iterator().hasNext()) { for (V1 leftValue : leftValuesIterable) { - c.output(KV.of(leftValue, rightValue)); + c.output(KV.of(e.getKey(), KV.of(leftValue, rightValue))); } } else { - c.output(KV.of(nullValue, rightValue)); + c.output(KV.of(e.getKey(), KV.of(nullValue, rightValue))); } } } })) - .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), - ((KvCoder) rightCollection.getCoder()).getValueCoder())); + .setCoder(KvCoder.of(((KvCoder) leftCollection.getCoder()).getKeyCoder(), + KvCoder.of(((KvCoder) leftCollection.getCoder()).getValueCoder(), + ((KvCoder) rightCollection.getCoder()).getValueCoder()))); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java index 900835f172cf4..574a19323b6c0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java @@ -37,7 +37,7 @@ public class InnerJoinTest { Pipeline p; List> leftListOfKv; List> listRightOfKv; - List> expectedResult; + List>> expectedResult; @Before public void setup() { @@ -59,11 +59,11 @@ public 
void testJoinOneToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.innerJoin( + PCollection>> output = Join.innerJoin( leftCollection, rightCollection); - expectedResult.add(KV.of(5L, "foo")); - expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of("Key1", KV.of(5L, "foo"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -78,11 +78,11 @@ public void testJoinOneToManyMapping() { listRightOfKv.add(KV.of("Key2", "gazonk")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.innerJoin( + PCollection>> output = Join.innerJoin( leftCollection, rightCollection); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(4L, "gazonk")); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "gazonk"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -97,11 +97,11 @@ public void testJoinManyToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.innerJoin( + PCollection>> output = Join.innerJoin( leftCollection, rightCollection); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(6L, "bar")); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); + expectedResult.add(KV.of("Key2", KV.of(6L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -115,7 +115,7 @@ public void testJoinNoneToNoneMapping() { listRightOfKv.add(KV.of("Key3", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.innerJoin( + PCollection>> output = Join.innerJoin( leftCollection, rightCollection); DataflowAssert.that(output).containsInAnyOrder(expectedResult); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java index 81c27157cf97b..ce8d531b6f3c1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java @@ -38,7 +38,7 @@ public class OuterLeftJoinTest { Pipeline p; List> leftListOfKv; List> listRightOfKv; - List> expectedResult; + List>> expectedResult; @Before public void setup() { @@ -60,11 +60,11 @@ public void testJoinOneToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.leftOuterJoin( + PCollection>> output = Join.leftOuterJoin( leftCollection, rightCollection, ""); - expectedResult.add(KV.of(5L, "foo")); - expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of("Key1", KV.of(5L, "foo"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -79,11 +79,11 @@ public void testJoinOneToManyMapping() { listRightOfKv.add(KV.of("Key2", "gazonk")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.leftOuterJoin( + PCollection>> output = Join.leftOuterJoin( leftCollection, rightCollection, ""); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(4L, "gazonk")); + expectedResult.add(KV.of("Key2", KV.of(4L, 
"bar"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "gazonk"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -98,11 +98,11 @@ public void testJoinManyToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.leftOuterJoin( + PCollection>> output = Join.leftOuterJoin( leftCollection, rightCollection, ""); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(6L, "bar")); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); + expectedResult.add(KV.of("Key2", KV.of(6L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -116,10 +116,10 @@ public void testJoinOneToNoneMapping() { listRightOfKv.add(KV.of("Key3", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.leftOuterJoin( + PCollection>> output = Join.leftOuterJoin( leftCollection, rightCollection, ""); - expectedResult.add(KV.of(4L, "")); + expectedResult.add(KV.of("Key2", KV.of(4L, ""))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java index 5d668aaed4d5a..f6c245d7afdfc 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java @@ -38,7 +38,7 @@ public class OuterRightJoinTest { Pipeline p; List> leftListOfKv; List> listRightOfKv; - List> expectedResult; + List>> expectedResult; @Before public void setup() { @@ -60,11 +60,11 @@ public void testJoinOneToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.rightOuterJoin( + PCollection>> output = Join.rightOuterJoin( leftCollection, rightCollection, -1L); - expectedResult.add(KV.of(5L, "foo")); - expectedResult.add(KV.of(4L, "bar")); + expectedResult.add(KV.of("Key1", KV.of(5L, "foo"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -79,11 +79,11 @@ public void testJoinOneToManyMapping() { listRightOfKv.add(KV.of("Key2", "gazonk")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.rightOuterJoin( + PCollection>> output = Join.rightOuterJoin( leftCollection, rightCollection, -1L); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(4L, "gazonk")); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); + expectedResult.add(KV.of("Key2", KV.of(4L, "gazonk"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -98,11 +98,11 @@ public void testJoinManyToOneMapping() { listRightOfKv.add(KV.of("Key2", "bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.rightOuterJoin( + PCollection>> output = Join.rightOuterJoin( leftCollection, rightCollection, -1L); - expectedResult.add(KV.of(4L, "bar")); - expectedResult.add(KV.of(6L, "bar")); + expectedResult.add(KV.of("Key2", KV.of(4L, "bar"))); + expectedResult.add(KV.of("Key2", KV.of(6L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); @@ -116,10 +116,10 @@ public void testJoinNoneToOneMapping() { listRightOfKv.add(KV.of("Key3", 
"bar")); PCollection> rightCollection = p.apply(Create.of(listRightOfKv)); - PCollection> output = Join.rightOuterJoin( + PCollection>> output = Join.rightOuterJoin( leftCollection, rightCollection, -1L); - expectedResult.add(KV.of(-1L, "bar")); + expectedResult.add(KV.of("Key3", KV.of(-1L, "bar"))); DataflowAssert.that(output).containsInAnyOrder(expectedResult); p.run(); } From ecc44e1d9f444ea6b78d145896c4e668b9f038aa Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Mon, 9 Mar 2015 12:38:31 +0100 Subject: [PATCH 0235/1541] Move join code to contrib module join-library. Adding pom.xml and README.md. --- contrib/join-library/README.md | 25 +++++ contrib/join-library/pom.xml | 91 +++++++++++++++++++ .../dataflow/contrib/joinlibrary}/Join.java | 5 +- .../contrib/joinlibrary}/InnerJoinTest.java | 2 +- .../joinlibrary}/OuterLeftJoinTest.java | 2 +- .../joinlibrary}/OuterRightJoinTest.java | 2 +- 6 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 contrib/join-library/README.md create mode 100644 contrib/join-library/pom.xml rename {sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join => contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary}/Join.java (96%) rename {sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join => contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary}/InnerJoinTest.java (98%) rename {sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join => contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary}/OuterLeftJoinTest.java (98%) rename {sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join => contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary}/OuterRightJoinTest.java (98%) diff --git a/contrib/join-library/README.md b/contrib/join-library/README.md new file mode 100644 index 0000000000000..b5766df50c6fd --- /dev/null +++ b/contrib/join-library/README.md @@ -0,0 +1,25 @@ +Join-library +============ + +Join-library provide inner join, outer left and right join functions to +Google DataFlow. The aim is to simplify the most common cases of join to a +simple function call. + +The functions are generic so it supports join of any types supported by +DataFlow. Input to the join functions are PCollections of Key/Values. Both the +left and right PCollections need the same type for the key. All the join +functions returns a Key/Value where Key is the join key and value is +a Key/Value where the key is the left value and right is the value. + +In the cases of outer join, since null cannot be serialized the user have +to provide a value that represent null for that particular use case. + +Example how to use join-library: + + PCollection> leftPcollection = ... + PCollection> rightPcollection = ... + + PCollection>> joinedPcollection = + Join.innerJoin(leftPcollection, rightPcollection); + +Questions or comments: diff --git a/contrib/join-library/pom.xml b/contrib/join-library/pom.xml new file mode 100644 index 0000000000000..2ae8bad3a2301 --- /dev/null +++ b/contrib/join-library/pom.xml @@ -0,0 +1,91 @@ + + + + 4.0.0 + + com.google.cloud.dataflow + join-library + Join library + Library with generic join functions for DataFlow. 
+ 0.0.2-SNAPSHOT + jar + + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + UTF-8 + 0.3.150227 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.2 + + 1.7 + 1.7 + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 2.12 + + ../../checkstyle.xml + true + true + true + + + + validate + + check + + + + + + + + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + ${google-cloud-dataflow-version} + + + + com.google.guava + guava + 18.0 + + + + + org.hamcrest + hamcrest-all + 1.3 + test + + + + junit + junit + 4.12 + test + + + + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java b/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java similarity index 96% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java rename to contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java index 2ada8c9b0f8b5..e13f9747c7942 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/Join.java +++ b/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java @@ -14,11 +14,14 @@ * the License. */ -package com.google.cloud.dataflow.sdk.transforms.join; +package com.google.cloud.dataflow.contrib.joinlibrary; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult; +import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey; +import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.TupleTag; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java similarity index 98% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java rename to contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java index 574a19323b6c0..c031cba7956a4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/InnerJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java @@ -14,7 +14,7 @@ * the License. */ -package com.google.cloud.dataflow.sdk.transforms.join; +package com.google.cloud.dataflow.contrib.joinlibrary; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java similarity index 98% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java rename to contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java index ce8d531b6f3c1..c342ef706b0e1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterLeftJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java @@ -14,7 +14,7 @@ * the License. 
*/ -package com.google.cloud.dataflow.sdk.transforms.join; +package com.google.cloud.dataflow.contrib.joinlibrary; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java similarity index 98% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java rename to contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java index f6c245d7afdfc..23bdf301dc90a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/OuterRightJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java @@ -14,7 +14,7 @@ * the License. */ -package com.google.cloud.dataflow.sdk.transforms.join; +package com.google.cloud.dataflow.contrib.joinlibrary; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; From 87dd6286db01702ab29741c54474254fc634f2b7 Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Mon, 9 Mar 2015 13:01:04 +0100 Subject: [PATCH 0236/1541] Gitignor should not be changed in this pullrequest. --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index e02456b1ee1f0..2f7896d1d1365 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ target/ -.idea -*.iml From a99854466247bf541b6e288c6a7ac3fb8d1ba96f Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Mon, 9 Mar 2015 10:46:28 -0700 Subject: [PATCH 0237/1541] Update comment in .gitignore. --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 42ce01d2ce26b..1fed5fc45db84 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ target/ -# Ignore Intellij files -.idea +# Ignore IntelliJ files. +.idea/ *.iml From 70b0005485a96609fb1e48d92f0cd891968df2c8 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 6 Mar 2015 11:16:01 -0800 Subject: [PATCH 0238/1541] Explicitly check cached mapTaskExecutor's all operations could restart. Previously it fails at operation level. 
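As an aside (not part of the patch; the actual change follows below), the idea is a single up-front check that every operation in a cached executor can be restarted, rather than letting an individual operation fail later. A rough sketch, using the names the patch introduces:

    // Hedged sketch of the up-front check; the patch adds an equivalent
    // supportsRestart() to MapTaskExecutor and asserts it in the streaming
    // worker before a cached executor is reused.
    boolean allRestartable = true;
    for (Operation op : operations) {
      if (!op.supportsRestart()) {
        allRestartable = false;
        break;
      }
    }
    Preconditions.checkState(allRestartable,
        "Streaming runner requires all operations support restart.");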
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87960788 --- .../sdk/runners/worker/StreamingDataflowWorker.java | 4 +++- .../dataflow/sdk/util/common/worker/MapTaskExecutor.java | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 05c7722aeb58b..e8c2758bb37f6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -34,11 +34,11 @@ import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor; +import com.google.common.base.Preconditions; import org.eclipse.jetty.server.Request; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.handler.AbstractHandler; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -331,6 +331,8 @@ private void process( if (workerAndContext == null) { context = new StreamingModeExecutionContext(computation, stateFetcher); worker = MapTaskExecutorFactory.create(options, mapTask, context); + Preconditions.checkState(worker.supportsRestart(), + "Streaming runner requires all operations support restart."); } else { worker = workerAndContext.getWorker(); context = workerAndContext.getContext(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java index 2da7f9a21998b..8f83e4bdc7f21 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutor.java @@ -107,4 +107,13 @@ public void close() throws Exception { stateSampler.close(); super.close(); } + + public boolean supportsRestart() { + for (Operation op : operations) { + if (!op.supportsRestart()) { + return false; + } + } + return true; + } } From 29936902ea80079f2bfecdab47b0e77c9660c661 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 6 Mar 2015 12:18:43 -0800 Subject: [PATCH 0239/1541] Create: java sdk code and integration test for supporting Create in streaming. 
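For context (illustrative only; the options and runner wiring are assumed rather than taken from this patch): the goal is that the same Create transform a batch pipeline uses can also seed a streaming pipeline, with the runner change below routing it through applyHelper(input, isStreaming).

    // Hedged usage sketch: identical pipeline code in batch and streaming mode.
    Pipeline p = Pipeline.create(options);   // options.isStreaming() may be true
    PCollection<String> seed =
        p.apply(Create.of(Arrays.asList("a", "b", "c")));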
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87967100 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 4 +- .../sdk/runners/DataflowPipelineRunner.java | 3 + .../cloud/dataflow/sdk/transforms/Create.java | 83 ++++++++++++++++--- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index f501a316eb6c0..af9c7b9d60b03 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -64,6 +64,7 @@ public class PubsubIO { private static final int PUBSUB_NAME_MAX_LENGTH = 255; private static final String SUBSCRIPTION_RANDOM_TEST_PREFIX = "_random/"; + private static final String SUBSCRIPTION_STARTING_SIGNAL = "_starting_signal/"; private static final String TOPIC_DEV_NULL_TEST_NAME = "/topics/dev/null"; /** @@ -85,7 +86,8 @@ public static void validateTopicName(String topic) { } public static void validateSubscriptionName(String subscription) { - if (subscription.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX)) { + if (subscription.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX) + || subscription.startsWith(SUBSCRIPTION_STARTING_SIGNAL)) { return; } Matcher match = SUBSCRIPTION_REGEXP.matcher(subscription); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index fffec136c5a20..4283614c5e5bc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -26,6 +26,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; import com.google.cloud.dataflow.sdk.transforms.Combine; +import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.GroupByKey; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; @@ -150,6 +151,8 @@ public Output apply( boolean runnerSortsByTimestamp = true; return (Output) ((GroupByKey) transform).applyHelper( (PCollection) input, options.isStreaming(), runnerSortsByTimestamp); + } else if (transform instanceof Create) { + return (Output) ((Create) transform).applyHelper(input, options.isStreaming()); } else { return super.apply(transform, input); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java index 05a2ebf43a958..6cdb11b342b4e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -17,10 +17,17 @@ package com.google.cloud.dataflow.sdk.transforms; import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.io.PubsubIO; import 
com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.values.CodedTupleTag; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PBegin; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -31,6 +38,7 @@ import org.joda.time.Instant; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -184,9 +192,60 @@ public static CreateTimestamped timestamped( @Override public PCollection apply(PInput input) { - return PCollection.createPrimitiveOutputInternal(new GlobalWindows()); + return applyHelper(input, false); } + public PCollection applyHelper(PInput input, boolean isStreaming) { + if (isStreaming) { + Coder elemCoder = (Coder) getElementCoder(); + return Pipeline.applyTransform( + input, PubsubIO.Read.named("StartingSignal").subscription("_starting_signal/")) + .apply(ParDo.of(new DoFn>() { + private static final long serialVersionUID = 0; + + @Override + public void processElement(DoFn>.ProcessContext c) + throws Exception { + c.output(KV.of((Void) null, (Void) null)); + } + })) + .apply(ParDo.of(new OutputOnceDoFn<>(elems, elemCoder))); + } else { + return PCollection.createPrimitiveOutputInternal(new GlobalWindows()); + } + } + + private static class OutputOnceDoFn extends DoFn, T> + implements DoFn.RequiresKeyedState { + private static final long serialVersionUID = 0; + + private final CodedTupleTag outputOnceTag = + CodedTupleTag.of("outputOnce", StringUtf8Coder.of()); + private final byte[] encodedBytes; + private final IterableCoder iterableCoder; + + public OutputOnceDoFn(Iterable elems, Coder coder) { + this.iterableCoder = IterableCoder.of(coder); + try { + this.encodedBytes = CoderUtils.encodeToByteArray(iterableCoder, elems); + } catch (CoderException e) { + throw new IllegalArgumentException( + "Unable to encode element '" + elems + "' using coder '" + coder + "'.", e); + } + } + + @Override + public void processElement(ProcessContext c) throws IOException { + String state = c.keyedState().lookup(outputOnceTag); + if (state == null || state.isEmpty()) { + Iterable elems = CoderUtils.decodeFromByteArray(iterableCoder, encodedBytes); + for (T t : elems) { + c.output(t); + } + c.keyedState().store(outputOnceTag, "done"); + } + } + } ///////////////////////////////////////////////////////////////////////////// @@ -207,8 +266,7 @@ public Iterable getElements() { return elems; } - @Override - protected Coder getDefaultOutputCoder() { + private Coder getElementCoder() { // First try to deduce a coder using the types of the elements. 
Class elementType = null; for (T elem : elems) { @@ -222,7 +280,7 @@ protected Coder getDefaultOutputCoder() { } } if (elementType == null) { - return super.getDefaultOutputCoder(); + return null; } if (elementType.getTypeParameters().length == 0) { Coder candidate = getCoderRegistry().getDefaultCoder(TypeToken.of(elementType)); @@ -242,11 +300,17 @@ protected Coder getDefaultOutputCoder() { break; } } - if (coder != null) { - return coder; - } + return coder; + } - return super.getDefaultOutputCoder(); + @Override + protected Coder getDefaultOutputCoder() { + Coder elemCoder = getElementCoder(); + if (elemCoder == null) { + return super.getDefaultOutputCoder(); + } else { + return elemCoder; + } } /** @@ -279,13 +343,12 @@ public PCollection apply(PBegin input) { private static class ConvertTimestamps extends DoFn, T> { @Override - public void processElement(ProcessContext c) { + public void processElement(ProcessContext c) { c.outputWithTimestamp(c.element().getValue(), c.element().getTimestamp()); } } } - ///////////////////////////////////////////////////////////////////////////// static { From dc816e3847e2bb33f49fed0cf8e9c22f3dbee70b Mon Sep 17 00:00:00 2001 From: jeremiele Date: Fri, 6 Mar 2015 14:12:06 -0800 Subject: [PATCH 0240/1541] Added a getter to access the WindowFn from the Window.Bound PTransform. This is needed for third party runner implementations wanting to support windowing. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87978148 --- .../cloud/dataflow/sdk/transforms/windowing/Window.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index 7e3a318eecf79..7521db8582baf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -189,6 +189,13 @@ public Bound named(String name) { return new Bound<>(name, fn); } + /** + * Returns the user-specified {@code WindowFn}. + */ + public WindowFn getWindowFn() { + return fn; + } + @Override public PCollection apply(PCollection input) { return PCollection.createPrimitiveOutputInternal(fn); From c9f072d30262691ebf555c7b08ff527b5d22c8a5 Mon Sep 17 00:00:00 2001 From: sisk Date: Fri, 6 Mar 2015 15:27:27 -0800 Subject: [PATCH 0241/1541] Reorder the log line to move the user's message earlier in the log line, making it easier to read in the Cloud Logging UI. 
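Concretely, the formatter's output layout changes as shown in the updated tests below; for example (values taken from the test expectations):

    Before: 1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName test.message
    After:  1970-01-01T00:00:00.001Z INFO test.message [testJobId testWorkerId testWorkId 2] LoggerName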
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=87985958 --- .../logging/DataflowWorkerLoggingFormatter.java | 6 +++--- .../DataflowWorkerLoggingFormatterTest.java | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java index 5af5cafa74aee..24a7e7ff82d2b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatter.java @@ -91,12 +91,12 @@ public String format(LogRecord record) { return DATE_FORMATTER.print(record.getMillis()) + " " + MoreObjects.firstNonNull(LEVELS.get(record.getLevel()), record.getLevel().getName()) - + " " + MoreObjects.firstNonNull(jobId.get(), "unknown") + + " " + record.getMessage() + + " [" + MoreObjects.firstNonNull(jobId.get(), "unknown") + " " + MoreObjects.firstNonNull(workerId.get(), "unknown") + " " + MoreObjects.firstNonNull(workId.get(), "unknown") + " " + record.getThreadID() - + " " + record.getLoggerName() - + " " + record.getMessage() + System.lineSeparator() + + "] " + record.getLoggerName() + System.lineSeparator() + (exception != null ? exception : ""); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java index 916a38567d58f..97650831acaac 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/logging/DataflowWorkerLoggingFormatterTest.java @@ -37,8 +37,8 @@ public class DataflowWorkerLoggingFormatterTest { @Test public void testWithUnsetValuesInMDC() { assertEquals( - "1970-01-01T00:00:00.001Z INFO unknown unknown unknown 2 LoggerName " - + "test.message" + System.lineSeparator(), + "1970-01-01T00:00:00.001Z INFO test.message [unknown unknown unknown 2] LoggerName" + + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord("test.message", null))); } @@ -50,8 +50,8 @@ public void testWithMessage() { DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " - + "test.message" + System.lineSeparator(), + "1970-01-01T00:00:00.001Z INFO test.message [testJobId testWorkerId testWorkId 2] " + + "LoggerName" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord("test.message", null))); } @@ -63,8 +63,8 @@ public void testWithMessageAndException() { DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName " - + "test.message" + System.lineSeparator() + "1970-01-01T00:00:00.001Z INFO test.message [testJobId testWorkerId testWorkId 2] " + + "LoggerName" + System.lineSeparator() + "java.lang.Throwable: exception.test.message" + System.lineSeparator() + "\tat declaringClass1.method1(file1.java:1)" + System.lineSeparator() + "\tat declaringClass2.method2(file2.java:1)" + System.lineSeparator() @@ -80,7 +80,7 @@ public void 
testWithException() { DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + "1970-01-01T00:00:00.001Z INFO null [testJobId testWorkerId testWorkId 2] LoggerName" + System.lineSeparator() + "java.lang.Throwable: exception.test.message" + System.lineSeparator() + "\tat declaringClass1.method1(file1.java:1)" + System.lineSeparator() @@ -97,7 +97,7 @@ public void testWithoutExceptionOrMessage() { DataflowWorkerLoggingFormatter.setWorkId("testWorkId"); assertEquals( - "1970-01-01T00:00:00.001Z INFO testJobId testWorkerId testWorkId 2 LoggerName null" + "1970-01-01T00:00:00.001Z INFO null [testJobId testWorkerId testWorkId 2] LoggerName" + System.lineSeparator(), new DataflowWorkerLoggingFormatter().format( createLogRecord(null, null))); From 6d0d1cd28280313a51283adef899164a42bf3bd0 Mon Sep 17 00:00:00 2001 From: chamikara Date: Sun, 8 Mar 2015 18:22:15 -0700 Subject: [PATCH 0242/1541] Adds a method to IOChannelFactory that can be used to efficiently determine if a read channel created using the factory is seekable. This is a backward incompatible change. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88082111 --- .../sdk/util/FileIOChannelFactory.java | 5 ++++ .../sdk/util/GcsIOChannelFactory.java | 6 +++++ .../dataflow/sdk/util/IOChannelFactory.java | 26 ++++++++++++++----- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java index b43924273c98a..82a3f9ea37521 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java @@ -99,4 +99,9 @@ public WritableByteChannel create(String spec, String mimeType) public long getSizeBytes(String spec) throws IOException { return Files.size(FileSystems.getDefault().getPath(spec)); } + + @Override + public boolean isReadSeekEfficient(String spec) throws IOException { + return true; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java index c0bb4624e1f5c..2f90428daa282 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java @@ -72,4 +72,10 @@ public long getSizeBytes(String spec) throws IOException { GcsUtil util = options.getGcsUtil(); return util.fileSize(path); } + + @Override + public boolean isReadSeekEfficient(String spec) throws IOException { + // TODO It is incorrect to return true here for files with content encoding set to gzip. + return true; + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java index 3661ffd3b1208..9e37907d233d9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java @@ -35,20 +35,20 @@ public interface IOChannelFactory { * Matches a specification, which may contain globs, against available * resources. * - * Glob handling is dependent on the implementation. Implementations should + *

    Glob handling is dependent on the implementation. Implementations should * all support globs in the final component of a path (eg /foo/bar/*.txt), * however they are not required to support globs in the directory paths. * - * The result is the (possibly empty) set of specifications which match. + *

    The result is the (possibly empty) set of specifications which match. */ Collection match(String spec) throws IOException; /** * Returns a read channel for the given specification. * - * The specification is not expanded; it is used verbatim. + *

    The specification is not expanded; it is used verbatim. * - * If seeking is supported, then this returns a + *

    If seeking is supported, then this returns a * {@link java.nio.channels.SeekableByteChannel}. */ ReadableByteChannel open(String spec) throws IOException; @@ -56,14 +56,28 @@ public interface IOChannelFactory { /** * Returns a write channel for the given specification. * - * The specification is not expanded; is it used verbatim. + *

    The specification is not expanded; is it used verbatim. */ WritableByteChannel create(String spec, String mimeType) throws IOException; /** * Returns the size in bytes for the given specification. * - * The specification is not expanded; it is used verbatim. + *

    The specification is not expanded; it is used verbatim. */ long getSizeBytes(String spec) throws IOException; + + /** + * Returns {@code true} if the channel created when invoking method {@link #open} for the given + * file specification is guaranteed to be of type {@link java.nio.channels.SeekableByteChannel + * SeekableByteChannel} and if seeking into positions of the channel is recommended. Returns + * {@code false} if the channel returned is not a {@code SeekableByteChannel}. May return + * {@code false} even if the channel returned is a {@code SeekableByteChannel}, if seeking is not + * efficient for the given file specification. + * + *

    Only efficiently seekable files can be split into offset ranges. + * + *

    The specification is not expanded; it is used verbatim. + */ + boolean isReadSeekEfficient(String spec) throws IOException; } From 73dc3715c382a2441895b8556d2e2553840ad8c9 Mon Sep 17 00:00:00 2001 From: bchambers Date: Mon, 9 Mar 2015 14:32:08 -0700 Subject: [PATCH 0243/1541] Cleanup warnings about use of generics in GroupAlsoByWindowsDoFn. There were a bunch of warnings around unsafe casts, etc. These look to be safe, subject to assumptions about how the DoFn is being used. This change attempts to put all the assumptions into the creation of the DoFn based on presence of a combineFn and the type of windowFn being used. It also documents the assumptions behind each of the @SuppressWarnings. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88152959 --- .../worker/GroupAlsoByWindowsParDoFn.java | 2 +- .../dataflow/sdk/transforms/GroupByKey.java | 9 +- .../sdk/util/GroupAlsoByWindowsDoFn.java | 290 +++++++++++------- .../sdk/util/GroupAlsoByWindowsDoFnTest.java | 3 +- 4 files changed, 184 insertions(+), 120 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index 8b74253bd7fa6..62c185a06f286 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -126,7 +126,7 @@ public DoFnInfo createDoFnInfo() { @Override public DoFnInfo createDoFnInfo() { return new DoFnInfo( - new GroupAlsoByWindowsDoFn( + GroupAlsoByWindowsDoFn.create( (WindowFn) windowFn, (KeyedCombineFn) combineFn, elemCoder), diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 93d657a800cf2..1f7024d4314b9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -25,7 +25,6 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata; -import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn; @@ -254,10 +253,10 @@ public PCollection>> apply( Coder>> outputKvCoder = KvCoder.of(keyCoder, outputValueCoder); - return input.apply(ParDo.of( - new GroupAlsoByWindowsDoFn, BoundedWindow>( - (WindowFn) windowFn, null, inputIterableElementValueCoder))) - .setCoder(outputKvCoder); + GroupAlsoByWindowsDoFn, ?> fn = + GroupAlsoByWindowsDoFn.create(windowFn, inputIterableElementValueCoder); + + return input.apply(ParDo.of(fn)).setCoder(outputKvCoder); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index b493e75d2dda6..896dc98b5d033 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -50,142 +50,207 @@ * 
@param window type */ @SuppressWarnings("serial") -public class GroupAlsoByWindowsDoFn +public abstract class GroupAlsoByWindowsDoFn extends DoFn>>, KV> { - // TODO: Add back RequiresKeyed state once that is supported. - protected WindowFn windowFn; - protected KeyedCombineFn combineFn; - protected Coder inputCoder; - - public GroupAlsoByWindowsDoFn( - WindowFn windowFn, - KeyedCombineFn combineFn, - Coder inputCoder) { - this.windowFn = windowFn; - this.combineFn = combineFn; - this.inputCoder = inputCoder; - } - - @Override - public void processElement(ProcessContext processContext) throws Exception { - DoFnProcessContext>>, KV> context = - (DoFnProcessContext>>, KV>) processContext; - - if (windowFn instanceof NonMergingWindowFn && combineFn == null) { - processElementViaIterators(context); + /** + * Create a {@link GroupAlsoByWindowsDoFn} without a combine function. Depending on the + * {@code windowFn} this will either use iterators or window sets to implement the grouping. + * + * @param windowFn The window function to use for grouping + * @param inputCoder the input coder to use + */ + public static GroupAlsoByWindowsDoFn, W> + create(final WindowFn windowFn, final Coder inputCoder) { + if (windowFn instanceof NonMergingWindowFn) { + return new GABWViaIteratorsDoFn(); } else { - processElementViaWindowSet(context); + return new GABWViaWindowSetDoFn, W>(windowFn) { + @Override + AbstractWindowSet, W> createWindowSet(K key, + DoFnProcessContext>>, KV>> context, + BatchActiveWindowManager activeWindowManager) throws Exception { + return new BufferingWindowSet(key, windowFn, inputCoder, + context, activeWindowManager); + } + }; } } - private void processElementViaWindowSet( - DoFnProcessContext>>, KV> context) - throws Exception { + /** + * Create a {@link GroupAlsoByWindowsDoFn} using the specified combineFn. + */ + private static GroupAlsoByWindowsDoFn + createCombine(final WindowFn windowFn, + final KeyedCombineFn combineFn, + final Coder inputCoder) { + return new GABWViaWindowSetDoFn(windowFn) { + @Override + AbstractWindowSet createWindowSet(K key, + DoFnProcessContext>>, KV> context, + BatchActiveWindowManager activeWindowManager) throws Exception { + return new CombiningWindowSet( + key, windowFn, combineFn, inputCoder, context, activeWindowManager); + } + }; + } - K key = context.element().getKey(); - BatchActiveWindowManager activeWindowManager = new BatchActiveWindowManager<>(); - AbstractWindowSet windowSet; + /** + * Construct a {@link GroupAlsoByWindowsDoFn} using the {@code combineFn} if available. 
+ */ + public static GroupAlsoByWindowsDoFn + create(WindowFn windowFn, KeyedCombineFn combineFn, + Coder inputCoder) { if (combineFn == null) { - windowSet = new BufferingWindowSet( - key, windowFn, inputCoder, context, activeWindowManager); + // Without combineFn, it should be the case that VO = Iterable, so this is safe + @SuppressWarnings("unchecked") + GroupAlsoByWindowsDoFn fn = + (GroupAlsoByWindowsDoFn) create(windowFn, inputCoder); + return fn; } else { - windowSet = new CombiningWindowSet( - key, windowFn, combineFn, inputCoder, context, activeWindowManager); + // With a combineFn, then VI = VO, and we'll use those as the type of the accumulator + @SuppressWarnings("unchecked") + GroupAlsoByWindowsDoFn fn = + (GroupAlsoByWindowsDoFn) createCombine(windowFn, combineFn, inputCoder); + return fn; } + } - for (WindowedValue e : context.element().getValue()) { - for (BoundedWindow window : e.getWindows()) { - windowSet.put((W) window, e.getValue()); + private static class GABWViaIteratorsDoFn + extends GroupAlsoByWindowsDoFn, W> { + + @Override + public void processElement(ProcessContext c) throws Exception { + @SuppressWarnings({"unchecked", "rawtypes"}) + DoFnProcessContext>> internal = (DoFnProcessContext) c; + + K key = c.element().getKey(); + Iterable> value = c.element().getValue(); + PeekingReiterator> iterator; + + if (value instanceof Collection) { + iterator = new PeekingReiterator<>(new ListReiterator>( + new ArrayList>((Collection>) value), 0)); + } else if (value instanceof Reiterable) { + iterator = new PeekingReiterator<>(((Reiterable>) value).iterator()); + } else { + throw new IllegalArgumentException( + "Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable"); } - ((WindowFn) windowFn) - .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); - maybeOutputWindows(activeWindowManager, windowSet, windowFn, e.getTimestamp()); - } + // This ListMultimap is a map of window maxTimestamps to the list of active + // windows with that maxTimestamp. + ListMultimap windows = ArrayListMultimap.create(); - maybeOutputWindows(activeWindowManager, windowSet, windowFn, null); + while (iterator.hasNext()) { + WindowedValue e = iterator.peek(); + for (BoundedWindow window : e.getWindows()) { + // If this window is not already in the active set, emit a new WindowReiterable + // corresponding to this window, starting at this element in the input Reiterable. + if (!windows.containsEntry(window.maxTimestamp(), window)) { + // Iterating through the WindowReiterable may advance iterator as an optimization + // for as long as it detects that there are no new windows. + windows.put(window.maxTimestamp(), window); + internal.outputWindowedValue( + KV.of(key, (Iterable) new WindowReiterable(iterator, window)), + window.maxTimestamp(), + Arrays.asList(window)); + } + } + // Copy the iterator in case the next DoFn cached its version of the iterator instead + // of immediately iterating through it. + // And, only advance the iterator if the consuming operation hasn't done so. + iterator = iterator.copy(); + if (iterator.hasNext() && iterator.peek() == e) { + iterator.next(); + } - windowSet.flush(); + // Remove all windows with maxTimestamp behind the current timestamp. + Iterator windowIterator = windows.keys().iterator(); + while (windowIterator.hasNext() + && windowIterator.next().isBefore(e.getTimestamp())) { + windowIterator.remove(); + } + } + } } - /** - * Outputs any windows that are complete, with their corresponding elemeents. 
- * If there are potentially complete windows, try merging windows first. - */ - private void maybeOutputWindows( - BatchActiveWindowManager activeWindowManager, - AbstractWindowSet windowSet, - WindowFn windowFn, - Instant nextTimestamp) throws Exception { - if (activeWindowManager.hasMoreWindows() - && (nextTimestamp == null - || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { - // There is at least one window ready to emit. Merge now in case that window should be merged - // into a not yet completed one. - ((WindowFn) windowFn) - .mergeWindows(new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); - } + private abstract static class GABWViaWindowSetDoFn + extends GroupAlsoByWindowsDoFn { - while (activeWindowManager.hasMoreWindows() - && (nextTimestamp == null - || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { - W window = activeWindowManager.getWindow(); - if (windowSet.contains(window)) { - windowSet.markCompleted(window); - } + private WindowFn windowFn; + + public GABWViaWindowSetDoFn(WindowFn windowFn) { + @SuppressWarnings("unchecked") + WindowFn noWildcard = (WindowFn) windowFn; + this.windowFn = noWildcard; } - } - private void processElementViaIterators( - DoFnProcessContext>>, KV> context) - throws Exception { - K key = context.element().getKey(); - Iterable> value = context.element().getValue(); - PeekingReiterator> iterator; - - if (value instanceof Collection) { - iterator = new PeekingReiterator<>(new ListReiterator>( - new ArrayList>((Collection>) value), 0)); - } else if (value instanceof Reiterable) { - iterator = new PeekingReiterator(((Reiterable>) value).iterator()); - } else { - throw new IllegalArgumentException( - "Input to GroupAlsoByWindowsDoFn must be a Collection or Reiterable"); + abstract AbstractWindowSet createWindowSet( + K key, + DoFnProcessContext>>, KV> context, + BatchActiveWindowManager activeWindowManager) + throws Exception; + + @Override + public void processElement( + DoFn>>, KV>.ProcessContext c) throws Exception { + @SuppressWarnings("unchecked") + DoFnProcessContext>>, KV> context = + (DoFnProcessContext>>, KV>) c; + processElementViaWindowSet(context); } - // This ListMultimap is a map of window maxTimestamps to the list of active - // windows with that maxTimestamp. - ListMultimap windows = ArrayListMultimap.create(); - - while (iterator.hasNext()) { - WindowedValue e = iterator.peek(); - for (BoundedWindow window : e.getWindows()) { - // If this window is not already in the active set, emit a new WindowReiterable - // corresponding to this window, starting at this element in the input Reiterable. - if (!windows.containsEntry(window.maxTimestamp(), window)) { - // Iterating through the WindowReiterable may advance iterator as an optimization - // for as long as it detects that there are no new windows. 
- windows.put(window.maxTimestamp(), window); - context.outputWindowedValue( - KV.of(key, (VO) new WindowReiterable(iterator, window)), - window.maxTimestamp(), - Arrays.asList((W) window)); + public void processElementViaWindowSet( + DoFnProcessContext>>, KV> context) + throws Exception { + K key = context.element().getKey(); + BatchActiveWindowManager activeWindowManager = new BatchActiveWindowManager<>(); + AbstractWindowSet windowSet = + createWindowSet(key, context, activeWindowManager); + + for (WindowedValue e : context.element().getValue()) { + for (BoundedWindow window : e.getWindows()) { + @SuppressWarnings("unchecked") + W w = (W) window; + windowSet.put(w, e.getValue()); } + windowFn.mergeWindows( + new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); + + maybeOutputWindows(activeWindowManager, windowSet, e.getTimestamp()); } - // Copy the iterator in case the next DoFn cached its version of the iterator instead - // of immediately iterating through it. - // And, only advance the iterator if the consuming operation hasn't done so. - iterator = iterator.copy(); - if (iterator.hasNext() && iterator.peek() == e) { - iterator.next(); + + maybeOutputWindows(activeWindowManager, windowSet, null); + + windowSet.flush(); + } + + + /** + * Outputs any windows that are complete, with their corresponding elemeents. + * If there are potentially complete windows, try merging windows first. + */ + private void maybeOutputWindows( + BatchActiveWindowManager activeWindowManager, + AbstractWindowSet windowSet, + Instant nextTimestamp) throws Exception { + if (activeWindowManager.hasMoreWindows() + && (nextTimestamp == null + || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { + // There is at least one window ready to emit. Merge now in case that window should be + // merged into a not yet completed one. + windowFn.mergeWindows( + new AbstractWindowSet.WindowMergeContext(windowSet, windowFn)); } - // Remove all windows with maxTimestamp behind the current timestamp. - Iterator windowIterator = windows.keys().iterator(); - while (windowIterator.hasNext() - && windowIterator.next().isBefore(e.getTimestamp())) { - windowIterator.remove(); + while (activeWindowManager.hasMoreWindows() + && (nextTimestamp == null + || activeWindowManager.nextTimestamp().isBefore(nextTimestamp))) { + W window = activeWindowManager.getWindow(); + if (windowSet.contains(window)) { + windowSet.markCompleted(window); + } } } } @@ -228,7 +293,8 @@ public String toString() { } /** - * The {@link Reiterator} used by {@link WindowReiterable}. + * The {@link Reiterator} used by + * {@link com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn.WindowReiterable}. 
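[Illustrative aside] The create() factory introduced above picks between two concrete strategies: an iterator-based DoFn when the WindowFn never merges windows, and a window-set-based DoFn otherwise. A minimal, self-contained sketch of that dispatch pattern follows; GroupStrategy, StreamingStrategy and BufferingStrategy are illustrative names only, not SDK classes, and this is a simplification of what the patch actually builds.

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    abstract class GroupStrategy<V> {
      /** Mirrors GroupAlsoByWindowsDoFn.create(): choose the strategy from the WindowFn. */
      static <V> GroupStrategy<V> create(boolean windowFnMerges) {
        if (!windowFnMerges) {
          // Non-merging windows (e.g. fixed windows): values can be streamed straight through.
          return new StreamingStrategy<V>();
        }
        // Merging windows (e.g. sessions): values must be buffered until merging is decided.
        return new BufferingStrategy<V>();
      }
      abstract void add(String window, V value);
    }

    class StreamingStrategy<V> extends GroupStrategy<V> {
      @Override void add(String window, V value) {
        System.out.println("emit " + value + " for " + window + " immediately");
      }
    }

    class BufferingStrategy<V> extends GroupStrategy<V> {
      private final Map<String, List<V>> windowSet = new HashMap<String, List<V>>();
      @Override void add(String window, V value) {
        List<V> buffered = windowSet.get(window);
        if (buffered == null) {
          buffered = new ArrayList<V>();
          windowSet.put(window, buffered);
        }
        buffered.add(value);   // held until windows are merged and marked complete
      }
    }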
*/ private static class WindowReiterator implements Reiterator { private PeekingReiterator> iterator; @@ -322,7 +388,7 @@ public void remove() { @Override public Reiterator copy() { - return new ListReiterator(list, index); + return new ListReiterator(list, index); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java index 464fcf9b868a7..c3482f074fd74 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFnTest.java @@ -253,8 +253,7 @@ KV>, List> makeRunner( WindowFn windowFn) { GroupAlsoByWindowsDoFn, IntervalWindow> fn = - new GroupAlsoByWindowsDoFn, IntervalWindow>( - windowFn, null, StringUtf8Coder.of()); + GroupAlsoByWindowsDoFn.create(windowFn, StringUtf8Coder.of()); DoFnRunner>>, KV>, List> runner = From 241e58bda1df52d983666d78b4356294aa9b7f7a Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 9 Mar 2015 15:27:58 -0700 Subject: [PATCH 0244/1541] Temporarily disable the unit test. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88158689 --- .../com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java index 11b5f6e85b7c9..bd9535f1f11c0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java @@ -20,6 +20,7 @@ import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageB; import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageC; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -27,6 +28,7 @@ /** * Tests for Proto2Coder. */ +@Ignore("Enable when we fix interal build process") @RunWith(JUnit4.class) public class Proto2CoderTest { From 783a5fa507460d9acce98d255365b536cfa7c5fb Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 9 Mar 2015 15:42:19 -0700 Subject: [PATCH 0245/1541] String scrubbing. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88160076 --- .../cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java | 4 ++-- .../main/java/com/google/cloud/dataflow/sdk/util/Structs.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index f01d5ffd62db6..cf2e87452308c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -55,8 +55,8 @@ public class RetryHttpRequestInitializer implements HttpRequestInitializer { * Http response codes that should be silently ignored. 
*/ private static final Set DEFAULT_IGNORED_RESPONSE_CODES = new HashSet<>( - Arrays.asList(307 /* Redirect, handled by Apiary client */, - 308 /* Resume Incomplete, handled by Apiary client */)); + Arrays.asList(307 /* Redirect, handled by the client library */, + 308 /* Resume Incomplete, handled by the client library */)); /** * Http response timeout to use for hanging gets. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java index eab606c8a214a..25d72cbbf73a4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java @@ -59,7 +59,7 @@ public static byte[] getBytes(Map map, String name, @Nullable by return defaultValue; } // TODO: Need to agree on a format for encoding bytes in - // a string that can be sent over the Apiary wire, over the cloud + // a string that can be sent to the backend, over the cloud // map task work API. base64 encoding seems pretty common. Switch to it? return StringUtils.jsonStringToByteArray(jsonString); } From b14a07043797b3cadad499008aaf1f0100567eae Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Tue, 10 Mar 2015 09:42:25 +0100 Subject: [PATCH 0246/1541] Fix spelling errors and adjusted copyright headers. --- contrib/join-library/README.md | 10 +++++----- contrib/join-library/pom.xml | 4 ++-- .../cloud/dataflow/contrib/joinlibrary/Join.java | 2 +- .../dataflow/contrib/joinlibrary/InnerJoinTest.java | 2 +- .../contrib/joinlibrary/OuterLeftJoinTest.java | 2 +- .../contrib/joinlibrary/OuterRightJoinTest.java | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/contrib/join-library/README.md b/contrib/join-library/README.md index b5766df50c6fd..47d85a18a4c1c 100644 --- a/contrib/join-library/README.md +++ b/contrib/join-library/README.md @@ -1,14 +1,14 @@ Join-library ============ -Join-library provide inner join, outer left and right join functions to -Google DataFlow. The aim is to simplify the most common cases of join to a +Join-library provides inner join, outer left and right join functions to +Google Cloud Dataflow. The aim is to simplify the most common cases of join to a simple function call. The functions are generic so it supports join of any types supported by -DataFlow. Input to the join functions are PCollections of Key/Values. Both the +Dataflow. Input to the join functions are PCollections of Key/Values. Both the left and right PCollections need the same type for the key. All the join -functions returns a Key/Value where Key is the join key and value is +functions return a Key/Value where Key is the join key and value is a Key/Value where the key is the left value and right is the value. In the cases of outer join, since null cannot be serialized the user have @@ -22,4 +22,4 @@ Example how to use join-library: PCollection>> joinedPcollection = Join.innerJoin(leftPcollection, rightPcollection); -Questions or comments: +Questions or comments: diff --git a/contrib/join-library/pom.xml b/contrib/join-library/pom.xml index 2ae8bad3a2301..263efbe58722d 100644 --- a/contrib/join-library/pom.xml +++ b/contrib/join-library/pom.xml @@ -6,9 +6,9 @@ 4.0.0 com.google.cloud.dataflow - join-library + google-cloud-dataflow-java-contrib-join-library Join library - Library with generic join functions for DataFlow. + Library with generic join functions for Dataflow. 
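[Illustrative aside] Expanding the join-library README usage example above into a slightly fuller sketch: the element types, the Create.of() inputs and the existing Pipeline p are illustrative assumptions, and only Join.innerJoin() is taken from the library itself; the outer-join variants mentioned in the README additionally take the value to substitute for missing matches.

    PCollection<KV<String, Long>> leftPcollection = p.apply(
        Create.of(KV.of("user1", 10L), KV.of("user2", 20L)));
    PCollection<KV<String, String>> rightPcollection = p.apply(
        Create.of(KV.of("user1", "US"), KV.of("user3", "SE")));

    // Both sides share the key type (String); the result pairs each matching key
    // with a KV of the left and right values, as described in the README above.
    PCollection<KV<String, KV<Long, String>>> joinedPcollection =
        Join.innerJoin(leftPcollection, rightPcollection);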
0.0.2-SNAPSHOT jar diff --git a/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java b/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java index e13f9747c7942..9b550f82b6a31 100644 --- a/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java +++ b/contrib/join-library/src/main/java/com/google/cloud/dataflow/contrib/joinlibrary/Join.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Google Inc. + * Copyright (C) 2015 The Google Cloud Dataflow Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of diff --git a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java index c031cba7956a4..bdd3b46a5dff0 100644 --- a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/InnerJoinTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Google Inc. + * Copyright (C) 2015 The Google Cloud Dataflow Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of diff --git a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java index c342ef706b0e1..e26f9b13e94a8 100644 --- a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterLeftJoinTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Google Inc. + * Copyright (C) 2015 The Google Cloud Dataflow Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of diff --git a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java index 23bdf301dc90a..b0b4c81ecd58d 100644 --- a/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java +++ b/contrib/join-library/src/test/java/com/google/cloud/dataflow/contrib/joinlibrary/OuterRightJoinTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Google Inc. + * Copyright (C) 2015 The Google Cloud Dataflow Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of From c8c305cc4cdeebbe4a4ba84071a4963eceaa121c Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Tue, 10 Mar 2015 09:43:10 +0100 Subject: [PATCH 0247/1541] Modify checkstyle to accept new copyright header. As discussed in https://github.com/GoogleCloudPlatform/DataflowJavaSDK/pull/10 With external contributors the copyright header should be: Copyright (C) 2015 The Google Cloud Dataflow Authors Old copyright header is still accepted. 
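[Illustrative aside] The commit above widens the checkstyle header regex so that both the old Google Inc. header and the new Authors header pass. As a hedged sanity check, the pattern from the checkstyle.xml diff below can be exercised directly with java.util.regex; the class name is only for illustration.

    import java.util.regex.Pattern;

    public class HeaderRegexCheck {
      public static void main(String[] args) {
        // The header pattern from the checkstyle.xml change, as a Java string literal.
        Pattern header = Pattern.compile(
            "^(//| \\*) Copyright (\\([cC]\\) )?[\\d]{4}(\\-[\\d]{4})? "
            + "(Google Inc\\.|The Google Cloud Dataflow Authors).*$");

        // Old-style header line: still accepted (prints true).
        System.out.println(header.matcher(" * Copyright (C) 2014 Google Inc.").matches());
        // New-style header line for external contributions: also accepted (prints true).
        System.out.println(header.matcher(
            " * Copyright (C) 2015 The Google Cloud Dataflow Authors").matches());
      }
    }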
--- checkstyle.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkstyle.xml b/checkstyle.xml index 3e79c3d48028c..d2a302c701bc1 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -36,7 +36,7 @@ page at http://checkstyle.sourceforge.net/config.html --> --> + value="^(//| \*) Copyright (\([cC]\) )?[\d]{4}(\-[\d]{4})? (Google Inc\.|The Google Cloud Dataflow Authors).*$" /> From 3d4bd412f0823fccc992ced6a0e126ce1307e334 Mon Sep 17 00:00:00 2001 From: Magnus Runesson Date: Tue, 10 Mar 2015 09:52:26 +0100 Subject: [PATCH 0248/1541] Adjusted comments to the new copyright header and copyright in checkstyle.xml --- checkstyle.xml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index d2a302c701bc1..224f37c1b4d9e 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -1,6 +1,6 @@ value="^(//| \*) Copyright (\([cC]\) )?[\d]{4}(\-[\d]{4})? (Google Inc\.|The Google Cloud Dataflow Authors).*$" /> - + From 14be24766a715faf42a115b99ee63e4f44a6f66a Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Tue, 10 Mar 2015 11:14:24 -0700 Subject: [PATCH 0249/1541] Revert "Adjusted comments to the new copyright header and copyright in checkstyle.xml" This reverts commit 3d4bd412f0823fccc992ced6a0e126ce1307e334. --- checkstyle.xml | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index 224f37c1b4d9e..d2a302c701bc1 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -1,6 +1,6 @@ value="^(//| \*) Copyright (\([cC]\) )?[\d]{4}(\-[\d]{4})? (Google Inc\.|The Google Cloud Dataflow Authors).*$" /> - + From e9b3c96957a31c5d38cad87ce186e633eba50218 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Tue, 10 Mar 2015 11:46:50 -0700 Subject: [PATCH 0250/1541] Addition of AUTHORS.md. --- contrib/join-library/AUTHORS.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 contrib/join-library/AUTHORS.md diff --git a/contrib/join-library/AUTHORS.md b/contrib/join-library/AUTHORS.md new file mode 100644 index 0000000000000..331f927548c0b --- /dev/null +++ b/contrib/join-library/AUTHORS.md @@ -0,0 +1,6 @@ +# Authors of join-library + +The following is the official list of authors for copyright purposes of this community-contributed module. + + Google Inc. + Magnus Runesson, M.Runesson [at] gmail [dot] com From 6ba73cb47a54a72de6d1b4a556ef23fa98961b45 Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Tue, 10 Mar 2015 11:48:28 -0700 Subject: [PATCH 0251/1541] Minor fixes to README.md and pom.xml. --- contrib/join-library/README.md | 2 +- contrib/join-library/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/join-library/README.md b/contrib/join-library/README.md index 47d85a18a4c1c..a6e0b73d6c2e2 100644 --- a/contrib/join-library/README.md +++ b/contrib/join-library/README.md @@ -22,4 +22,4 @@ Example how to use join-library: PCollection>> joinedPcollection = Join.innerJoin(leftPcollection, rightPcollection); -Questions or comments: +Questions or comments: `M.Runesson [at] gmail [dot] com` diff --git a/contrib/join-library/pom.xml b/contrib/join-library/pom.xml index 263efbe58722d..5e843a20c9f74 100644 --- a/contrib/join-library/pom.xml +++ b/contrib/join-library/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.google.cloud.dataflow - google-cloud-dataflow-java-contrib-join-library + google-cloud-dataflow-java-contrib-joinlibrary Join library Library with generic join functions for Dataflow. 
0.0.2-SNAPSHOT From ea56a5fe5c80281ef5e0d42d28671218d3d99794 Mon Sep 17 00:00:00 2001 From: davor Date: Mon, 9 Mar 2015 20:16:23 -0700 Subject: [PATCH 0252/1541] Fix parts of flakiness in RateLimitingTest. On a fast-enough machine, numFailures could be equal to DEFAULT_MAX_PARALLELISM. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88180618 --- .../google/cloud/dataflow/sdk/transforms/RateLimitingTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java index 377e533fc039b..14eeff96b937c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/RateLimitingTest.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.runners.DirectPipeline; @@ -153,7 +154,7 @@ public void testExceptionHandling() { // The first failure should prevent the scheduling of any more elements. Assert.assertThat(fn.numFailures.get(), is(both(greaterThanOrEqualTo(1)) - .and(lessThan(RateLimiting.DEFAULT_MAX_PARALLELISM)))); + .and(lessThanOrEqualTo(RateLimiting.DEFAULT_MAX_PARALLELISM)))); } /** From ca3ffef4d93b1dacdca94898844c2a0e15542d12 Mon Sep 17 00:00:00 2001 From: robertwb Date: Tue, 10 Mar 2015 10:19:44 -0700 Subject: [PATCH 0253/1541] Allow arbitrarily large CoGroupByKey results by only caching the first 10K elements in memory. 
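[Illustrative aside] The change below caches only the first 10,000 tagged values per result and serves the rest from a lazily filtered pass over a re-iterable tail. The following is a compact sketch of that idea only, using a plain list in place of the SDK's RawUnionValue and Reiterator machinery; Tagged and LazyHeadTailDemo are hypothetical names.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical stand-in for RawUnionValue: a value labelled with its union tag.
    class Tagged {
      final int tag;
      final String value;
      Tagged(int tag, String value) { this.tag = tag; this.value = value; }
    }

    public class LazyHeadTailDemo {
      public static void main(String[] args) {
        List<Tagged> all = new ArrayList<Tagged>();
        for (int i = 0; i < 20; i++) {
          all.add(new Tagged(i % 3, "v" + i));
        }

        int inMemoryElementCount = 5;

        // Eager part: demultiplex only the first few values into per-tag lists.
        List<List<String>> heads = new ArrayList<List<String>>();
        for (int tag = 0; tag < 3; tag++) {
          heads.add(new ArrayList<String>());
        }
        for (int i = 0; i < inMemoryElementCount; i++) {
          Tagged t = all.get(i);
          heads.get(t.tag).add(t.value);
        }

        // Lazy part: the tail is never copied per tag; each per-tag read re-scans it
        // and keeps only matching values, which is what the filtered tail view does.
        List<Tagged> tail = all.subList(inMemoryElementCount, all.size());
        for (int tag = 0; tag < 3; tag++) {
          List<String> perTag = new ArrayList<String>(heads.get(tag));
          for (Tagged t : tail) {
            if (t.tag == tag) {
              perTag.add(t.value);
            }
          }
          System.out.println("tag " + tag + ": " + perTag);
        }
      }
    }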
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88231835 --- .../sdk/transforms/join/CoGbkResult.java | 233 +++++++++++------- .../sdk/transforms/join/CoGbkResultTest.java | 109 ++++++++ 2 files changed, 258 insertions(+), 84 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java index 14121d649aa06..42a3e6e75d8d9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java @@ -21,17 +21,21 @@ import com.google.api.client.util.Preconditions; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.CoderException; -import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.MapCoder; import com.google.cloud.dataflow.sdk.coders.StandardCoder; -import com.google.cloud.dataflow.sdk.coders.VarIntCoder; import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.PropertyNames; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.cloud.dataflow.sdk.values.TupleTagList; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; @@ -40,41 +44,60 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; -import java.util.Map; -import java.util.TreeMap; /** * A row result of a CoGroupByKey. This is a tuple of Iterables produced for * a given key, and these can be accessed in different ways. */ public class CoGbkResult { - // TODO: If we keep this representation for any amount of time, - // optimize it so that the union tag does not have to be repeated in the - // values stored under the union tag key. /** * A map of integer union tags to a list of union objects. * Note: the key and the embedded union tag are the same, so it is redundant * to store it multiple times, but for now it makes encoding easier. */ - private final Map> valueMap; + private final List> valueMap; private final CoGbkResultSchema schema; + private static final int DEFAULT_IN_MEMORY_ELEMENT_COUNT = 10_000; + + private static final Logger LOG = LoggerFactory.getLogger(CoGbkResult.class); + /** * A row in the PCollection resulting from a CoGroupByKey transform. * Currently, this row must fit into memory. 
* * @param schema the set of tuple tags used to refer to input tables and * result values - * @param values the raw results from a group-by-key + * @param taggedValues the raw results from a group-by-key */ + public CoGbkResult( + CoGbkResultSchema schema, + Iterable taggedValues) { + this(schema, taggedValues, DEFAULT_IN_MEMORY_ELEMENT_COUNT); + } + @SuppressWarnings("unchecked") public CoGbkResult( CoGbkResultSchema schema, - Iterable values) { + Iterable taggedValues, + int inMemoryElementCount) { this.schema = schema; - valueMap = new TreeMap<>(); - for (RawUnionValue value : values) { + valueMap = new ArrayList<>(); + for (int unionTag = 0; unionTag < schema.size(); unionTag++) { + valueMap.add(new ArrayList<>()); + } + + // Demultiplex the first imMemoryElementCount tagged union values + // according to their tag. + final Iterator taggedIter = taggedValues.iterator(); + int elementCount = 0; + while (taggedIter.hasNext()) { + if (elementCount++ >= inMemoryElementCount && taggedIter instanceof Reiterator) { + // Let the tails be lazy. + break; + } + RawUnionValue value = taggedIter.next(); // Make sure the given union tag has a corresponding tuple tag in the // schema. int unionTag = value.getUnionTag(); @@ -82,17 +105,48 @@ public CoGbkResult( throw new IllegalStateException("union tag " + unionTag + " has no corresponding tuple tag in the result schema"); } - List taggedValueList = valueMap.get(unionTag); - if (taggedValueList == null) { - taggedValueList = new ArrayList<>(); - valueMap.put(unionTag, taggedValueList); + List valueList = (List) valueMap.get(unionTag); + valueList.add(value.getValue()); + } + + if (taggedIter.hasNext()) { + // If we get here, there were more elements than we can afford to + // keep in memory, so we copy the re-iterable of remaining items + // and append filtered views to each of the sorted lists computed earlier. + LOG.info("CoGbkResult has more than " + inMemoryElementCount + " elements," + + "reiteration (which may be slow) is required."); + final Reiterator tail = (Reiterator) taggedIter; + for (int unionTag = 0; unionTag < schema.size(); unionTag++) { + final int unionTag0 = unionTag; + final Iterable head = valueMap.get(unionTag); + // This is a trinary-state array recording whether a given tag is + // present in the tail. The inital value is null (unknown) for all + // tags, and the first iteration through the entire list will set + // these values to true or false to avoid needlessly iterating if + // filtering against a given tag would not match anything. 
+ final Boolean[] containsTag = new Boolean[schema.size()]; + valueMap.set( + unionTag, + new Iterable() { + Reiterator start = tail.copy(); + @Override + public Iterator iterator() { + return Iterators.concat( + head.iterator(), + new UnionValueIterator<>(unionTag0, tail.copy(), containsTag)); + } + }); } - taggedValueList.add(value); } } public boolean isEmpty() { - return valueMap == null || valueMap.isEmpty(); + for (Iterable tagValues : valueMap) { + if (tagValues.iterator().hasNext()) { + return false; + } + } + return true; } /** @@ -118,11 +172,9 @@ public Iterable getAll(TupleTag tag) { throw new IllegalArgumentException("TupleTag " + tag + " is not in the schema"); } - List unions = valueMap.get(index); - if (unions == null) { - return buildEmptyIterable(tag); - } - return new UnionValueIterable<>(unions); + @SuppressWarnings("unchecked") + Iterable unions = (Iterable) valueMap.get(index); + return unions; } /** @@ -149,7 +201,8 @@ public V getOnly(TupleTag tag, V defaultValue) { public static class CoGbkResultCoder extends StandardCoder { private final CoGbkResultSchema schema; - private final MapCoder> mapCoder; + private final UnionCoder unionCoder; + private MapCoder> mapCoder; /** * Returns a CoGbkResultCoder for the given schema and unionCoder. @@ -167,22 +220,14 @@ public static CoGbkResultCoder of( @JsonProperty(PropertyNames.CO_GBK_RESULT_SCHEMA) CoGbkResultSchema schema) { Preconditions.checkArgument(components.size() == 1, "Expecting 1 component, got " + components.size()); - return new CoGbkResultCoder(schema, (MapCoder) components.get(0)); + return new CoGbkResultCoder(schema, (UnionCoder) components.get(0)); } private CoGbkResultCoder( CoGbkResultSchema tupleTags, UnionCoder unionCoder) { this.schema = tupleTags; - this.mapCoder = MapCoder.of(VarIntCoder.of(), - ListCoder.of(unionCoder)); - } - - private CoGbkResultCoder( - CoGbkResultSchema tupleTags, - MapCoder mapCoder) { - this.schema = tupleTags; - this.mapCoder = mapCoder; + this.unionCoder = unionCoder; } @@ -193,7 +238,7 @@ public List> getCoderArguments() { @Override public List> getComponents() { - return Arrays.>asList(mapCoder); + return Arrays.>asList(unionCoder); } @Override @@ -204,6 +249,7 @@ public CloudObject asCloudObject() { } @Override + @SuppressWarnings("unchecked") public void encode( CoGbkResult value, OutputStream outStream, @@ -212,7 +258,9 @@ public void encode( if (!schema.equals(value.getSchema())) { throw new CoderException("input schema does not match coder schema"); } - mapCoder.encode(value.valueMap, outStream, context); + for (int unionTag = 0; unionTag < schema.size(); unionTag++) { + tagListCoder(unionTag).encode(value.valueMap.get(unionTag), outStream, Context.NESTED); + } } @Override @@ -220,9 +268,16 @@ public CoGbkResult decode( InputStream inStream, Context context) throws CoderException, IOException { - Map> map = mapCoder.decode( - inStream, context); - return new CoGbkResult(schema, map); + List> valueMap = new ArrayList<>(); + for (int unionTag = 0; unionTag < schema.size(); unionTag++) { + valueMap.add(tagListCoder(unionTag).decode(inStream, Context.NESTED)); + } + return new CoGbkResult(schema, valueMap); + } + + @SuppressWarnings("rawtypes") + private IterableCoder tagListCoder(int unionTag) { + return IterableCoder.of(unionCoder.getComponents().get(unionTag)); } @Override @@ -267,9 +322,8 @@ public CoGbkResult and(TupleTag tag, List data) { "Attempting to call and() on a CoGbkResult apparently not created by" + " of()."); } - Map> valueMap = new 
TreeMap<>(this.valueMap); - valueMap.put(nextTestUnionId, - convertValueListToUnionList(nextTestUnionId, data)); + List> valueMap = new ArrayList<>(this.valueMap); + valueMap.add(data); return new CoGbkResult( new CoGbkResultSchema(schema.getTupleTagList().and(tag)), valueMap, nextTestUnionId + 1); @@ -280,7 +334,7 @@ public CoGbkResult and(TupleTag tag, List data) { */ public static CoGbkResult empty() { return new CoGbkResult(new CoGbkResultSchema(TupleTagList.empty()), - new TreeMap>()); + new ArrayList>()); } ////////////////////////////////////////////////////////////////////////////// @@ -289,7 +343,7 @@ public static CoGbkResult empty() { private CoGbkResult( CoGbkResultSchema schema, - Map> valueMap, + List> valueMap, int nextTestUnionId) { this(schema, valueMap); this.nextTestUnionId = nextTestUnionId; @@ -297,24 +351,11 @@ private CoGbkResult( private CoGbkResult( CoGbkResultSchema schema, - Map> valueMap) { + List> valueMap) { this.schema = schema; this.valueMap = valueMap; } - private static List convertValueListToUnionList( - int unionTag, List data) { - List unionList = new ArrayList<>(); - for (V value : data) { - unionList.add(new RawUnionValue(unionTag, value)); - } - return unionList; - } - - private Iterable buildEmptyIterable(TupleTag tag) { - return new ArrayList<>(); - } - private V innerGetOnly( TupleTag tag, V defaultValue, @@ -324,8 +365,9 @@ private V innerGetOnly( throw new IllegalArgumentException("TupleTag " + tag + " is not in the schema"); } - List unions = valueMap.get(index); - if (unions == null || unions.isEmpty()) { + @SuppressWarnings("unchecked") + Iterator unions = (Iterator) valueMap.get(index).iterator(); + if (!unions.hasNext()) { if (useDefault) { return defaultValue; } else { @@ -333,44 +375,67 @@ private V innerGetOnly( + " corresponds to an empty result, and no default was provided"); } } - if (unions.size() != 1) { + V value = unions.next(); + if (unions.hasNext()) { throw new IllegalArgumentException("TupleTag " + tag - + " corresponds to a non-singleton result of size " + unions.size()); + + " corresponds to a non-singleton result"); } - return (V) unions.get(0).getValue(); + return value; } /** - * Lazily converts and recasts an {@code Iterable} into an - * {@code Iterable}, where V is the type of the raw union value's contents. + * Lazily filters and recasts an {@code Iterator} into an + * {@code Iterator}, where V is the type of the raw union value's contents. */ - private static class UnionValueIterable implements Iterable { + private static class UnionValueIterator implements Iterator { - private final Iterable unions; + private final int tag; + private final PeekingIterator unions; + private final Boolean[] containsTag; - private UnionValueIterable(Iterable unions) { - this.unions = unions; + private UnionValueIterator(int tag, Iterator unions, Boolean[] containsTag) { + this.tag = tag; + this.unions = Iterators.peekingIterator(unions); + this.containsTag = containsTag; } @Override - public Iterator iterator() { - final Iterator unionsIterator = unions.iterator(); - return new Iterator() { - @Override - public boolean hasNext() { - return unionsIterator.hasNext(); + public boolean hasNext() { + if (containsTag[tag] == Boolean.FALSE) { + return false; + } + advance(); + if (unions.hasNext()) { + return true; + } else { + // We can now resolve all the "unknown" null values. 
+ for (int i = 0; i < containsTag.length; i++) { + if (containsTag[i] == null) { + containsTag[i] = false; + } } + return false; + } + } - @Override - public V next() { - return (V) unionsIterator.next().getValue(); - } + @Override + @SuppressWarnings("unchecked") + public V next() { + advance(); + return (V) unions.next().getValue(); + } - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + private void advance() { + int curTag; + while (unions.hasNext() && (curTag = unions.peek().getUnionTag()) != tag) { + containsTag[curTag] = true; + unions.next(); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultTest.java new file mode 100644 index 0000000000000..c71439aa351de --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.join; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.emptyIterable; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.util.common.Reiterable; +import com.google.cloud.dataflow.sdk.util.common.Reiterator; +import com.google.cloud.dataflow.sdk.values.TupleTag; +import com.google.cloud.dataflow.sdk.values.TupleTagList; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.ArrayList; +import java.util.List; + +/** + * Tests the CoGbkResult. 
+ */ +@RunWith(JUnit4.class) +public class CoGbkResultTest { + + @Test + public void testLazyResults() { + runLazyResult(0); + runLazyResult(1); + runLazyResult(3); + runLazyResult(10); + } + + public void runLazyResult(int cacheSize) { + int valueLen = 7; + TestUnionValues values = new TestUnionValues(0, 1, 0, 3, 0, 3, 3); + CoGbkResult result = new CoGbkResult(createSchema(5), values, cacheSize); + assertThat(values.maxPos(), equalTo(Math.min(cacheSize, valueLen))); + assertThat(result.getAll(new TupleTag("tag0")), contains(0, 2, 4)); + assertThat(values.maxPos(), equalTo(valueLen)); + assertThat(result.getAll(new TupleTag("tag3")), contains(3, 5, 6)); + assertThat(result.getAll(new TupleTag("tag2")), emptyIterable()); + assertThat(result.getOnly(new TupleTag("tag1")), equalTo(1)); + } + + private CoGbkResultSchema createSchema(int size) { + List> tags = new ArrayList<>(); + for (int i = 0; i < size; i++) { + tags.add(new TupleTag("tag" + i)); + } + return new CoGbkResultSchema(TupleTagList.of(tags)); + } + + private static class TestUnionValues implements Reiterable { + + final int[] tags; + int maxPos = 0; + + /** + * This will create a list of RawUnionValues whose tags are as given and + * values are increasing starting at 0 (i.e. the index in the constructor). + */ + public TestUnionValues(int... tags) { + this.tags = tags; + } + + /** + * Returns the highest position iterated to so far, useful for ensuring + * laziness. + */ + public int maxPos() { + return maxPos; + } + + @Override + public Reiterator iterator() { + return iterator(0); + } + + public Reiterator iterator(final int start) { + return new Reiterator() { + int pos = start; + public boolean hasNext() { return pos < tags.length; } + public RawUnionValue next() { + maxPos = Math.max(pos + 1, maxPos); + return new RawUnionValue(tags[pos], pos++); + } + public void remove() { throw new UnsupportedOperationException(); } + public Reiterator copy() { return iterator(pos); } + }; + } + } +} From 29d3312483540ac3ceb42b7e995d5caba5edc49c Mon Sep 17 00:00:00 2001 From: jlewi Date: Tue, 10 Mar 2015 11:46:33 -0700 Subject: [PATCH 0254/1541] Fix the javadoc for Count transforms. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88241545 --- .../cloud/dataflow/sdk/transforms/Count.java | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 87e6a1c5e10b8..2422483ac69aa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -20,34 +20,10 @@ import com.google.cloud.dataflow.sdk.values.PCollection; /** - * {@code Count} takes a {@code PCollection} and returns a - * {@code PCollection>} representing a map from each - * distinct element of the input {@code PCollection} to the number of times - * that element occurs in the input. Each of the keys in the output - * {@code PCollection} is unique. - * - *

<p> Two values of type {@code T} are compared for equality not by
- * regular Java {@link Object#equals}, but instead by first encoding
- * each of the elements using the {@code PCollection}'s {@code Coder}, and then
- * comparing the encoded bytes. This admits efficient parallel
- * evaluation.
- *
- * <p> By default, the {@code Coder} of the keys of the output
- * {@code PCollection} is the same as the {@code Coder} of the
- * elements of the input {@code PCollection}.
- *
- * <p> Each output element is in the window by which its corresponding input
- * was grouped, and has the timestamp of the end of that window. The output
- * {@code PCollection} has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * as the input.
- *
- * <p> Example of use:
- * <pre> {@code
- * PCollection<String> words = ...;
- * PCollection<KV<String, Long>> wordCounts =
- *     words.apply(Count.<String>perElement());
- * } </pre>
    + * Count transforms can be used to count the number of elements in a PCollection. + * {@link PerElement Count.PerElement can be used to count the number of occurrences of each + * distinct element in the PCollection. {@link Globally Count.Globally} can + * be used to count the total number of elements in a PCollection. */ public class Count { From a60e1ac94cdd027b219f744dcd3e8ddad509dfab Mon Sep 17 00:00:00 2001 From: chernyak Date: Tue, 10 Mar 2015 12:09:12 -0700 Subject: [PATCH 0255/1541] Updates to Windmill API for streaming side inputs: break the GlobalDataId into an explicit tag and version field ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88243950 --- sdk/src/main/proto/windmill.proto | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sdk/src/main/proto/windmill.proto b/sdk/src/main/proto/windmill.proto index a8d856766f83f..64d6d18a40044 100644 --- a/sdk/src/main/proto/windmill.proto +++ b/sdk/src/main/proto/windmill.proto @@ -81,8 +81,13 @@ message TagList { repeated Value values = 3; } +message GlobalDataId { + required string tag = 1; + required bytes version = 2; +} + message GlobalData { - required bytes data_id = 1; + required GlobalDataId data_id = 1; optional bool is_ready = 2; optional bytes data = 3; } @@ -93,7 +98,7 @@ message WorkItem { repeated InputMessageBundle message_bundles = 3; optional TimerBundle timers = 4; - repeated bytes global_data_ids_ready = 5; + repeated GlobalDataId global_data_id_notifications = 5; } message ComputationWorkItems { @@ -122,7 +127,7 @@ message KeyedGetDataRequest { required fixed64 work_token = 2; repeated TagValue values_to_fetch = 3; repeated TagList lists_to_fetch = 4; - repeated GlobalData global_data_to_fetch = 5; + repeated GlobalDataId global_data_to_fetch = 5; } message ComputationGetDataRequest { @@ -183,7 +188,7 @@ message WorkItemCommitRequest { repeated TagValue value_updates = 5; repeated TagList list_updates = 6; repeated Counter counter_updates = 8; - repeated bytes blocked_on_global_data_ids = 9; + repeated GlobalDataId global_data_id_requests = 9; } message ComputationCommitWorkRequest { From f1afe86d2c11edc1044275ae1996310861107cd6 Mon Sep 17 00:00:00 2001 From: bchambers Date: Tue, 10 Mar 2015 15:34:36 -0700 Subject: [PATCH 0256/1541] Fix Javadoc links. 
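[Illustrative aside] Because the rewritten Count javadoc above drops the inline example, here is a short usage sketch of the two flavours it mentions; it assumes an existing PCollection<String> named words and the usual SDK imports.

    // Count occurrences of each distinct element.
    PCollection<KV<String, Long>> wordCounts =
        words.apply(Count.<String>perElement());

    // Count the total number of elements in the input.
    PCollection<Long> totalElements =
        words.apply(Count.<String>globally());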
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88265230 --- .../com/google/cloud/dataflow/sdk/Pipeline.java | 6 +++--- .../cloud/dataflow/sdk/coders/DelegateCoder.java | 4 ++-- .../dataflow/sdk/coders/StringDelegateCoder.java | 2 +- .../google/cloud/dataflow/sdk/io/BigQueryIO.java | 3 ++- .../google/cloud/dataflow/sdk/io/DatastoreIO.java | 3 ++- .../dataflow/sdk/options/PipelineOptions.java | 9 +++++---- .../sdk/options/PipelineOptionsRegistrar.java | 8 ++++---- .../sdk/runners/DirectPipelineRunner.java | 5 +++-- .../sdk/runners/PipelineRunnerRegistrar.java | 12 ++++++------ .../dataflow/sdk/testing/DataflowAssert.java | 15 ++++++++------- .../sdk/transforms/ApproximateQuantiles.java | 4 ++-- .../cloud/dataflow/sdk/transforms/Combine.java | 7 ++++--- .../cloud/dataflow/sdk/transforms/DoFn.java | 8 ++++---- .../cloud/dataflow/sdk/transforms/Flatten.java | 8 ++++---- .../cloud/dataflow/sdk/transforms/GroupByKey.java | 2 +- .../cloud/dataflow/sdk/transforms/ParDo.java | 2 +- .../dataflow/sdk/transforms/RateLimiting.java | 3 +-- .../cloud/dataflow/sdk/transforms/View.java | 14 +++++++------- .../dataflow/sdk/values/PCollectionView.java | 10 ++++++---- .../google/cloud/dataflow/sdk/values/POutput.java | 5 ++--- 20 files changed, 68 insertions(+), 62 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java index 23405801827ee..dade39c8e9bc5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java @@ -50,8 +50,8 @@ * and concurrently. * *

    Each {@code Pipeline} is self-contained and isolated from any other - * {@code Pipeline}. The {@link PValues} that are inputs and outputs of each of a - * {@code Pipeline}'s {@link PTransform}s are also owned by that {@code Pipeline}. + * {@code Pipeline}. The {@link PValue PValues} that are inputs and outputs of each of a + * {@code Pipeline}'s {@link PTransform PTransforms} are also owned by that {@code Pipeline}. * A {@code PValue} owned by one {@code Pipeline} can be read only by {@code PTransform}s * also owned by that {@code Pipeline}. * @@ -279,7 +279,7 @@ Output applyInternal(Input input, } /** - * Returns all producing transforms for the {@link PValue}s contained + * Returns all producing transforms for the {@link PValue PValues} contained * in {@code output}. */ private List> getProducingTransforms(POutput output) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java index ae2c5e27e3b27..89e996c510a60 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java @@ -22,10 +22,10 @@ import java.io.Serializable; /** - * A {@code DelegateCoder} wraps a {@link Coder Coder

    } and + * A {@code DelegateCoder} wraps a {@link Coder Coder<DT>} and * encodes/decodes values of type {@code T}s by converting * to/from {@code DT} and then encoding/decoding using the underlying - * {@link Coder Coder
    }. + * {@link Coder Coder<DT>}. * *

    The conversions from {@code T} to {@code DT} and vice versa * must be supplied as {@link CodingFunction}, a serializable diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java index 567c94a3100b6..139dfba83a6d2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java @@ -19,7 +19,7 @@ import java.lang.reflect.InvocationTargetException; /** - * A {@code StringDelegateCoder} wraps a {@link Coder} + * A {@code StringDelegateCoder} wraps a {@code Coder} * and encodes/decodes values of type {@code T} via string representations. * *

    To decode, the input byte stream is decoded to diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java index 061bf68cd5049..cf89d23c5134e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java @@ -69,7 +69,8 @@ * *

    * BigQuery table references are stored as a {@link TableReference}, which comes - * from the BigQuery Java Client API. + * from the + * BigQuery Java Client API. * Tables can be referred to as Strings, with or without the {@code projectId}. * A helper function is provided ({@link BigQueryIO#parseTableSpec(String)}), * which parses the following string forms into a {@link TableReference}: diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index 8a85429287c08..64ca240d66823 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -367,7 +367,8 @@ public static Sink writeTo(String datasetId) { * entities to a Datastore kind. * *

    Current version only supports Write operation running on - * {@link DirectPipelineRunner}. If Write is used on {@link DataflowPipelineRunner}, + * {@link DirectPipelineRunner}. If Write is used on + * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}, * it throws {@link UnsupportedOperationException} and won't continue on the * operation. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java index 3f9515f4cee28..1fb5d9aabb63c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java @@ -43,11 +43,12 @@ @JsonDeserialize(using = Deserializer.class) public interface PipelineOptions { /** - * Transforms this object into an object of type . must extend {@link PipelineOptions}. + * Transforms this object into an object of type {@code }. {@code } must extend + * {@link PipelineOptions}. *

    - * If is not registered with the {@link PipelineOptionsFactory}, then we attempt to - * verify that is composable with every interface that this instance of the PipelineOptions - * has seen. + * If {@code } is not registered with the {@link PipelineOptionsFactory}, then we + * attempt to verify that {@code } is composable with every interface that this + * instance of the {@code PipelineOptions} has seen. * * @param kls The class of the type to transform to. * @return An object of type kls. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java index 6e0b60e58f397..d23fe682ddb14 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java @@ -16,8 +16,6 @@ package com.google.cloud.dataflow.sdk.options; -import com.google.auto.service.AutoService; - import java.util.ServiceLoader; /** @@ -25,11 +23,13 @@ * {@link PipelineOptions} registered with this SDK by creating a {@link ServiceLoader} entry * and a concrete implementation of this interface. *

    - * Note that automatic registration of any {@PipelineOptions} requires users + * Note that automatic registration of any + * {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} requires users * conform to the limitations discussed on {@link PipelineOptionsFactory#register(Class)}. *

    * It is optional but recommended to use one of the many build time tools such as - * {@link AutoService} to generate the necessary META-INF files automatically. + * {@link com.google.auto.service.AutoService} to generate the necessary META-INF + * files automatically. */ public interface PipelineOptionsRegistrar { Iterable> getPipelineOptions(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java index 6c481d624701d..27e15304fadb7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java @@ -372,8 +372,9 @@ public interface EvaluationResults extends PipelineResult { /** * Retrieves the values indicated by the given {@link PCollectionView}. - * Note that within the {@link DoFnContext} a {@link PCollectionView} - * converts from this representation to a suitable side input value. + * Note that within the {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context} + * implementation a {@link PCollectionView} should convert from this representation to a + * suitable side input value. */ Iterable> getPCollectionView(PCollectionView view); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java index d63de01948aac..954bace372ce9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java @@ -16,8 +16,6 @@ package com.google.cloud.dataflow.sdk.runners; -import com.google.auto.service.AutoService; - import java.util.ServiceLoader; /** @@ -25,12 +23,14 @@ * {@link PipelineRunner} registered with this SDK by creating a {@link ServiceLoader} entry * and a concrete implementation of this interface. *

    - * Note that automatic registration of any {@PipelineOptions} requires users - * conform to the limit that each {@link PipelineRunner}'s {@link Class#getSimpleName() simple name} - * must be unique. + * Note that automatic registration of any + * {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} requires users + * conform to the limit that each {@link PipelineRunner}'s + * {@link Class#getSimpleName() simple name} must be unique. *

    * It is optional but recommended to use one of the many build time tools such as - * {@link AutoService} to generate the necessary META-INF files automatically. + * {@link com.google.auto.service.AutoService} to generate the necessary + * META-INF files automatically. */ public interface PipelineRunnerRegistrar { public Iterable>> getPipelineRunners(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java index ee6152c04b04d..5d75312e9eb7a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java @@ -77,7 +77,7 @@ private DataflowAssert() {} /** * Constructs an {@link IterableAssert} for the elements of the provided - * {@link PCollection PCollection}. + * {@link PCollection PCollection<T>}. */ public static IterableAssert that(PCollection actual) { return new IterableAssert<>(actual.apply(View.asIterable())) @@ -86,7 +86,8 @@ public static IterableAssert that(PCollection actual) { /** * Constructs an {@link IterableAssert} for the value of the provided - * {@link PCollection PCollection>}, which must be a singleton. + * {@link PCollection PCollection<Iterable<T>>}, which must be a + * singleton. */ public static IterableAssert thatSingletonIterable(PCollection> actual) { List> maybeElementCoder = actual.getCoder().getCoderArguments(); @@ -125,7 +126,7 @@ public static SingletonAssert thatSingleton(PCollection actual) { /** * An assertion about the contents of a - * {@link PCollectionView PCollectionView<, ?>}. + * {@link PCollectionView PCollectionView<Iterable<T>, ?>}. */ @SuppressWarnings("serial") public static class IterableAssert implements Serializable { @@ -247,7 +248,7 @@ public IterableAssert containsInOrder(Iterable expectedElements) { /** * An assertion about the single value of type {@code T} - * associated with a {@link PCollectionView PCollectionView}. + * associated with a {@link PCollectionView PCollectionView<T, ?>}. */ @SuppressWarnings("serial") public static class SingletonAssert implements Serializable { @@ -344,7 +345,7 @@ public SingletonAssert is(T expectedValue) { //////////////////////////////////////////////////////////////////////// /** - * An assertion checker that takes a single {@link PCollectionView PCollectionView} + * An assertion checker that takes a single {@link PCollectionView PCollectionView<A, ?>} * and an assertion over {@code A}, and checks it within a dataflow pipeline. * *

    Note that the entire assertion must be serializable. If @@ -382,8 +383,8 @@ public void processElement(ProcessContext c) { } /** - * An assertion checker that takes a {@link PCollectionView PCollectionView}, - * a {@link PCollectionView PCollectionView}, a relation + * An assertion checker that takes a {@link PCollectionView PCollectionView<A, ?>}, + * a {@link PCollectionView PCollectionView<B, ?>}, a relation * over {@code A} and {@code B}, and checks that the relation holds * within a dataflow pipeline. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index bc9a7d99df340..80c80b539b34c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -192,10 +192,10 @@ PTransform, PCollection>> globally(int numQuantiles) { * *

    To evaluate the quantiles we use the "New Algorithm" described here: *

    -   *   [MRL98] Manku, Rajagopalan & Lindsay, "Approximate Medians and other
    +   *   [MRL98] Manku, Rajagopalan & Lindsay, "Approximate Medians and other
        *   Quantiles in One Pass and with Limited Memory", Proc. 1998 ACM
        *   SIGMOD, Vol 27, No 2, p 426-435, June 1998.
    -   *   http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.6513&rep=rep1&type=pdf
    +   *   http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.6513&rep=rep1&type=pdf
        * 
    * *

    The default error bound is {@code 1 / N}, though in practice diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 3e7f2895631df..5d51a3c17303d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.VarIntCoder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -728,8 +729,8 @@ public Coder getDefaultOutputCoder( * {@code Combine.Globally} takes a {@code PCollection} * and returns a {@code PCollection} whose single element is the result of * combining all the elements of the input {@code PCollection}, - * using a specified - * {@link CombineFn CombineFn}. It is common + * using a specified} + * {@link CombineFn CombineFn<VI, VA, VO>}. It is common * for {@code VI == VO}, but not required. Common combining * functions include sums, mins, maxes, and averages of numbers, * conjunctions and disjunctions of booleans, statistical @@ -1124,7 +1125,7 @@ protected String getKindString() { * {@code GroupedValues} takes a * {@code PCollection>>}, such as the result of * {@link GroupByKey}, applies a specified - * {@link KeyedCombineFn KeyedCombineFn} + * {@link KeyedCombineFn KeyedCombineFn<K, VI, VA, VO>} * to each of the input {@code KV>} elements to * produce a combined output {@code KV} element, and returns a * {@code PCollection>} containing all the combined output diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index 3b71738ad7970..fdfa2672620e2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -81,7 +81,7 @@ public abstract class Context { * element will have the same timestamp and be in the same windows * as the input element passed to {@link DoFn#processElement}). * - *

    If invoked from {@link #startBundle} or {@link #finishValue}, + *

    If invoked from {@link #startBundle} or {@link #finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -100,7 +100,7 @@ public abstract class Context { * {@link DoFn#getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

    If invoked from {@link #startBundle} or {@link #finishValue}, + *

    If invoked from {@link #startBundle} or {@link #finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -122,7 +122,7 @@ public abstract class Context { *

    The output element will have the same timestamp and be in the same * windows as the input element passed to {@link DoFn#processElement}). * - *

    If invoked from {@link #startBundle} or {@link #finishValue}, + *

    If invoked from {@link #startBundle} or {@link #finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -145,7 +145,7 @@ public abstract class Context { * {@link DoFn#getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

    If invoked from {@link #startBundle} or {@link #finishValue}, + *

    If invoked from {@link #startBundle} or {@link #finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index c2a10d375b0d3..bce444d955158 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -52,8 +52,8 @@ public class Flatten { /** - * Returns a {@link PTransform} that flattens a {@link CollectionList} - * into a {@link PCollection} containing all the elements of all + * Returns a {@link PTransform} that flattens a {@link PCollectionList} + * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * *
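For orientation, a minimal sketch of flattening a PCollectionList, assuming the list-flattening factory is named Flatten.pCollections() (the counterpart of the iterables() factory visible in this diff); the factory name and the input collections are assumptions, not taken from this patch:

    // Merge two PCollections of the same element type into one.
    PCollectionList<String> parts =
        PCollectionList.of(linesFromLogA).and(linesFromLogB);
    PCollection<String> allLines = parts.apply(Flatten.<String>pCollections());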

    If any of the inputs to {@code Flatten} require window merging, @@ -98,8 +98,8 @@ public static FlattenIterables iterables() { } /** - * A {@link PTransform} that flattens a {@link PCollectionList} - * into a {@link PCollection} containing all the elements of all + * A {@link PTransform} that flattens a {@link PCollectionList} + * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * * @param the type of the elements in the input and output diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index 1f7024d4314b9..1483ceb7ec989 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -108,7 +108,7 @@ * have the same {@link WindowFn} as the input. * *

    If the input {@code PCollection} contains late data (see - * {@link com.google.cloud.dataflow.sdk.PubsubIO.Read.Bound#timestampLabel} + * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read.Bound#timestampLabel} * for an example of how this can occur), then there may be multiple elements * output by a {@code GroupByKey} that correspond to the same key and window. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 234c5fb825d77..0b23080f0fcd0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -52,7 +52,7 @@ *

    {@code DoFn}s

    * *

    The function to use to process each element is specified by a - * {@link DoFn DoFn}. + * {@link DoFn DoFn<I, O>}. * *
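As a point of reference, a minimal sketch of supplying a DoFn<I, O> to ParDo, here with I = String and O = Integer; the input collection lines is hypothetical:

    PCollection<Integer> lengths = lines.apply(
        ParDo.of(new DoFn<String, Integer>() {
          @Override
          public void processElement(ProcessContext c) {
            // Emit one output element per input element.
            c.output(c.element().length());
          }
        }));

Per the DoFn javadoc corrected above, output emitted from startBundle or finishBundle (rather than processElement) is assigned windows by the input PCollection's WindowFn instead of inheriting them from a particular input element.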

    Conceptually, when a {@code ParDo} transform is executed, the * elements of the input {@code PCollection} are first divided up diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java index 76d6dbf3d0fd7..2c5a1ba500bbd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RateLimiting.java @@ -27,7 +27,6 @@ import com.google.common.util.concurrent.RateLimiter; import org.joda.time.Instant; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -110,7 +109,7 @@ public RateLimitingTransform(DoFn doFn) { *

    This rate limit may not be reachable unless there is sufficient * parallelism. * - *

    A rate of <= 0.0 disables rate limiting. + *

    A rate of ≤ 0.0 disables rate limiting. */ public RateLimitingTransform withRateLimit( double maxElementsPerSecond) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java index 4f7fcd543628e..f7ec7a6d40895 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java @@ -73,7 +73,7 @@ public static AsIterable asIterable() { } /** - * Returns an {@link AsMultimap} that takes a {@link PCollection} as input + * Returns an {@link AsMultimap} that takes a {@link PCollection} as input * and produces a {@link PCollectionView} of the values to be consumed * as a {@code Map>} side input. * @@ -84,8 +84,8 @@ public static AsMultimap asMap() { } /** - * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} - * yielding the single element it contains. + * A {@link PTransform} that produces a {@link PCollectionView} of a singleton + * {@link PCollection} yielding the single element it contains. * *

    Instantiate via {@link View#asIterable}. */ @@ -106,8 +106,8 @@ public PCollectionView, Iterable>> apply( } /** - * A {@PTransform} that produces a {@link PCollectionView} of a singleton {@link PCollection} - * yielding the single element it contains. + * A {@link PTransform} that produces a {@link PCollectionView} of a singleton + * {@link PCollection} yielding the single element it contains. * *

    Instantiate via {@link View#asIterable}. */ @@ -127,7 +127,7 @@ public PCollectionView> apply(PCollection input) { } /** - * A {@PTransform} that produces a {@link PCollectionView} of a keyed {@link PCollection} + * A {@link PTransform} that produces a {@link PCollectionView} of a keyed {@link PCollection} * yielding a map of keys to all associated values. * *

    Instantiate via {@link View#asMap}. @@ -166,7 +166,7 @@ public AsSingletonMap withCombiner(CombineFn combineFn) /** - * A {@PTransform} that produces a {@link PCollectionView} of a keyed {@link PCollection} + * A {@link PTransform} that produces a {@link PCollectionView} of a keyed {@link PCollection} * yielding a map of keys to a single associated values. * *

    Instantiate via {@link View#asMap}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java index 7eee0f4a94540..35d1e6a433cce 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java @@ -23,11 +23,13 @@ /** * A {@code PCollectionView} is an immutable view of a * {@link PCollection} that can be accessed e.g. as a - * side input to a {@link DoFn}. + * side input to a {@link com.google.cloud.dataflow.sdk.transforms.DoFn}. * - *
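To show how such a view is consumed, a minimal sketch using the View.asIterable() factory shown earlier in this diff; the collections stopWords and words are hypothetical, and the view's second (windowed) type parameter is elided with a wildcard:

    final PCollectionView<Iterable<String>, ?> stopWordsView =
        stopWords.apply(View.<String>asIterable());

    PCollection<String> filtered = words.apply(
        ParDo.withSideInputs(stopWordsView)
             .of(new DoFn<String, String>() {
               @Override
               public void processElement(ProcessContext c) {
                 // The side input is read from the view inside the DoFn.
                 for (String stop : c.sideInput(stopWordsView)) {
                   if (c.element().equals(stop)) {
                     return;  // drop stop words
                   }
                 }
                 c.output(c.element());
               }
             }));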

    A {@PCollectionView} should always be the output of a {@link PTransform}. It is - * the joint responsibility of this transform and each {@link PipelineRunner} to - * implement the view in a runner-specific manner. + *

    A {@link PCollectionView} should always be the output of a + * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}. It is the joint + * responsibility of this transform and each + * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to implement the + * view in a runner-specific manner. * * @param the type of the value(s) accessible via this {@code PCollectionView} * @param the type of the windowed value(s) accessible via this {@code PCollectionView} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java index 35842827facea..521a1a1ec34f5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java @@ -45,9 +45,8 @@ public interface POutput { * {@code PTransform} in the given {@code Pipeline}. * *

    Should expand this {@code POutput} and invoke - * {@link PValue#recordAsOutput(Pipeline, - * com.google.cloud.dataflow.sdk.transforms.PTransform, - * String)} on each component output {@code PValue}. + * {@link PValue#recordAsOutput(Pipeline, com.google.cloud.dataflow.sdk.transforms.PTransform)} + * on each component output {@code PValue}. * *

    Automatically invoked as part of applying a * {@code PTransform}. Not to be invoked directly by user code. From e89f47f756d0729c1a4567064a58970a74b4ea98 Mon Sep 17 00:00:00 2001 From: malo Date: Tue, 10 Mar 2015 18:37:58 -0700 Subject: [PATCH 0257/1541] Change DataflowWorkerHarness to repeatedly request work. Switches to having 1 thread per core. Remove deprecated completion time logging. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88283022 --- .../runners/worker/DataflowWorkerHarness.java | 106 ++++++++++++------ .../worker/DataflowWorkerHarnessTest.java | 22 ++-- 2 files changed, 79 insertions(+), 49 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java index a078d754abb1f..106b4ec151224 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarness.java @@ -19,7 +19,9 @@ import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudTime; -import com.google.api.client.util.Lists; +import com.google.api.client.util.BackOff; +import com.google.api.client.util.BackOffUtils; +import com.google.api.client.util.Sleeper; import com.google.api.services.dataflow.Dataflow; import com.google.api.services.dataflow.model.LeaseWorkItemRequest; import com.google.api.services.dataflow.model.LeaseWorkItemResponse; @@ -32,6 +34,7 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingInitializer; +import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff; import com.google.cloud.dataflow.sdk.util.GcsIOChannelFactory; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.PropertyNames; @@ -39,19 +42,17 @@ import com.google.common.collect.ImmutableList; import org.joda.time.DateTime; -import org.joda.time.DateTimeUtils; import org.joda.time.Duration; -import org.joda.time.format.ISODateTimeFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; import java.util.Collections; +import java.util.LinkedList; import java.util.List; import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; import javax.annotation.concurrent.ThreadSafe; @@ -72,6 +73,10 @@ public class DataflowWorkerHarness { private static final String APPLICATION_NAME = "DataflowWorkerHarness"; + // ExponentialBackOff parameters for the task retry strategy. Visible for testing. + static final int BACKOFF_INITIAL_INTERVAL_MILLIS = 5000; // 5 second + static final int BACKOFF_MAX_ATTEMPTS = 10; // 10 attempts will take approx. 15 min. + /** * This uncaught exception handler logs the {@link Throwable} to the logger, {@link System#err} * and exits the application with status code 1. @@ -88,6 +93,14 @@ public void uncaughtException(Thread t, Throwable e) { } } + /** + * Helper for initializing the BackOff used for retries. 
+ */ + private static BackOff createBackOff() { + return new AttemptBoundedExponentialBackOff( + BACKOFF_MAX_ATTEMPTS, BACKOFF_INITIAL_INTERVAL_MILLIS); + } + /** * Fetches and processes work units from the Dataflow service. */ @@ -99,47 +112,68 @@ public static void main(String[] args) throws Exception { PipelineOptionsFactory.createFromSystemProperties(); DataflowWorkerLoggingInitializer.configure(pipelineOptions); + final Sleeper sleeper = Sleeper.DEFAULT; final DataflowWorker worker = create(pipelineOptions); - processWork(pipelineOptions, worker); + processWork(pipelineOptions, worker, sleeper); } - // Visible for testing. - static void processWork(DataflowWorkerHarnessOptions pipelineOptions, - final DataflowWorker worker) { - - long startTime = DateTimeUtils.currentTimeMillis(); - int numThreads = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); - CompletionService completionService = - new ExecutorCompletionService<>(pipelineOptions.getExecutorService()); - for (int i = 0; i < numThreads; ++i) { - completionService.submit(new Callable() { - @Override - public Boolean call() throws Exception { - return worker.getAndPerformWork(); - } - }); + /** + * A thread which repeatedly fetches and processes work units from the Dataflow service. + */ + private static class WorkerThread implements Callable { + // sleeper is used to sleep the appropriate amount of time + WorkerThread(final DataflowWorker worker, final Sleeper sleeper) { + this.worker = worker; + this.sleeper = sleeper; + this.backOff = createBackOff(); } - List completionTimes = Lists.newArrayList(); - for (int i = 0; i < numThreads; ++i) { + @Override + public Boolean call() { + boolean success = true; try { - // CompletionService returns the tasks in the order in which the completed at. - completionService.take().get(); - } catch (Exception e) { - LOG.error("Failed waiting on thread to process work.", e); + do { // We loop getting and processing work. + try { + LOG.debug("Thread starting getAndPerformWork."); + success = worker.getAndPerformWork(); + LOG.debug("{} processing one WorkItem.", success ? "Finished" : "Failed"); + } catch (IOException e) { // If there is a problem getting work. + success = false; + } + if (success) { + backOff.reset(); + } + // Sleeping a while if there is a problem with the work, then go on with the next work. + } while (success || BackOffUtils.next(sleeper, backOff)); + } catch (IOException e) { // Failure of BackOff. + LOG.error("Already tried several attempts at working on tasks. Aborting.", e); + } catch (InterruptedException e) { + LOG.error("Interrupted during thread execution or sleep.", e); } - completionTimes.add(DateTimeUtils.currentTimeMillis()); + return false; } - long endTime = DateTimeUtils.currentTimeMillis(); - LOG.debug("Parallel worker thread processing start time: {}, end time: {}", - ISODateTimeFormat.dateTime().print(startTime), - ISODateTimeFormat.dateTime().print(endTime)); - for (long completionTime : completionTimes) { - LOG.debug("Worker thread execution time {}ms, idle time waiting for other work threads: {}ms", - completionTime - startTime, - endTime - completionTime); + private final DataflowWorker worker; + private final Sleeper sleeper; + private final BackOff backOff; + } + + // Visible for testing. 
+ static void processWork(DataflowWorkerHarnessOptions pipelineOptions, + final DataflowWorker worker, Sleeper sleeper) throws InterruptedException { + int numThreads = Math.max(Runtime.getRuntime().availableProcessors(), 1); + ExecutorService executor = pipelineOptions.getExecutorService(); + final List> tasks = new LinkedList<>(); + + LOG.debug("Starting {} worker threads", numThreads); + // We start the appropriate number of threads. + for (int i = 0; i < numThreads; ++i) { + tasks.add(new WorkerThread(worker, sleeper)); } + + LOG.debug("Waiting for {} worker threads", numThreads); + // We wait forever unless there is a big problem. + executor.invokeAll(tasks); } static DataflowWorker create(DataflowWorkerHarnessOptions options) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java index 50eb5a72f8a5f..70895f0f8cc6e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerHarnessTest.java @@ -36,6 +36,7 @@ import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.worker.logging.DataflowWorkerLoggingFormatter; +import com.google.cloud.dataflow.sdk.testing.FastNanoClockAndSleeper; import com.google.cloud.dataflow.sdk.testing.RestoreDataflowLoggingFormatter; import com.google.cloud.dataflow.sdk.testing.RestoreSystemProperties; import com.google.cloud.dataflow.sdk.util.TestCredential; @@ -62,6 +63,7 @@ public class DataflowWorkerHarnessTest { @Rule public TestRule restoreSystemProperties = new RestoreSystemProperties(); @Rule public TestRule restoreLogging = new RestoreDataflowLoggingFormatter(); @Rule public ExpectedException expectedException = ExpectedException.none(); + @Rule public FastNanoClockAndSleeper fastNanoClockAndSleeper = new FastNanoClockAndSleeper(); @Mock private MockHttpTransport transport; @Mock private MockLowLevelHttpRequest request; @Mock private DataflowWorker mockDataflowWorker; @@ -80,20 +82,14 @@ public void setUp() throws Exception { } @Test - public void testThatWeOnlyProcessWorkOncePerAvailableProcessor() throws Exception { - int numWorkers = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); - when(mockDataflowWorker.getAndPerformWork()).thenReturn(true); - DataflowWorkerHarness.processWork(pipelineOptions, mockDataflowWorker); - verify(mockDataflowWorker, times(numWorkers)).getAndPerformWork(); - verifyNoMoreInteractions(mockDataflowWorker); - } - - @Test - public void testThatWeOnlyProcessWorkOncePerAvailableProcessorEvenWhenFailing() throws Exception { - int numWorkers = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1); + public void testThatWeRetryIfTaskExecutionFailAgainAndAgain() throws Exception { + int numWorkers = Math.max(Runtime.getRuntime().availableProcessors(), 1); when(mockDataflowWorker.getAndPerformWork()).thenReturn(false); - DataflowWorkerHarness.processWork(pipelineOptions, mockDataflowWorker); - verify(mockDataflowWorker, times(numWorkers)).getAndPerformWork(); + DataflowWorkerHarness.processWork( + pipelineOptions, mockDataflowWorker, fastNanoClockAndSleeper); + // Test that the backoff mechanism will retry the BACKOFF_MAX_ATTEMPTS number of times. 
+ verify(mockDataflowWorker, times(numWorkers * DataflowWorkerHarness.BACKOFF_MAX_ATTEMPTS)) + .getAndPerformWork(); verifyNoMoreInteractions(mockDataflowWorker); } From 8ca93fb64589c46bf594567e2193bfdb93616401 Mon Sep 17 00:00:00 2001 From: ckuhn Date: Wed, 11 Mar 2015 09:39:04 -0700 Subject: [PATCH 0258/1541] To create unique job names for tests, we need to use method name rather than class name, and prioritize the unique part of the name. We no longer need to limit these names to 40 characters. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88342400 --- .../sdk/options/DataflowPipelineOptions.java | 10 --------- .../dataflow/sdk/testing/TestPipeline.java | 21 ++++++------------- .../options/DataflowPipelineOptionsTest.java | 20 ++++++++---------- .../sdk/testing/TestPipelineTest.java | 11 ++++++---- 4 files changed, 22 insertions(+), 40 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index b4b1bccfb3973..5dec12031b2f5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -86,8 +86,6 @@ public interface DataflowPipelineOptions extends public static class JobNameFactory implements DefaultValueFactory { private static final DateTimeFormatter FORMATTER = DateTimeFormat.forPattern("MMddHHmmss").withZone(DateTimeZone.UTC); - private static final int MAX_APP_NAME = 19; - private static final int MAX_USER_NAME = 9; @Override public String create(PipelineOptions options) { @@ -100,14 +98,6 @@ public String create(PipelineOptions options) { String normalizedUserName = userName.toLowerCase() .replaceAll("[^a-z0-9]", "0"); String datePart = FORMATTER.print(DateTimeUtils.currentTimeMillis()); - - // Maximize the amount of the app name and user name we can use. - normalizedAppName = normalizedAppName.substring(0, - Math.min(normalizedAppName.length(), - MAX_APP_NAME + Math.max(0, MAX_USER_NAME - normalizedUserName.length()))); - normalizedUserName = normalizedUserName.substring(0, - Math.min(userName.length(), - MAX_USER_NAME + Math.max(0, MAX_APP_NAME - normalizedAppName.length()))); return normalizedAppName + "-" + normalizedUserName + "-" + datePart; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java index 7545e7ee10521..45645888e71a0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java @@ -112,7 +112,6 @@ static TestDataflowPipelineOptions getPipelineOptions() { System.getProperty(PROPERTY_DATAFLOW_OPTIONS), PipelineOptions.class) .as(TestDataflowPipelineOptions.class); options.setAppName(getAppName()); - options.setJobName(getJobName()); return options; } catch (IOException e) { throw new RuntimeException("Unable to instantiate test options from system property " @@ -120,28 +119,20 @@ static TestDataflowPipelineOptions getPipelineOptions() { } } - /** Returns the class name of the test, or a default name. */ + /** Returns the class + method name of the test, or a default name. 
*/ private static String getAppName() { Optional stackTraceElement = findCallersStackTrace(); if (stackTraceElement.isPresent()) { + String methodName = stackTraceElement.get().getMethodName(); String className = stackTraceElement.get().getClassName(); - return className.contains(".") - ? className.substring(className.lastIndexOf(".") + 1) - : className; + if (className.contains(".")) { + className = className.substring(className.lastIndexOf(".") + 1); + } + return className + "-" + methodName; } return "UnitTest"; } - /** Returns the method name of the test, or a default name. */ - private static String getJobName() { - Optional stackTraceElement = findCallersStackTrace(); - if (stackTraceElement.isPresent()) { - String name = stackTraceElement.get().getMethodName(); - return name.substring(0, Math.min(40, name.length())); - } - return "unittestjob"; - } - /** Returns the {@link StackTraceElement} of the calling class. */ private static Optional findCallersStackTrace() { Iterator elements = diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java index 725288a1a779d..03edfacd0ab89 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptionsTest.java @@ -52,33 +52,32 @@ public void testUserNameIsNotSet() { } @Test - public void testAppNameAndUserNameIsTooLong() { + public void testAppNameAndUserNameAreLong() { resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); System.getProperties().put("user.name", "abcdeabcdeabcdeabcdeabcdeabcde"); DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setAppName("1234567890123456789012345678901234567890"); - assertEquals("a234567890123456789-abcdeabcd-1208190706", options.getJobName()); - assertTrue(options.getJobName().length() <= 40); + assertEquals( + "a234567890123456789012345678901234567890-abcdeabcdeabcdeabcdeabcdeabcde-1208190706", + options.getJobName()); } @Test - public void testAppNameIsTooLong() { + public void testAppNameIsLong() { resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); System.getProperties().put("user.name", "abcde"); DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setAppName("1234567890123456789012345678901234567890"); - assertEquals("a2345678901234567890123-abcde-1208190706", options.getJobName()); - assertTrue(options.getJobName().length() <= 40); + assertEquals("a234567890123456789012345678901234567890-abcde-1208190706", options.getJobName()); } @Test - public void testUserNameIsTooLong() { + public void testUserNameIsLong() { resetDateTimeProviderRule.setDateTimeFixed("2014-12-08T19:07:06.698Z"); System.getProperties().put("user.name", "abcdeabcdeabcdeabcdeabcdeabcde"); DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setAppName("1234567890"); - assertEquals("a234567890-abcdeabcdeabcdeabc-1208190706", options.getJobName()); - assertTrue(options.getJobName().length() <= 40); + assertEquals("a234567890-abcdeabcdeabcdeabcdeabcdeabcde-1208190706", options.getJobName()); } @@ -88,7 +87,6 @@ public void testUtf8UserNameAndApplicationNameIsNormalized() { System.getProperties().put("user.name", "ði ıntəˈnæʃənəl "); DataflowPipelineOptions options = 
PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setAppName("fəˈnɛtık əsoʊsiˈeıʃn"); - assertEquals("f00n0t0k00so0si0e00-0i00nt00n-1208190706", options.getJobName()); - assertTrue(options.getJobName().length() <= 40); + assertEquals("f00n0t0k00so0si0e00n-0i00nt00n000n0l0-1208190706", options.getJobName()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java index e01b8b5c2b54d..5e476f2450f12 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/TestPipelineTest.java @@ -16,8 +16,10 @@ package com.google.cloud.dataflow.sdk.testing; +import static org.hamcrest.CoreMatchers.startsWith; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; import com.google.common.collect.ImmutableMap; @@ -60,8 +62,7 @@ public void testCreationOfPipelineOptions() throws Exception { System.getProperties().put("dataflowOptions", stringOptions); TestDataflowPipelineOptions options = TestPipeline.getPipelineOptions(); assertEquals(DataflowPipelineRunner.class, options.getRunner()); - assertEquals("TestPipelineTest", options.getAppName()); - assertEquals("testCreationOfPipelineOptions", options.getJobName()); + assertThat(options.getJobName(), startsWith("testpipelinetest0testcreationofpipelineoptions-")); assertEquals("testProject", options.getProject()); assertEquals("testApiRootUrl", options.getApiRootUrl()); assertEquals("testDataflowEndpoint", options.getDataflowEndpoint()); @@ -81,7 +82,9 @@ public void testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase() thro .build())); System.getProperties().put("dataflowOptions", stringOptions); TestDataflowPipelineOptions options = TestPipeline.getPipelineOptions(); - assertEquals("TestPipelineTest", options.getAppName()); - assertEquals("testCreationOfPipelineOptionsFromReallyV", options.getJobName()); + assertThat(options.getAppName(), startsWith( + "TestPipelineTest-testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase")); + assertThat(options.getJobName(), startsWith( + "testpipelinetest0testcreationofpipelineoptionsfrom")); } } From 84742de2b34777fc143e65da3c935c36614871e5 Mon Sep 17 00:00:00 2001 From: robertwb Date: Sat, 7 Feb 2015 10:26:41 -0800 Subject: [PATCH 0259/1541] Some fixes for combining in streaming mode. 
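As context for this change, a minimal sketch of the user-facing pattern it targets: a per-key combine evaluated under a merging WindowFn in streaming mode, as exercised by the session-window tests below. The input collection events and the use of Sum.longsPerKey() are illustrative assumptions, not taken from this patch:

    PCollection<KV<String, Long>> totals = events
        // Assign each element to a session window with a 10 ms gap.
        .apply(Window.<KV<String, Long>>into(Sessions.withGapDuration(Duration.millis(10))))
        // Combine the values for each key within each (possibly merged) window.
        .apply(Sum.<String>longsPerKey());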
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88347787 --- .../worker/GroupAlsoByWindowsParDoFn.java | 32 ++++--- .../dataflow/sdk/util/CombiningWindowSet.java | 84 +++++++++++------ .../sdk/util/GroupAlsoByWindowsDoFn.java | 34 +++---- .../util/StreamingGroupAlsoByWindowsDoFn.java | 65 ++++++++----- .../StreamingGroupAlsoByWindowsDoFnTest.java | 92 +++++++++++-------- 5 files changed, 189 insertions(+), 118 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index 62c185a06f286..1c573701d9c77 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -64,23 +64,26 @@ public static GroupAlsoByWindowsParDoFn create( CounterSet.AddCounterMutator addCounterMutator, StateSampler sampler /* unused */) throws Exception { - final Object windowFn = + final Object windowFnObj = SerializableUtils.deserializeFromByteArray( getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized window fn"); - if (!(windowFn instanceof WindowFn)) { + if (!(windowFnObj instanceof WindowFn)) { throw new Exception( - "unexpected kind of WindowFn: " + windowFn.getClass().getName()); + "unexpected kind of WindowFn: " + windowFnObj.getClass().getName()); } + final WindowFn windowFn = (WindowFn) windowFnObj; byte[] serializedCombineFn = getBytes(cloudUserFn, PropertyNames.COMBINE_FN, null); - final Object combineFn; + final KeyedCombineFn combineFn; if (serializedCombineFn != null) { - combineFn = + Object combineFnObj = SerializableUtils.deserializeFromByteArray(serializedCombineFn, "serialized combine fn"); - if (!(combineFn instanceof KeyedCombineFn)) { - throw new Exception("unexpected kind of KeyedCombineFn: " + combineFn.getClass().getName()); + if (!(combineFnObj instanceof KeyedCombineFn)) { + throw new Exception( + "unexpected kind of KeyedCombineFn: " + combineFnObj.getClass().getName()); } + combineFn = (KeyedCombineFn) combineFnObj; } else { combineFn = null; } @@ -98,6 +101,7 @@ public static GroupAlsoByWindowsParDoFn create( throw new Exception( "Expected KvCoder for inputCoder, got: " + elemCoder.getClass().getName()); } + final KvCoder kvCoder = (KvCoder) elemCoder; boolean isStreamingPipeline = false; if (options instanceof StreamingOptions) { @@ -111,9 +115,10 @@ public static GroupAlsoByWindowsParDoFn create( public DoFnInfo createDoFnInfo() { return new DoFnInfo( StreamingGroupAlsoByWindowsDoFn.create( - (WindowFn) windowFn, - (KeyedCombineFn) combineFn, - ((KvCoder) elemCoder).getValueCoder()), + windowFn, + combineFn, + kvCoder.getKeyCoder(), + kvCoder.getValueCoder()), null); } }; @@ -127,9 +132,10 @@ public DoFnInfo createDoFnInfo() { public DoFnInfo createDoFnInfo() { return new DoFnInfo( GroupAlsoByWindowsDoFn.create( - (WindowFn) windowFn, - (KeyedCombineFn) combineFn, - elemCoder), + windowFn, + combineFn, + kvCoder.getKeyCoder(), + kvCoder.getValueCoder()), null); } }; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java index fe754bcb7cd96..d726551f80f16 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java +++ 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombiningWindowSet.java @@ -20,6 +20,7 @@ import com.google.api.client.util.Lists; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderRegistry; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; @@ -28,7 +29,6 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Iterators; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -39,33 +39,54 @@ * A WindowSet for combine accumulators. * It merges accumulators when windows are added or merged. * - * @param key tyep + * @param key type + * @param value input type * @param accumulator type + * @param value output type * @param window type */ -public class CombiningWindowSet - extends AbstractWindowSet { +public class CombiningWindowSet + extends AbstractWindowSet { private final CodedTupleTag> windowListTag = CodedTupleTag.of("liveWindowsList", IterableCoder.of(windowFn.windowCoder())); - private final KeyedCombineFn combineFn; + private final KeyedCombineFn combineFn; private final Set liveWindows; + private final Coder accumulatorCoder; private boolean liveWindowsModified; protected CombiningWindowSet( K key, WindowFn windowFn, - KeyedCombineFn combineFn, - Coder inputCoder, - DoFnProcessContext> context, + KeyedCombineFn combineFn, + Coder keyCoder, + Coder inputValueCoder, + DoFnProcessContext> context, ActiveWindowManager activeWindowManager) throws Exception { - super(key, windowFn, inputCoder, context, activeWindowManager); + super(key, windowFn, inputValueCoder, context, activeWindowManager); this.combineFn = combineFn; liveWindows = new HashSet(); Iterators.addAll(liveWindows, emptyIfNull(context.keyedState().lookup(windowListTag)).iterator()); liveWindowsModified = false; + // TODO: Use the pipeline's registry once the TODO in GroupByKey is resolved. 
+ CoderRegistry coderRegistry = new CoderRegistry(); + coderRegistry.registerStandardCoders(); + accumulatorCoder = combineFn.getAccumulatorCoder(coderRegistry, keyCoder, inputValueCoder); + } + + protected static CombiningWindowSet + create( + K key, + WindowFn windowFn, + KeyedCombineFn combineFn, + Coder keyCoder, + Coder inputValueCoder, + DoFnProcessContext> context, + ActiveWindowManager activeWindowManager) throws Exception { + return new CombiningWindowSet( + key, windowFn, combineFn, keyCoder, inputValueCoder, context, activeWindowManager); } @Override @@ -74,29 +95,25 @@ protected Collection windows() { } @Override - protected VA finalValue(W window) throws Exception { - return context.keyedState().lookup( - bufferTag(window, windowFn.windowCoder(), inputCoder)); + protected VO finalValue(W window) throws Exception { + return combineFn.extractOutput( + key, + context.keyedState().lookup(bufferTag(window, windowFn.windowCoder(), accumulatorCoder))); } @Override - protected void put(W window, VA value) throws Exception { - CodedTupleTag tag = bufferTag(window, windowFn.windowCoder(), inputCoder); - VA va = context.keyedState().lookup(tag); - VA newValue; + protected void put(W window, VI value) throws Exception { + VA va = context.keyedState().lookup(accumulatorTag(window)); if (va == null) { - newValue = value; - } else { - newValue = combineFn.mergeAccumulators(key, Arrays.asList(value, va)); + va = combineFn.createAccumulator(key); } - context.keyedState().store(tag, newValue); - activeWindowManager.addWindow(window); - liveWindowsModified = liveWindows.add(window); + combineFn.addInput(key, va, value); + store(window, va); } @Override protected void remove(W window) throws Exception { - context.keyedState().remove(bufferTag(window, windowFn.windowCoder(), inputCoder)); + context.keyedState().remove(accumulatorTag(window)); activeWindowManager.addWindow(window); liveWindowsModified = liveWindows.remove(window); } @@ -104,17 +121,28 @@ protected void remove(W window) throws Exception { @Override protected void merge(Collection toBeMerged, W mergeResult) throws Exception { List accumulators = Lists.newArrayList(); - for (W w : toBeMerged) { - VA va = context.keyedState().lookup( - bufferTag(w, windowFn.windowCoder(), inputCoder)); + for (W window : toBeMerged) { + VA va = context.keyedState().lookup(accumulatorTag(window)); // TODO: determine whether null means no value associated with the tag, b/19201776. if (va != null) { accumulators.add(va); } - remove(w); + remove(window); } VA mergedVa = combineFn.mergeAccumulators(key, accumulators); - put(mergeResult, mergedVa); + store(mergeResult, mergedVa); + } + + private CodedTupleTag accumulatorTag(W window) throws Exception { + // TODO: Cache this. 
+ return bufferTag(window, windowFn.windowCoder(), accumulatorCoder); + } + + private void store(W window, VA va) throws Exception { + CodedTupleTag tag = accumulatorTag(window); + context.keyedState().store(tag, va); + activeWindowManager.addWindow(window); + liveWindowsModified = liveWindows.add(window); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 896dc98b5d033..f109e619fe4e9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -80,17 +80,21 @@ AbstractWindowSet, W> createWindowSet(K key, /** * Create a {@link GroupAlsoByWindowsDoFn} using the specified combineFn. */ - private static GroupAlsoByWindowsDoFn - createCombine(final WindowFn windowFn, - final KeyedCombineFn combineFn, - final Coder inputCoder) { - return new GABWViaWindowSetDoFn(windowFn) { - @Override - AbstractWindowSet createWindowSet(K key, - DoFnProcessContext>>, KV> context, - BatchActiveWindowManager activeWindowManager) throws Exception { - return new CombiningWindowSet( - key, windowFn, combineFn, inputCoder, context, activeWindowManager); + private static GroupAlsoByWindowsDoFn + createCombine( + final WindowFn windowFn, + final KeyedCombineFn combineFn, + final Coder keyCoder, + final Coder inputCoder) { + return new GABWViaWindowSetDoFn(windowFn) { + @Override + AbstractWindowSet createWindowSet( + K key, + DoFnProcessContext>>, KV> context, + BatchActiveWindowManager activeWindowManager) throws Exception { + return CombiningWindowSet.create( + key, windowFn, combineFn, keyCoder, inputCoder, + (DoFnProcessContext>) (DoFnProcessContext) context, activeWindowManager); } }; } @@ -99,8 +103,8 @@ AbstractWindowSet createWindowSet(K key, * Construct a {@link GroupAlsoByWindowsDoFn} using the {@code combineFn} if available. 
*/ public static GroupAlsoByWindowsDoFn - create(WindowFn windowFn, KeyedCombineFn combineFn, - Coder inputCoder) { + create(WindowFn windowFn, KeyedCombineFn combineFn, + Coder keyCoder, Coder inputCoder) { if (combineFn == null) { // Without combineFn, it should be the case that VO = Iterable, so this is safe @SuppressWarnings("unchecked") @@ -108,10 +112,8 @@ AbstractWindowSet createWindowSet(K key, (GroupAlsoByWindowsDoFn) create(windowFn, inputCoder); return fn; } else { - // With a combineFn, then VI = VO, and we'll use those as the type of the accumulator - @SuppressWarnings("unchecked") GroupAlsoByWindowsDoFn fn = - (GroupAlsoByWindowsDoFn) createCombine(windowFn, combineFn, inputCoder); + createCombine(windowFn, combineFn, keyCoder, inputCoder); return fn; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java index 741cd7130bfaf..fa7a62f102862 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFn.java @@ -39,25 +39,29 @@ public class StreamingGroupAlsoByWindowsDoFn extends DoFn>, KV> implements DoFn.RequiresKeyedState { - protected WindowFn windowFn; - protected KeyedCombineFn combineFn; - protected Coder inputCoder; + protected final WindowFn windowFn; + protected final KeyedCombineFn combineFn; + protected final Coder keyCoder; + protected final Coder inputValueCoder; protected StreamingGroupAlsoByWindowsDoFn( WindowFn windowFn, - KeyedCombineFn combineFn, - Coder inputCoder) { + KeyedCombineFn combineFn, + Coder keyCoder, + Coder inputValueCoder) { this.windowFn = windowFn; this.combineFn = combineFn; - this.inputCoder = inputCoder; + this.keyCoder = keyCoder; + this.inputValueCoder = inputValueCoder; } public static StreamingGroupAlsoByWindowsDoFn create( WindowFn windowFn, - KeyedCombineFn combineFn, - Coder inputCoder) { - return new StreamingGroupAlsoByWindowsDoFn<>(windowFn, combineFn, inputCoder); + KeyedCombineFn combineFn, + Coder keyCoder, + Coder inputValueCoder) { + return new StreamingGroupAlsoByWindowsDoFn<>(windowFn, combineFn, keyCoder, inputValueCoder); } private AbstractWindowSet createWindowSet( @@ -65,27 +69,44 @@ private AbstractWindowSet createWindowSet( DoFnProcessContext> context, AbstractWindowSet.ActiveWindowManager activeWindowManager) throws Exception { if (combineFn != null) { - return new CombiningWindowSet( - key, windowFn, combineFn, inputCoder, context, activeWindowManager); - } else if (windowFn instanceof PartitioningWindowFn) { - return new PartitionBufferingWindowSet( - key, windowFn, inputCoder, context, activeWindowManager); + return CombiningWindowSet.create( + key, windowFn, combineFn, keyCoder, inputValueCoder, context, activeWindowManager); } else { - return new BufferingWindowSet( - key, windowFn, inputCoder, context, activeWindowManager); + // VO == Iterable + @SuppressWarnings({"unchecked", "rawtypes"}) + DoFnProcessContext>> iterableContext = (DoFnProcessContext) context; + AbstractWindowSet, W> iterableWindowSet = + createNonCombiningWindowSet(key, iterableContext, activeWindowManager); + @SuppressWarnings({"unchecked", "rawtypes"}) + AbstractWindowSet windowSet = (AbstractWindowSet) iterableWindowSet; + return windowSet; + } + } + + private AbstractWindowSet, W> createNonCombiningWindowSet( + K key, + DoFnProcessContext>> context, + 
AbstractWindowSet.ActiveWindowManager activeWindowManager) throws Exception { + if (windowFn instanceof PartitioningWindowFn) { + return new PartitionBufferingWindowSet( + key, windowFn, inputValueCoder, context, activeWindowManager); + } else { + return new BufferingWindowSet( + key, windowFn, inputValueCoder, context, activeWindowManager); } } @Override - public void processElement(ProcessContext processContext) throws Exception { - DoFnProcessContext>, KV> context = - (DoFnProcessContext>, KV>) processContext; + public void processElement(ProcessContext context) throws Exception { + @SuppressWarnings("unchecked") + DoFnProcessContext>, KV> doFnContext = + (DoFnProcessContext>, KV>) context; if (!context.element().isTimer()) { KV element = context.element().element(); K key = element.getKey(); VI value = element.getValue(); AbstractWindowSet windowSet = createWindowSet( - key, context, new StreamingActiveWindowManager<>(windowFn, context)); + key, doFnContext, new StreamingActiveWindowManager<>(windowFn, doFnContext)); for (BoundedWindow window : context.windows()) { windowSet.put((W) window, value); @@ -93,9 +114,9 @@ public void processElement(ProcessContext processContext) throws Exception { windowSet.flush(); } else { - TimerOrElement timer = context.element(); + TimerOrElement> timer = context.element(); AbstractWindowSet windowSet = createWindowSet( - (K) timer.key(), context, new StreamingActiveWindowManager<>(windowFn, context)); + (K) timer.key(), doFnContext, new StreamingActiveWindowManager<>(windowFn, doFnContext)); // Attempt to merge windows before emitting; that may remove the current window under // consideration. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index 5d5c03406f29c..94cf19599ab2a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; +import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; @@ -29,7 +30,6 @@ import com.google.cloud.dataflow.sdk.transforms.Sum; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; -import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -52,7 +52,7 @@ /** Unit tests for {@link StreamingGroupAlsoByWindowsDoFn}. 
*/ @RunWith(JUnit4.class) -@SuppressWarnings({"rawtypes", "unchecked"}) +@SuppressWarnings("rawtypes") public class StreamingGroupAlsoByWindowsDoFnTest { ExecutionContext execContext; CounterSet counters; @@ -65,15 +65,14 @@ public class StreamingGroupAlsoByWindowsDoFnTest { } @Test public void testEmpty() throws Exception { - DoFnRunner>, - KV>, List> runner = - makeRunner(FixedWindows.of(Duration.millis(10)), null); + DoFnRunner>, KV>, List> runner = + makeRunner(FixedWindows.of(Duration.millis(10))); runner.startBundle(); runner.finishBundle(); - List>> result = runner.getReceiver(outputTag); + List result = runner.getReceiver(outputTag); assertEquals(0, result.size()); } @@ -81,9 +80,9 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testFixedWindows() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(FixedWindows.of(Duration.millis(10)), null); + makeRunner(FixedWindows.of(Duration.millis(10))); - Coder windowCoder = FixedWindows.of(Duration.millis(10)).windowCoder(); + Coder windowCoder = FixedWindows.of(Duration.millis(10)).windowCoder(); runner.startBundle(); @@ -119,6 +118,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { runner.finishBundle(); + @SuppressWarnings("unchecked") List>>> result = runner.getReceiver(outputTag); assertEquals(2, result.size()); @@ -139,10 +139,10 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testSlidingWindows() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)), null); + makeRunner(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); Coder windowCoder = - SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)).windowCoder(); + SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)).windowCoder(); runner.startBundle(); @@ -178,6 +178,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { runner.finishBundle(); + @SuppressWarnings("unchecked") List>>> result = runner.getReceiver(outputTag); assertEquals(3, result.size()); @@ -204,10 +205,10 @@ public class StreamingGroupAlsoByWindowsDoFnTest { @Test public void testSessions() throws Exception { DoFnRunner>, KV>, List> runner = - makeRunner(Sessions.withGapDuration(Duration.millis(10)), null); + makeRunner(Sessions.withGapDuration(Duration.millis(10))); Coder windowCoder = - Sessions.withGapDuration(Duration.millis(10)).windowCoder(); + Sessions.withGapDuration(Duration.millis(10)).windowCoder(); runner.startBundle(); @@ -243,6 +244,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { runner.finishBundle(); + @SuppressWarnings("unchecked") List>>> result = runner.getReceiver(outputTag); assertEquals(2, result.size()); @@ -261,84 +263,96 @@ public class StreamingGroupAlsoByWindowsDoFnTest { } @Test public void testSessionsCombine() throws Exception { - CombineFn combineFn = Combine.SimpleCombineFn.of(new Sum.SumLongFn()); - DoFnRunner>>, - KV>, List> runner = - makeRunner(Sessions.withGapDuration(Duration.millis(10)), - combineFn.asKeyedFn()); + CombineFn combineFn = Combine.SimpleCombineFn.of(new Sum.SumLongFn()); + DoFnRunner>, + KV, List> runner = + makeRunner(Sessions.withGapDuration(Duration.millis(10)), + combineFn.asKeyedFn()); Coder windowCoder = - Sessions.withGapDuration(Duration.millis(10)).windowCoder(); + Sessions.withGapDuration(Duration.millis(10)).windowCoder(); runner.startBundle(); runner.processElement(WindowedValue.of( - TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(1L))), + 
TimerOrElement.element(KV.of("k", 1L)), new Instant(0), Arrays.asList(window(0, 10)))); runner.processElement(WindowedValue.of( - TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(2L))), + TimerOrElement.element(KV.of("k", 2L)), new Instant(5), Arrays.asList(window(5, 15)))); runner.processElement(WindowedValue.of( - TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(3L))), + TimerOrElement.element(KV.of("k", 3L)), new Instant(15), Arrays.asList(window(15, 25)))); runner.processElement(WindowedValue.of( - TimerOrElement.element(KV.of("k", (Iterable) Arrays.asList(4L))), + TimerOrElement.element(KV.of("k", 4L)), new Instant(3), Arrays.asList(window(3, 13)))); runner.processElement(WindowedValue.valueInEmptyWindows( - TimerOrElement.>>timer( + TimerOrElement.>timer( windowToString((IntervalWindow) window(0, 15), windowCoder), new Instant(14), "k"))); runner.processElement(WindowedValue.valueInEmptyWindows( - TimerOrElement.>>timer( + TimerOrElement.>timer( windowToString((IntervalWindow) window(15, 25), windowCoder), new Instant(24), "k"))); runner.finishBundle(); - List>>> result = runner.getReceiver(outputTag); + @SuppressWarnings("unchecked") + List>> result = runner.getReceiver(outputTag); assertEquals(2, result.size()); - WindowedValue>> item0 = result.get(0); + WindowedValue> item0 = result.get(0); assertEquals("k", item0.getValue().getKey()); - assertThat(item0.getValue().getValue(), Matchers.containsInAnyOrder(7L)); + assertEquals((Long) 7L, item0.getValue().getValue()); assertEquals(new Instant(14), item0.getTimestamp()); assertThat(item0.getWindows(), Matchers.contains(window(0, 15))); - WindowedValue>> item1 = result.get(1); + WindowedValue> item1 = result.get(1); assertEquals("k", item1.getValue().getKey()); - assertThat(item1.getValue().getValue(), Matchers.containsInAnyOrder(3L)); + assertEquals((Long) 3L, item1.getValue().getValue()); assertEquals(new Instant(24), item1.getTimestamp()); assertThat(item1.getWindows(), Matchers.contains(window(15, 25))); } - private DoFnRunner makeRunner( - WindowFn windowingStrategy, - KeyedCombineFn combineFn) { - StreamingGroupAlsoByWindowsDoFn fn = - StreamingGroupAlsoByWindowsDoFn.create(windowingStrategy, combineFn, StringUtf8Coder.of()); + private DoFnRunner>, KV>, List> + makeRunner(WindowFn windowFn) { + return makeRunner(windowFn, null, StringUtf8Coder.of()); + } - DoFnRunner runner = + private DoFnRunner>, KV, List> makeRunner( + WindowFn windowFn, + KeyedCombineFn combineFn) { + return makeRunner(windowFn, combineFn, BigEndianLongCoder.of()); + } + + private DoFnRunner>, KV, List> makeRunner( + WindowFn windowFn, + KeyedCombineFn combineFn, + Coder inputValueCoder) { + StreamingGroupAlsoByWindowsDoFn fn = + StreamingGroupAlsoByWindowsDoFn.create( + windowFn, combineFn, StringUtf8Coder.of(), inputValueCoder); + + return DoFnRunner.createWithListOutputs( PipelineOptionsFactory.create(), fn, PTuple.empty(), - outputTag, + (TupleTag>) (TupleTag) outputTag, new ArrayList>(), execContext.createStepContext("merge"), counters.getAddCounterMutator(), - new GlobalWindows()); - - return runner; + windowFn); } private BoundedWindow window(long start, long end) { From dc3057b550a326a57a9c78155bd1c992823c2585 Mon Sep 17 00:00:00 2001 From: robertwb Date: Wed, 11 Mar 2015 17:44:55 -0700 Subject: [PATCH 0260/1541] Add Combine.BinaryCombineFn for efficiently implementing aggregations that most easily expressed as binary operations. Also added primitive int, long, and double versions of BinaryCombineFn. 
Used the aforementioned binary combine classes to improve the efficiency of sum, min, max. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88393409 --- .../dataflow/sdk/transforms/Combine.java | 326 ++++++++++++++++++ .../cloud/dataflow/sdk/transforms/Max.java | 37 +- .../cloud/dataflow/sdk/transforms/Min.java | 35 +- .../cloud/dataflow/sdk/transforms/Sum.java | 48 +-- .../dataflow/sdk/transforms/CombineTest.java | 44 +++ .../dataflow/sdk/transforms/ViewTest.java | 3 +- .../dataflow/sdk/util/AggregatorImplTest.java | 25 +- .../StreamingGroupAlsoByWindowsDoFnTest.java | 3 +- 8 files changed, 452 insertions(+), 69 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 5d51a3c17303d..729ad0ff33211 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -17,7 +17,12 @@ package com.google.cloud.dataflow.sdk.transforms; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; +import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.coders.CoderRegistry; +import com.google.cloud.dataflow.sdk.coders.CustomCoder; +import com.google.cloud.dataflow.sdk.coders.DelegateCoder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.VarIntCoder; @@ -35,6 +40,9 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; @@ -442,6 +450,324 @@ public Coder getDefaultOutputCoder( } + ///////////////////////////////////////////////////////////////////////////// + + /** + * An abstract subclass of CombineFn for implementing combiners that are more + * easily expressed as binary operations. + */ + public abstract static class BinaryCombineFn + extends CombineFn, V> { + + /** + * Applies the binary operation to the two operands, returning the result. + */ + public abstract V apply(V left, V right); + + /** + * Returns the value that should be used for the combine of the empty set. 
+ */ + public V identity() { + return null; + } + + @Override + public Holder createAccumulator() { + return new Holder<>(); + } + + @Override + public void addInput(Holder accumulator, V input) { + if (accumulator.present) { + accumulator.set(apply(accumulator.value, input)); + } else { + accumulator.set(input); + } + } + + @Override + public Holder mergeAccumulators(Iterable> accumulators) { + Holder running = new Holder<>(); + for (Holder accumulator : accumulators) { + if (accumulator.present) { + if (running.present) { + running.set(apply(running.value, accumulator.value)); + } else { + running.set(accumulator.value); + } + } + } + return running; + } + + @Override + public V extractOutput(Holder accumulator) { + if (accumulator.present) { + return accumulator.value; + } else { + return identity(); + } + } + + @Override + public Coder> getAccumulatorCoder(CoderRegistry registry, final Coder inputCoder) { + return new CustomCoder>() { + @Override + public void encode(Holder accumulator, OutputStream outStream, Context context) + throws CoderException, IOException { + if (accumulator.present) { + outStream.write(1); + inputCoder.encode(accumulator.value, outStream, context); + } else { + outStream.write(0); + } + } + + @Override + public Holder decode(InputStream inStream, Context context) + throws CoderException, IOException { + if (inStream.read() == 1) { + return new Holder(inputCoder.decode(inStream, context)); + } else { + return new Holder<>(); + } + } + + @Override + @Deprecated + public boolean isDeterministic() { + return inputCoder.isDeterministic(); + } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + inputCoder.verifyDeterministic(); + } + }; + } + + @Override + public Coder getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) { + return inputCoder; + } + + private static class Holder { + public V value; + public boolean present; + public Holder() { } + public Holder(V value) { set(value); } + public void set(V value) { + this.present = true; + this.value = value; + } + } + } + + /** + * An abstract subclass of CombineFn for implementing combiners that are more + * easily expressed as binary operations on ints. + */ + public abstract static class BinaryCombineIntegerFn extends CombineFn { + + /** + * Applies the binary operation to the two operands, returning the result. + */ + public abstract int apply(int left, int right); + + /** + * Returns the identity element of this operation, i.e. an element {@code e} + * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}. 
+ */ + public abstract int identity(); + + @Override + public int[] createAccumulator() { + return wrap(identity()); + } + + @Override + public void addInput(int[] accumulator, Integer input) { + accumulator[0] = apply(accumulator[0], input); + } + + @Override + public int[] mergeAccumulators(Iterable accumulators) { + Iterator iter = accumulators.iterator(); + if (!iter.hasNext()) { + return createAccumulator(); + } else { + int running = iter.next()[0]; + while (iter.hasNext()) { + running = apply(running, iter.next()[0]); + } + return wrap(running); + } + } + + @Override + public Integer extractOutput(int[] accumulator) { + return accumulator[0]; + } + + @Override + public Coder getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) { + return DelegateCoder.of( + inputCoder, + new DelegateCoder.CodingFunction() { + @Override public Integer apply(int[] accumulator) { return accumulator[0]; } + }, + new DelegateCoder.CodingFunction() { + @Override public int[] apply(Integer value) { return wrap(value); } + }); + } + + @Override + public Coder getDefaultOutputCoder(CoderRegistry registry, + Coder inputCoder) { + return inputCoder; + } + + private int[] wrap(int value) { + return new int[] { value }; + } + } + + /** + * An abstract subclass of CombineFn for implementing combiners that are more + * easily expressed as binary operations on longs. + */ + public abstract static class BinaryCombineLongFn extends CombineFn { + + /** + * Applies the binary operation to the two operands, returning the result. + */ + public abstract long apply(long left, long right); + + /** + * Returns the identity element of this operation, i.e. an element {@code e} + * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}. + */ + public abstract long identity(); + + @Override + public long[] createAccumulator() { + return wrap(identity()); + } + + @Override + public void addInput(long[] accumulator, Long input) { + accumulator[0] = apply(accumulator[0], input); + } + + @Override + public long[] mergeAccumulators(Iterable accumulators) { + Iterator iter = accumulators.iterator(); + if (!iter.hasNext()) { + return createAccumulator(); + } else { + long running = iter.next()[0]; + while (iter.hasNext()) { + running = apply(running, iter.next()[0]); + } + return wrap(running); + } + } + + @Override + public Long extractOutput(long[] accumulator) { + return accumulator[0]; + } + + @Override + public Coder getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) { + return DelegateCoder.of( + inputCoder, + new DelegateCoder.CodingFunction() { + @Override public Long apply(long[] accumulator) { return accumulator[0]; } + }, + new DelegateCoder.CodingFunction() { + @Override public long[] apply(Long value) { return wrap(value); } + }); + } + + @Override + public Coder getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) { + return inputCoder; + } + + private long[] wrap(long value) { + return new long[] { value }; + } + } + + /** + * An abstract subclass of CombineFn for implementing combiners that are more + * easily expressed as binary operations on doubles. + */ + public abstract static class BinaryCombineDoubleFn extends CombineFn { + + /** + * Applies the binary operation to the two operands, returning the result. + */ + public abstract double apply(double left, double right); + + /** + * Returns the identity element of this operation, i.e. an element {@code e} + * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}. 
+ */ + public abstract double identity(); + + @Override + public double[] createAccumulator() { + return wrap(identity()); + } + + @Override + public void addInput(double[] accumulator, Double input) { + accumulator[0] = apply(accumulator[0], input); + } + + @Override + public double[] mergeAccumulators(Iterable accumulators) { + Iterator iter = accumulators.iterator(); + if (!iter.hasNext()) { + return createAccumulator(); + } else { + double running = iter.next()[0]; + while (iter.hasNext()) { + running = apply(running, iter.next()[0]); + } + return wrap(running); + } + } + + @Override + public Double extractOutput(double[] accumulator) { + return accumulator[0]; + } + + @Override + public Coder getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) { + return DelegateCoder.of( + inputCoder, + new DelegateCoder.CodingFunction() { + @Override public Double apply(double[] accumulator) { return accumulator[0]; } + }, + new DelegateCoder.CodingFunction() { + @Override public double[] apply(Double value) { return wrap(value); } + }); + } + + @Override + public Coder getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) { + return inputCoder; + } + + private double[] wrap(double value) { + return new double[] { value }; + } + } + ///////////////////////////////////////////////////////////////////////////// /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java index 70d595da982eb..7c707140a7a7b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java @@ -133,15 +133,15 @@ public static Combine.PerKey doublesPerKey() { ///////////////////////////////////////////////////////////////////////////// /** - * A {@code SerializableFunction} that computes the maximum of an - * {@code Iterable} of numbers of type {@code N}, useful as an - * argument to {@link Combine#globally} or {@link Combine#perKey}. + * A {@code CombineFn} that computes the maximum of a set of elements + * of type {@code N}, useful as an argument to {@link Combine#globally} + * or {@link Combine#perKey}. * * @param the type of the {@code Number}s being compared */ @SuppressWarnings("serial") - public static class MaxFn> - implements SerializableFunction, N> { + public static class MaxFn> + extends Combine.BinaryCombineFn { /** The smallest value of type N. */ private final N initialValue; @@ -157,20 +157,19 @@ public MaxFn(N initialValue) { } @Override - public N apply(Iterable input) { - N max = initialValue; - for (N value : input) { - if (value.compareTo(max) > 0) { - max = value; - } - } - return max; + public N apply(N a, N b) { + return a.compareTo(b) >= 0 ? a : b; + } + + @Override + public N identity() { + return initialValue; } } /** - * A {@code SerializableFunction} that computes the maximum of an - * {@code Iterable} of {@code Integer}s, useful as an argument to + * A {@code CombineFn} that computes the maximum of a collection + * of {@code Integer}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. 
*/ @SuppressWarnings("serial") @@ -179,8 +178,8 @@ public static class MaxIntegerFn extends MaxFn { } /** - * A {@code SerializableFunction} that computes the maximum of an - * {@code Iterable} of {@code Long}s, useful as an argument to + * A {@code CombineFn} that computes the maximum of a collection + * of {@code Long}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ @SuppressWarnings("serial") @@ -189,8 +188,8 @@ public static class MaxLongFn extends MaxFn { } /** - * A {@code SerializableFunction} that computes the maximum of an - * {@code Iterable} of {@code Double}s, useful as an argument to + * A {@code CombineFn} that computes the maximum of a collection + * of {@code Double}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ @SuppressWarnings("serial") diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java index 0eb58e41563cd..aaf7d3aaf61e4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java @@ -133,14 +133,14 @@ public static Combine.PerKey doublesPerKey() { ///////////////////////////////////////////////////////////////////////////// /** - * A {@code SerializableFunction} that computes the minimum of an - * {@code Iterable} of numbers of type {@code N}, useful as an + * A {@code CombineFn} that computes the minimum of a collection + * of elements of type {@code N}, useful as an * argument to {@link Combine#globally} or {@link Combine#perKey}. * * @param the type of the {@code Number}s being compared */ - public static class MinFn> - implements SerializableFunction, N> { + public static class MinFn> + extends Combine.BinaryCombineFn { private static final long serialVersionUID = 0; /** The largest value of type N. */ @@ -157,20 +157,19 @@ public MinFn(N initialValue) { } @Override - public N apply(Iterable input) { - N min = initialValue; - for (N value : input) { - if (value.compareTo(min) < 0) { - min = value; - } - } - return min; + public N apply(N a, N b) { + return a.compareTo(b) <= 0 ? a : b; + } + + @Override + public N identity() { + return initialValue; } } /** - * A {@code SerializableFunction} that computes the minimum of an - * {@code Iterable} of {@code Integer}s, useful as an argument to + * A {@code CombineFn} that computes the minimum of a collection + * of {@code Integer}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ public static class MinIntegerFn extends MinFn { @@ -180,8 +179,8 @@ public static class MinIntegerFn extends MinFn { } /** - * A {@code SerializableFunction} that computes the minimum of an - * {@code Iterable} of {@code Long}s, useful as an argument to + * A {@code CombineFn} that computes the minimum of a collection + * of {@code Long}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ public static class MinLongFn extends MinFn { @@ -191,8 +190,8 @@ public static class MinLongFn extends MinFn { } /** - * A {@code SerializableFunction} that computes the minimum of an - * {@code Iterable} of {@code Double}s, useful as an argument to + * A {@code CombineFn} that computes the minimum of a collection + * of {@code Double}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. 
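Because MaxFn, MinFn and the Sum functions are now CombineFns rather than SerializableFunctions over Iterables, they can be handed straight to the combining transforms, and wrappers such as Combine.SimpleCombineFn.of(...) (dropped from ViewTest and the streaming test further down) are no longer needed. A brief sketch, assuming a pipeline with illustrative collections named numbers and keyedNumbers:

PCollection<Integer> maxValue =
    numbers.apply(Combine.globally(new Max.MaxIntegerFn()));
PCollection<KV<String, Integer>> minPerKey =
    keyedNumbers.apply(Combine.perKey(new Min.MinIntegerFn()));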
*/ public static class MinDoubleFn extends MinFn { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java index b61def31d3aad..98f2adb2b20a1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java @@ -132,15 +132,15 @@ public static Combine.PerKey doublesPerKey() { * {@code Iterable} of {@code Integer}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ - public static class SumIntegerFn - implements SerializableFunction, Integer> { + public static class SumIntegerFn extends Combine.BinaryCombineIntegerFn { @Override - public Integer apply(Iterable input) { - int sum = 0; - for (int value : input) { - sum += value; - } - return sum; + public int apply(int a, int b) { + return a + b; + } + + @Override + public int identity() { + return 0; } } @@ -149,15 +149,15 @@ public Integer apply(Iterable input) { * {@code Iterable} of {@code Long}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ - public static class SumLongFn - implements SerializableFunction, Long> { + public static class SumLongFn extends Combine.BinaryCombineLongFn { @Override - public Long apply(Iterable input) { - long sum = 0; - for (long value : input) { - sum += value; - } - return sum; + public long apply(long a, long b) { + return a + b; + } + + @Override + public long identity() { + return 0; } } @@ -166,15 +166,15 @@ public Long apply(Iterable input) { * {@code Iterable} of {@code Double}s, useful as an argument to * {@link Combine#globally} or {@link Combine#perKey}. */ - public static class SumDoubleFn - implements SerializableFunction, Double> { + public static class SumDoubleFn extends Combine.BinaryCombineDoubleFn { + @Override + public double apply(double a, double b) { + return a + b; + } + @Override - public Double apply(Iterable input) { - double sum = 0; - for (double value : input) { - sum += value; - } - return sum; + public double identity() { + return 0; } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 38497c3028fd4..0a3dcdce719e6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.transforms; +import static com.google.cloud.dataflow.sdk.TestUtils.checkCombineFn; import static org.junit.Assert.assertThat; import com.google.api.client.util.Preconditions; @@ -343,6 +344,49 @@ public void testHotKeyCombining() { p.run(); } + @Test + public void testBinaryCombineFn() { + Pipeline p = TestPipeline.create(); + PCollection> input = copy(createInput(p, TABLE), 2); + PCollection> intProduct = input + .apply(Combine.perKey(new TestProdInt())); + PCollection> objProduct = input + .apply(Combine.perKey(new TestProdObj())); + + List> expected = Arrays.asList(KV.of("a", 16), KV.of("b", 169)); + DataflowAssert.that(intProduct).containsInAnyOrder(expected); + DataflowAssert.that(objProduct).containsInAnyOrder(expected); + + p.run(); + } + + @Test + public void testBinaryCombineFnWithNulls() { + checkCombineFn(new NullCombiner(), Arrays.asList(3, 3, 5), 45); + checkCombineFn(new NullCombiner(), Arrays.asList(null, 3, 5), 30); + checkCombineFn(new NullCombiner(), 
Arrays.asList(3, 3, null), 18); + checkCombineFn(new NullCombiner(), Arrays.asList(null, 3, null), 12); + checkCombineFn(new NullCombiner(), Arrays.asList(null, null, null), 8); + } + + private static final class TestProdInt extends Combine.BinaryCombineIntegerFn { + public int apply(int left, int right) { return left * right; } + public int identity() { return 1; } + } + + private static final class TestProdObj extends Combine.BinaryCombineFn { + public Integer apply(Integer left, Integer right) { return left * right; } + } + + /** + * Computes the product, considering null values to be 2. + */ + private static final class NullCombiner extends Combine.BinaryCombineFn { + public Integer apply(Integer left, Integer right) { + return (left == null ? 2 : left) * (right == null ? 2 : right); + } + } + //////////////////////////////////////////////////////////////////////////// // Test classes, for different kinds of combining fns. diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java index f4843c4154278..852c73cbe4948 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ViewTest.java @@ -219,8 +219,7 @@ public void testCombinedMapSideInput() { final PCollectionView, ?> view = pipeline .apply(Create.of(KV.of("a", 1), KV.of("a", 20), KV.of("b", 3))) - .apply(View.asMap().withCombiner( - Combine.SimpleCombineFn.of(new Sum.SumIntegerFn()))); + .apply(View.asMap().withCombiner(new Sum.SumIntegerFn())); PCollection> output = pipeline .apply(Create.of("apple", "banana", "blackberry")) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java index 85ddbef41e334..dc9c8e5c7c858 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AggregatorImplTest.java @@ -70,6 +70,23 @@ private void testAggregator(List items, CounterTestUtils.extractCounterUpdate(expectedCounter, false)); } + @SuppressWarnings("rawtypes") + private void testAggregator(List items, + Combine.CombineFn combiner, + Counter expectedCounter) { + CounterSet counters = new CounterSet(); + Aggregator aggregator = new AggregatorImpl( + AGGREGATOR_NAME, combiner, counters.getAddCounterMutator()); + for (V item : items) { + aggregator.addValue(item); + } + + List cloudCounterSet = CounterTestUtils.extractCounterUpdates(counters, false); + Assert.assertEquals(cloudCounterSet.size(), 1); + Assert.assertEquals(cloudCounterSet.get(0), + CounterTestUtils.extractCounterUpdate(expectedCounter, false)); + } + @Test public void testSumInteger() throws Exception { testAggregator(Arrays.asList(2, 4, 1, 3), new Sum.SumIntegerFn(), @@ -128,12 +145,12 @@ public void testMaxDouble() throws Exception { public void testCompatibleDuplicateNames() throws Exception { CounterSet counters = new CounterSet(); Aggregator aggregator1 = - new AggregatorImpl, Integer>( + new AggregatorImpl( AGGREGATOR_NAME, new Sum.SumIntegerFn(), counters.getAddCounterMutator()); Aggregator aggregator2 = - new AggregatorImpl, Integer>( + new AggregatorImpl( AGGREGATOR_NAME, new Sum.SumIntegerFn(), counters.getAddCounterMutator()); @@ -148,7 +165,7 @@ public void testCompatibleDuplicateNames() throws Exception { @Test public void testIncompatibleDuplicateNames() throws Exception { 
CounterSet counters = new CounterSet(); - new AggregatorImpl, Integer>( + new AggregatorImpl( AGGREGATOR_NAME, new Sum.SumIntegerFn(), counters.getAddCounterMutator()); @@ -156,7 +173,7 @@ public void testIncompatibleDuplicateNames() throws Exception { expectedEx.expectMessage(Matchers.containsString( "aggregator's name collides with an existing aggregator or " + "system-provided counter of an incompatible type")); - new AggregatorImpl, Long>( + new AggregatorImpl( AGGREGATOR_NAME, new Sum.SumLongFn(), counters.getAddCounterMutator()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java index 94cf19599ab2a..cef060822f060 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/StreamingGroupAlsoByWindowsDoFnTest.java @@ -24,7 +24,6 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.transforms.Combine; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.Sum; @@ -263,7 +262,7 @@ public class StreamingGroupAlsoByWindowsDoFnTest { } @Test public void testSessionsCombine() throws Exception { - CombineFn combineFn = Combine.SimpleCombineFn.of(new Sum.SumLongFn()); + CombineFn combineFn = new Sum.SumLongFn(); DoFnRunner>, KV, List> runner = makeRunner(Sessions.withGapDuration(Duration.millis(10)), From 48ae86accd33821658d96be39c9b55d6497e972a Mon Sep 17 00:00:00 2001 From: vanya Date: Wed, 11 Mar 2015 23:55:48 -0700 Subject: [PATCH 0261/1541] Retry GCS file read on transient problems that cause RuntimeExceptions instead of IOExceptions. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88415354 --- .../gcsio/GoogleCloudStorageReadChannel.java | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java index 689a57495a253..a67d18205873d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsio/GoogleCloudStorageReadChannel.java @@ -255,16 +255,17 @@ public int read(ByteBuffer buffer) int remainingBeforeRead = buffer.remaining(); try { int numBytesRead = readChannel.read(buffer); - Preconditions.checkState(numBytesRead != 0, "Read 0 bytes without blocking!"); + checkIOPrecondition(numBytesRead != 0, "Read 0 bytes without blocking"); if (numBytesRead < 0) { // Check that we didn't get a premature End of Stream signal by checking the number of // bytes read against the stream size. Unfortunately we don't have information about the // actual size of the data stream when stream compression is used, so we can only ignore // this case here. 
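// (In the hunk below, checkState is replaced with checkIOPrecondition so that a premature
// end of stream surfaces as an IOException, which callers can retry, instead of an
// unchecked IllegalStateException.)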
- Preconditions.checkState(isCompressedStream || currentPosition == size, - "Received end of stream result before all the file data has been received; " - + "totalBytesRead: %s, currentPosition: %s, size: %s", - totalBytesRead, currentPosition, size); + checkIOPrecondition(isCompressedStream || currentPosition == size, + String.format( + "Received end of stream result before all the file data has been received; " + + "totalBytesRead: %s, currentPosition: %s, size: %s", + totalBytesRead, currentPosition, size)); break; } totalBytesRead += numBytesRead; @@ -341,7 +342,7 @@ public int read(ByteBuffer buffer) readChannel.close(); readChannel = null; } catch (SSLException ssle) { - LOG.warn("Got SSLException on readChannel.close() before retry; ignoring it.", ssle); + LOG.debug("Got SSLException on readChannel.close() before retry; ignoring it.", ssle); readChannel = null; } // For "other" exceptions, we'll let it propagate out without setting readChannel to @@ -361,9 +362,9 @@ public int read(ByteBuffer buffer) // Check that we didn't get a premature End of Stream signal by checking the number of bytes // read against the stream size. Unfortunately we don't have information about the actual size // of the data stream when stream compression is used, so we can only ignore this case here. - Preconditions.checkState(isCompressedStream || currentPosition == size, - "Failed to read any data before all the file data has been received; " - + "currentPosition: %s, size: %s", currentPosition, size); + checkIOPrecondition(isCompressedStream || currentPosition == size, + String.format("Failed to read any data before all the file data has been received; " + + "currentPosition: %s, size: %s", currentPosition, size)); return -1; } else { return totalBytesRead; @@ -490,7 +491,7 @@ protected void validatePosition(long newPosition) { /** * Seeks to the given position in the underlying stream. * - * Note: Seek is an expensive operation because a new stream is opened each time. + *
<p>
    Note: Seek is an expensive operation because a new stream is opened each time. * * @throws java.io.FileNotFoundException if the underlying object does not exist. * @throws IOException on IO error @@ -583,4 +584,17 @@ private void throwIfNotOpen() throw new ClosedChannelException(); } } + + /** + * Throws an IOException if precondition is false. + * + *
<p>
    This method should be used in place of Preconditions.checkState in cases where the + * precondition is derived from the status of the IO operation. That makes it possible to retry + * the operation by catching IOException. + */ + private void checkIOPrecondition(boolean precondition, String errorMessage) throws IOException { + if (!precondition) { + throw new IOException(errorMessage); + } + } } From ca2e7dafb8cb021ffb9211f84d0ab86220b9af66 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 12 Mar 2015 13:31:44 -0700 Subject: [PATCH 0262/1541] Improves error messages in AvroReader and FileBasedReader when filepattern matches zero files or multiple files AND an offset range is specified. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88472006 --- .../sdk/runners/worker/AvroReader.java | 11 +++++-- .../sdk/runners/worker/FileBasedReader.java | 9 ++++-- .../sdk/runners/worker/AvroReaderTest.java | 23 ++++++++++++++ .../sdk/runners/worker/TextReaderTest.java | 30 +++++++++++++++++++ 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java index cf51cc7344034..92c3492e218d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java @@ -28,6 +28,7 @@ import org.apache.avro.file.SeekableInput; import org.apache.avro.io.DatumReader; +import java.io.FileNotFoundException; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; @@ -70,16 +71,20 @@ public AvroReader(String filename, @Nullable Long startPosition, @Nullable Long public ReaderIterator> iterator(DatumReader datumReader) throws IOException { IOChannelFactory factory = IOChannelUtils.getFactory(filename); Collection inputs = factory.match(filename); + if (inputs.isEmpty()) { + throw new FileNotFoundException("No match for file pattern '" + filename + "'"); + } if (inputs.size() == 1) { String input = inputs.iterator().next(); ReadableByteChannel reader = factory.open(input); return new AvroFileIterator(datumReader, input, reader, startPosition, endPosition); - } else { if (startPosition != null || endPosition != null) { - throw new UnsupportedOperationException( - "Unable to apply range limits to multiple-input stream: " + filename); + throw new IllegalArgumentException( + "Offset range specified: [" + startPosition + ", " + endPosition + "), so " + + "an exact filename was expected, but more than 1 file matched \"" + filename + + "\" (total " + inputs.size() + "): apparently a filepattern was given."); } return new AvroFileMultiIterator(datumReader, factory, inputs.iterator()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index d9e8bd2ab09fb..f55165c4e5ba7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -35,6 +35,7 @@ import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; @@ -98,13 +99,15 @@ public ReaderIterator iterator() 
throws IOException { IOChannelFactory factory = IOChannelUtils.getFactory(filename); Collection inputs = factory.match(filename); if (inputs.isEmpty()) { - throw new IOException("No match for file pattern '" + filename + "'"); + throw new FileNotFoundException("No match for file pattern '" + filename + "'"); } if (startPosition != null || endPosition != null) { if (inputs.size() != 1) { - throw new UnsupportedOperationException( - "Unable to apply range limits to multiple-input stream: " + filename); + throw new IllegalArgumentException( + "Offset range specified: [" + startPosition + ", " + endPosition + "), so " + + "an exact filename was expected, but more than 1 file matched \"" + filename + + "\" (total " + inputs.size() + "): apparently a filepattern was given."); } return newReaderIteratorForRangeInFile(factory, inputs.iterator().next(), diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java index d5ea3b82664ab..957da90ab5807 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java @@ -31,11 +31,13 @@ import org.junit.Assert; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import java.io.File; +import java.io.FileNotFoundException; import java.io.OutputStream; import java.nio.channels.Channels; import java.util.ArrayList; @@ -52,6 +54,8 @@ public class AvroReaderTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + @Rule + public ExpectedException expectedException = ExpectedException.none(); private void runTestRead( List> elemsList, AvroCoder coder, boolean requireExactMatch) throws Exception { @@ -179,6 +183,25 @@ public void testReadBigRanges() throws Exception { false/* don't require exact match */); } + @Test + public void testErrorOnFileNotFound() throws Exception { + expectedException.expect(FileNotFoundException.class); + readElems("file-not-found", 0L, 100L, AvroCoder.of(String.class), new ArrayList()); + } + + @Test + public void testErrorOnMultipleFiles() throws Exception { + File file1 = tmpFolder.newFile("foo1.avro"); + File file2 = tmpFolder.newFile("foo2.avro"); + Channels.newOutputStream(IOChannelUtils.create(file1.getPath(), MimeTypes.BINARY)).close(); + Channels.newOutputStream(IOChannelUtils.create(file2.getPath(), MimeTypes.BINARY)).close(); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("more than 1 file matched"); + readElems(new File(tmpFolder.getRoot(), "*").getPath(), 0L, 100L, + AvroCoder.of(String.class), new ArrayList()); + } + // TODO: sharded filenames // TODO: reading from GCS } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index ea7a3e5c37882..857f0b66bde82 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -38,6 +38,8 @@ import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; import com.google.cloud.dataflow.sdk.util.CoderUtils; +import 
com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.MimeTypes; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; @@ -45,15 +47,18 @@ import org.hamcrest.Matchers; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.nio.channels.Channels; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; @@ -79,6 +84,8 @@ public class TextReaderTest { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + @Rule + public ExpectedException expectedException = ExpectedException.none(); private File initTestFile() throws IOException { File tmpFile = tmpFolder.newFile(); @@ -651,6 +658,29 @@ public void testCompressionTypeFileGlob() throws IOException { } } + @Test + public void testErrorOnFileNotFound() throws Exception { + expectedException.expect(FileNotFoundException.class); + TextReader textReader = new TextReader<>( + "file-not-found", true, 0L, 100L, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); + textReader.iterator(); + } + + @Test + public void testErrorOnMultipleFiles() throws Exception { + File file1 = tmpFolder.newFile("foo1.avro"); + File file2 = tmpFolder.newFile("foo2.avro"); + Channels.newOutputStream(IOChannelUtils.create(file1.getPath(), MimeTypes.BINARY)).close(); + Channels.newOutputStream(IOChannelUtils.create(file2.getPath(), MimeTypes.BINARY)).close(); + TextReader textReader = new TextReader<>( + new File(tmpFolder.getRoot(), "*").getPath(), true, 0L, 100L, + StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("more than 1 file matched"); + textReader.iterator(); + } + // TODO: sharded filenames // TODO: reading from GCS } From f0d71517219cb416140db5d4a6a36e4f0fb3834c Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 12 Mar 2015 15:26:35 -0700 Subject: [PATCH 0263/1541] Makes a few ReaderIterator classes treat the condition "requestFork at a position below the current stop position" as normal (but of course not fork-inducing), rather than throw an exception. 
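In practical terms, a caller of requestFork can now treat a rejected proposal as a no-op rather than an error. A minimal sketch of the calling pattern (variable names are illustrative):

ForkResult result = iterator.requestFork(forkRequest);
if (result == null) {
  // The proposed position was not honored (for example, it lies at or beyond the
  // current stop position); continue reading with the unchanged range.
}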
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88484651 --- .../sdk/runners/worker/FileBasedReader.java | 7 ++++--- .../runners/worker/GroupingShuffleReader.java | 7 ++++--- .../sdk/runners/worker/InMemoryReader.java | 7 ++++--- .../worker/GroupingShuffleReaderTest.java | 21 ++++--------------- .../runners/worker/InMemoryReaderTest.java | 11 +--------- .../sdk/runners/worker/TextReaderTest.java | 10 +-------- 6 files changed, 18 insertions(+), 45 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java index f55165c4e5ba7..b5f1e46bf3edf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/FileBasedReader.java @@ -218,9 +218,10 @@ public ForkResult requestFork(ForkRequest forkRequest) { } if (endOffset != null && forkOffset >= endOffset) { - throw new IllegalArgumentException( - "Fork requested at an offset beyond the end of the current range: " + forkOffset - + " >= " + endOffset); + LOG.info( + "Fork requested at an offset beyond the end of the current range: {} >= {}", + forkOffset, endOffset); + return null; } this.endOffset = forkOffset; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java index 780949d935958..9e39250007b13 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java @@ -250,10 +250,11 @@ public ForkResult requestFork(ForkRequest forkRequest) { } if (this.stopPosition != null && newStopPosition.compareTo(this.stopPosition) >= 0) { - throw new IllegalArgumentException( + LOG.info( "Fork requested at a shuffle position beyond the end of the current range: " - + forkShufflePosition - + " >= current stop position: " + this.stopPosition.encodeBase64()); + + "{} >= current stop position: {}", + forkShufflePosition, this.stopPosition.encodeBase64()); + return null; } this.stopPosition = newStopPosition; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index 1bb1841466571..da6b209940a33 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -146,9 +146,10 @@ public ForkResult requestFork(ForkRequest forkRequest) { return null; } if (forkIndex >= endPosition) { - throw new IllegalArgumentException( - "Fork requested at an index beyond the end of the current range: " + forkIndex - + " >= " + endPosition); + LOG.info( + "Fork requested at an index beyond the end of the current range: {} >= {}", + forkIndex, endPosition); + return null; } this.endPosition = forkIndex.intValue(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java index 2bc08717ed992..c20626e70e59c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java +++ 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java @@ -26,7 +26,6 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -50,7 +49,6 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.common.collect.Lists; -import org.hamcrest.Matchers; import org.joda.time.Instant; import org.junit.Test; import org.junit.runner.RunWith; @@ -295,13 +293,7 @@ public void testReadFromEmptyShuffleDataAndRequestFork() throws Exception { stop = encodeBase64URLSafeString(fabricatePosition(1, null)); proposedForkPosition.setShufflePosition(stop); - try { - iter.requestFork(toForkRequest(createApproximateProgress(proposedForkPosition))); - fail("IllegalArgumentException expected"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), Matchers.containsString( - "Fork requested at a shuffle position beyond the end of the current range")); - } + assertNull(iter.requestFork(toForkRequest(createApproximateProgress(proposedForkPosition)))); } } @@ -330,14 +322,9 @@ public void testReadFromShuffleDataAndFailToFork() throws Exception { try (Reader.ReaderIterator>>> iter = groupingShuffleReader.iterator(shuffleReader)) { - // Cannot fork since the value provided is past the current stop position. - try { - iter.requestFork(forkRequestAtPosition(makeShufflePosition(kNumRecords + 1, null))); - fail("IllegalArgumentException expected"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), Matchers.containsString( - "Fork requested at a shuffle position beyond the end of the current range")); - } + // Cannot fork since the value provided is past the current stop position. + assertNull( + iter.requestFork(forkRequestAtPosition(makeShufflePosition(kNumRecords + 1, null)))); int i = 0; for (; iter.hasNext(); ++i) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java index 2b957e46affaa..7d7ac3dbc3a3e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java @@ -27,9 +27,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; @@ -37,7 +35,6 @@ import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; -import org.hamcrest.Matchers; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -176,13 +173,7 @@ public void testFork() throws Exception { // Proposed fork position is after the current stop (end) position, no update. 
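// With this change the out-of-range request is simply ignored: requestFork returns null
// and endPosition is left untouched, which is what the assertions below check.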
try (InMemoryReader.InMemoryReaderIterator iterator = (InMemoryReader.InMemoryReaderIterator) inMemoryReader.iterator()) { - try { - iterator.requestFork(forkRequestAtIndex(end + 1)); - fail("IllegalArgumentException expected"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), Matchers.containsString( - "Fork requested at an index beyond the end of the current range")); - } + assertNull(iterator.requestFork(forkRequestAtIndex(end + 1))); assertEquals((int) end, iterator.endPosition); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index 857f0b66bde82..1c245973badbf 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -44,7 +44,6 @@ import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; -import org.hamcrest.Matchers; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -472,14 +471,7 @@ public void testUpdateStopPosition() throws Exception { try (TextReader.TextFileIterator iterator = (TextReader.TextFileIterator) textReader.iterator()) { assertEquals(fileContent[0], iterator.next()); - try { - iterator.requestFork(forkRequestAtByteOffset(stop)); - fail("IllegalArgumentException expected"); - } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), Matchers.containsString( - "Fork requested at an offset beyond the end of the current range")); - } - + assertNull(iterator.requestFork(forkRequestAtByteOffset(stop))); assertEquals(end, iterator.getEndOffset().longValue()); assertFalse(iterator.hasNext()); assertEquals(Arrays.asList(fileContent[0].length()), observer.getActualSizes()); From 05c1ca13089d2be1cafa736ff4f8aebdcbb219ca Mon Sep 17 00:00:00 2001 From: millsd Date: Thu, 12 Mar 2015 17:26:22 -0700 Subject: [PATCH 0264/1541] Move GlobalData requests to the top level of GetData ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88495958 --- sdk/src/main/proto/windmill.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/proto/windmill.proto b/sdk/src/main/proto/windmill.proto index 64d6d18a40044..09cd2e1ddc63f 100644 --- a/sdk/src/main/proto/windmill.proto +++ b/sdk/src/main/proto/windmill.proto @@ -127,7 +127,6 @@ message KeyedGetDataRequest { required fixed64 work_token = 2; repeated TagValue values_to_fetch = 3; repeated TagList lists_to_fetch = 4; - repeated GlobalDataId global_data_to_fetch = 5; } message ComputationGetDataRequest { @@ -137,6 +136,7 @@ message ComputationGetDataRequest { message GetDataRequest { repeated ComputationGetDataRequest requests = 1; + repeated GlobalDataId global_data_to_fetch = 2; } message KeyedGetDataResponse { @@ -145,7 +145,6 @@ message KeyedGetDataResponse { optional bool failed = 2; repeated TagValue values = 3; repeated TagList lists = 4; - repeated GlobalData global_data = 5; } message ComputationGetDataResponse { @@ -155,6 +154,7 @@ message ComputationGetDataResponse { message GetDataResponse { repeated ComputationGetDataResponse data = 1; + repeated GlobalData global_data = 2; } // CommitWork From 1167ab9df8cbb14251df5d78b9f6f11d0f22768b Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 13 Mar 2015 09:44:26 -0700 Subject: [PATCH 
0265/1541] Swap to use varargs instead of array based setters within PipelineOptions. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88554068 --- .../dataflow/sdk/options/DataflowWorkerLoggingOptions.java | 2 +- .../cloud/dataflow/sdk/options/GoogleApiDebugOptions.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java index a38cd44ab7e02..3f59c8365495c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java @@ -52,7 +52,7 @@ public enum Level { * {@link WorkerLogLevelOverride#create(String)}. */ WorkerLogLevelOverride[] getWorkerLogLevelOverrides(); - void setWorkerLogLevelOverrides(WorkerLogLevelOverride[] workerLogLevelOverrides); + void setWorkerLogLevelOverrides(WorkerLogLevelOverride... workerLogLevelOverrides); /** * Defines a log level override for a specific class, package, or name. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java index ad1ca996075d2..4b3d69c87a9e8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java @@ -38,7 +38,7 @@ public interface GoogleApiDebugOptions extends PipelineOptions { * An invalid tracing token will result in 400 errors from Google when the API is invoked. */ GoogleApiTracer[] getGoogleApiTrace(); - void setGoogleApiTrace(GoogleApiTracer[] commands); + void setGoogleApiTrace(GoogleApiTracer... commands); /** * A {@link GoogleClientRequestInitializer} which adds the 'trace' token to Google API calls. From d805b83041ce0859c0a9f34668970e828c48c593 Mon Sep 17 00:00:00 2001 From: millsd Date: Fri, 13 Mar 2015 11:05:28 -0700 Subject: [PATCH 0266/1541] Add GlobalData updates to the Windmill API. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88561610 --- sdk/src/main/proto/windmill.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/proto/windmill.proto b/sdk/src/main/proto/windmill.proto index 09cd2e1ddc63f..76b17a51dd6b3 100644 --- a/sdk/src/main/proto/windmill.proto +++ b/sdk/src/main/proto/windmill.proto @@ -178,7 +178,7 @@ message Counter { optional int64 mean_count = 6; } -// next id: 10 +// next id: 11 message WorkItemCommitRequest { required bytes key = 1; required fixed64 work_token = 2; @@ -189,6 +189,7 @@ message WorkItemCommitRequest { repeated TagList list_updates = 6; repeated Counter counter_updates = 8; repeated GlobalDataId global_data_id_requests = 9; + repeated GlobalData global_data_updates = 10; } message ComputationCommitWorkRequest { From b9f4384544149d7ed9a5f9d8ce649fd918fedca1 Mon Sep 17 00:00:00 2001 From: chamikara Date: Fri, 13 Mar 2015 11:07:14 -0700 Subject: [PATCH 0267/1541] Introduces FileBasedSource, a custom source that implements functionality common to all file-based custom sources. Also introduces ByteOffsetBasedSource as the parent of FileBasedSource. FileBasedSource supports both single files and file patterns. 
Additionally introduces readers ByteOffsetBasedReader and FileBasedReader to encapsulate code common to readers of ByteOffsetBasedSources and FileBasedSources respectively. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88561798 --- .../sdk/io/ByteOffsetBasedSource.java | 161 ++++ .../dataflow/sdk/io/FileBasedSource.java | 514 +++++++++++++ .../sdk/io/ByteOffsetBasedSourceTest.java | 126 ++++ .../dataflow/sdk/io/FileBasedSourceTest.java | 689 ++++++++++++++++++ 4 files changed, 1490 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSourceTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java new file mode 100644 index 0000000000000..4d48d210a5464 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.common.base.Preconditions; + +import java.util.ArrayList; +import java.util.List; + +/** + * A source that uses byte offsets to define starting and ending positions. Extend this class to + * implement your own byte offset based custom source. {@link FileBasedSource} which is a subclass + * of this adds additional functionality useful for custom sources that are based on files. If + * possible implementors should start from {@code FileBasedSource} instead of + * {@code ByteOffsetBasedSource}. + * + *
<p>
    This is a common base class for all sources that use a byte offset range. It stores the range + * and implements splitting into shards. This should be used for sources which can be cheaply read + * starting at any given byte offset. + * + *
<p>
    The byte offset range of the source is between {@code startOffset} (inclusive) and endOffset + * (exclusive), i.e. [{@code startOffset}, {@code endOffset}). The source may include a record if + * its offset is at the range [{@code startOffset}, {@code endOffset}) even if the record extend + * past the range. The source does not include any record at offsets before this range even if it + * extend into this range because the previous range will include this record. A source may choose + * to include records at offsets after this range. For example, a source may choose to set offset + * boundaries based on blocks of records in which case certain records may start after + * {@code endOffset}. But for any given source type the combined set of data read by two sources for + * ranges [A, B) and [B, C) must be the same as the records read by a single source of the same type + * for the range [A, C). + * + * @param Type of records represented by the source. + */ +public abstract class ByteOffsetBasedSource extends Source { + private final long startOffset; + private final long endOffset; + private final long minShardSize; + + /** + * @param startOffset starting byte offset (inclusive) of the source. Must be non-negative. + * + * @param endOffset ending byte offset (exclusive) of the source. Any + * {@code offset >= getMaxEndOffset()}, e.g., {@code Long.MAX_VALUE}, means the same as + * {@code getMaxEndOffset()}. Must be {@code >= startOffset}. + * + * @param minShardSize minimum shard size in bytes that should be used when splitting the source + * into sub-sources. This will not be respected if the total range of the source is smaller + * than the specified {@code minShardSize}. Must be non-negative. + */ + public ByteOffsetBasedSource(long startOffset, long endOffset, long minShardSize) { + this.startOffset = startOffset; + this.endOffset = endOffset; + this.minShardSize = minShardSize; + Preconditions.checkArgument(startOffset >= 0, + "Start offset has value " + startOffset + ", must be non-negative"); + Preconditions.checkArgument(endOffset >= 0, + "End offset has value " + endOffset + ", must be non-negative"); + Preconditions.checkArgument(minShardSize >= 0, + "minShardSize has value " + minShardSize + ", must be non-negative"); + } + + /** + * Returns the starting offset of the source. + */ + public long getStartOffset() { + return startOffset; + } + + /** + * Returns the specified ending offset of the source. If this is {@code >= getMaxEndOffset()}, + * e.g. Long.MAX_VALUE, this implies {@code getMaxEndOffset()}. + */ + public long getEndOffset() { + return endOffset; + } + + /** + * Returns the minimum shard size that should be used when splitting the source into sub-sources. + * This will not be respected if the total range of the source is smaller than the specified + * {@code minShardSize}. + */ + public long getMinShardSize() { + return minShardSize; + } + + @Override + public List> splitIntoShards(long desiredShardSizeBytes, + PipelineOptions options) throws Exception { + // Split the range into shards based on the desiredShardSizeBytes. Final shard is adjusted to + // make sure that we do not end up with a too small shard at the end. If desiredShardSizeBytes + // is smaller than the minShardSize of the source then minShardSize will be used instead. 
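// Worked example with illustrative numbers: for startOffset = 0, maxEnd = 85,
// desiredShardSizeBytes = 40 and minShardSize = 5, the loop below produces the shards
// [0, 40) and [40, 85); the 5 bytes remaining after the second shard are folded into it
// because they amount to less than a quarter of the desired shard size.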
+ + desiredShardSizeBytes = Math.max(desiredShardSizeBytes, minShardSize); + + List> subSources = new ArrayList<>(); + long start = startOffset; + long maxEnd = Math.min(endOffset, getMaxEndOffset(options)); + + while (start < maxEnd) { + long end = start + desiredShardSizeBytes; + end = Math.min(end, maxEnd); + // Avoid having a too small shard at the end and ensure that we respect minShardSize. + long remainingBytes = maxEnd - end; + if ((remainingBytes < desiredShardSizeBytes / 4) || (remainingBytes < minShardSize)) { + end = maxEnd; + } + subSources.add(createSourceForSubrange(start, end)); + + start = end; + } + return subSources; + } + + /** + * Returns the exact ending offset of the current source. This will be used if the source was + * constructed with an endOffset value {@code Long.MAX_VALUE}. + */ + public abstract long getMaxEndOffset(PipelineOptions options) throws Exception; + + /** + * Returns a {@code ByteOffsetBasedSource} for a subrange of the current source. [start, end) will + * be within the range [startOffset, endOffset] of the current source. + */ + public abstract ByteOffsetBasedSource createSourceForSubrange(long start, long end); + + /** + * A reader that implements code common to readers of all {@link ByteOffsetBasedSource}s. + */ + public abstract static class ByteOffsetBasedReader implements Reader { + + /** + * @param source the {@code ByteOffsetBasedSource} to be read by the current reader. + */ + public ByteOffsetBasedReader(ByteOffsetBasedSource source) {} + + /** + * Returns the current offset of the reader. The value returned by this method is undefined + * until the method {@link Source.Reader#start} is called. After {@link Source.Reader#start} is + * called the value returned by this method should represent the offset of the value that will + * be returned by the {@link Source.Reader#getCurrent} call. Values returned for two consecutive + * records should be non-strictly increasing. If the reader has reached the end of the stream + * this should return {@code Long.MAX_VALUE}. The value returned may be outside the range + * defined by the {@code ByteOffsetBasedSource} corresponding to this reader, for reasons + * described in the comment to {@code ByteOffsetBasedSource}. + */ + protected abstract long getCurrentOffset(); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java new file mode 100644 index 0000000000000..865d923713de5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java @@ -0,0 +1,514 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.api.client.util.Preconditions; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.IOChannelFactory; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.common.collect.ImmutableList; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * A common base class for all file-based {@link Source}s. Extend this class to implement your own + * file-based custom source. + * + *
<p>
    A file-based {@code Source} is a {@code Source} backed by a file pattern defined as a Java + * glob, a single file, or a offset range for a single file. See {@link ByteOffsetBasedSource} for + * semantics of offset ranges. + * + *
<p>
    This source stores a {@code String} that is an {@link IOChannelFactory} specification for a + * file or file pattern. There should be an {@code IOChannelFactory} defined for the file + * specification provided. Please refer to {@link IOChannelUtils} and {@link IOChannelFactory} for + * more information on this. + * + *

+ * In addition to the methods left abstract from {@code Source}, subclasses must implement
+ * methods to create a sub-source and a reader for a range of a single file -
+ * {@link #createForSubrangeOfFile} and {@link #createSingleFileReader}.
+ *
+ * @param <T> Type of records represented by the source.
+ */
+public abstract class FileBasedSource<T> extends ByteOffsetBasedSource<T> {
+  private static final Logger LOG = LoggerFactory.getLogger(FileBasedSource.class);
+
+  private final String fileOrPatternSpec;
+  private final Mode mode;
+
+  /**
+   * A given {@code FileBasedSource} represents a file resource of one of these types.
+   */
+  public enum Mode {
+    FILEPATTERN, FULL_SINGLE_FILE, SUBRANGE_OF_SINGLE_FILE
+  }
+
+  /**
+   * Create a {@code FileBasedSource} based on a file or a file pattern specification.
+   *

+   * See {@link ByteOffsetBasedSource} for detailed descriptions of {@code minShardSize},
+   * {@code startOffset}, and {@code endOffset}.
+   *
+   * @param isFilePattern if {@code true}, the provided {@code fileOrPatternSpec} may be a file
+   *        pattern and {@code FileBasedSource} will try to expand the file pattern; if
+   *        {@code false}, the provided {@code fileOrPatternSpec} will be considered a single file
+   *        and will be used verbatim.
+   * @param fileOrPatternSpec {@link IOChannelFactory} specification of file or file pattern
+   *        represented by the {@link FileBasedSource}.
+   * @param minShardSize minimum shard size in bytes.
+   * @param startOffset starting byte offset.
+   * @param endOffset ending byte offset. If the specified value {@code >= #getMaxEndOffset()} it
+   *        is treated as {@code #getMaxEndOffset()}.
+   */
+  public FileBasedSource(boolean isFilePattern, String fileOrPatternSpec, long minShardSize,
+      long startOffset, long endOffset) {
+    super(startOffset, endOffset, minShardSize);
+    if (isFilePattern) {
+      mode = Mode.FILEPATTERN;
+    } else if (startOffset == 0 && endOffset == Long.MAX_VALUE) {
+      mode = Mode.FULL_SINGLE_FILE;
+    } else {
+      mode = Mode.SUBRANGE_OF_SINGLE_FILE;
+    }
+    if (mode == Mode.FILEPATTERN || mode == Mode.FULL_SINGLE_FILE) {
+      Preconditions.checkArgument(startOffset == 0,
+          "FileBasedSource is based on a file pattern or a full single file but the starting offset"
+          + " proposed " + startOffset + " is not zero");
+      Preconditions.checkArgument(endOffset == Long.MAX_VALUE,
+          "FileBasedSource is based on a file pattern or a full single file but the ending offset"
+          + " proposed " + endOffset + " is not Long.MAX_VALUE");
+    }
+    this.fileOrPatternSpec = fileOrPatternSpec;
+  }
+
+  public final String getFileOrPatternSpec() {
+    return fileOrPatternSpec;
+  }
+
+  public final Mode getMode() {
+    return mode;
+  }
+
+  @Override
+  public final FileBasedSource<T> createSourceForSubrange(long start, long end) {
+    Preconditions.checkArgument(mode != Mode.FILEPATTERN,
+        "Cannot split a file pattern based source based on positions");
+    Preconditions.checkArgument(start >= getStartOffset(), "Start offset value " + start
+        + " of the subrange cannot be smaller than the start offset value " + getStartOffset()
+        + " of the parent source");
+    Preconditions.checkArgument(end <= getEndOffset(), "End offset value " + end
+        + " of the subrange cannot be larger than the end offset value " + getEndOffset()
+        + " of the parent source");
+
+    FileBasedSource<T> source = createForSubrangeOfFile(fileOrPatternSpec, start, end);
+    if (start > 0 || end != Long.MAX_VALUE) {
+      Preconditions.checkArgument(source.getMode() == Mode.SUBRANGE_OF_SINGLE_FILE,
+          "Source created for the range [" + start + "," + end + ")"
+          + " must be a subrange source");
+    }
+    return source;
+  }
+
+  /**
+   * Creates and returns a new {@code FileBasedSource} of the same type as the current
+   * {@code FileBasedSource} backed by a given file and an offset range. When the current source
+   * is being split, this method is used to generate new sub-sources. When creating the source,
+   * subclasses must call the constructor of {@code FileBasedSource} with exactly the same
+   * {@code start} and {@code end} values passed here.
+   *
+   * @param fileName file backing the new {@code FileBasedSource}.
+   * @param start starting byte offset of the new {@code FileBasedSource}.
+   * @param end ending byte offset of the new {@code FileBasedSource}. May be Long.MAX_VALUE, in
+   *        which case it will be inferred using {@link #getMaxEndOffset}.
+ */ + public abstract FileBasedSource createForSubrangeOfFile(String fileName, long start, long end); + + /** + * Creates and returns an instance of a {@code FileBasedReader} implementation for the current + * source assuming the source represents a single file. File patterns will be handled by + * {@code FileBasedSource} implementation automatically. + */ + public abstract FileBasedReader createSingleFileReader(PipelineOptions options, Coder coder, + ExecutionContext executionContext); + + @Override + public final long getEstimatedSizeBytes(PipelineOptions options) throws Exception { + // This implementation of method getEstimatedSizeBytes is provided to simplify subclasses. Here + // we perform the size estimation of files and file patterns using the interface provided by + // IOChannelFactory. + + IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec); + if (mode == Mode.FILEPATTERN) { + // TODO Implement a more efficient parallel/batch size estimation mechanism for file patterns. + long startTime = System.currentTimeMillis(); + long totalSize = 0; + Collection inputs = factory.match(fileOrPatternSpec); + for (String input : inputs) { + totalSize += factory.getSizeBytes(input); + } + LOG.debug("Size estimation of file pattern " + fileOrPatternSpec + " took " + + (System.currentTimeMillis() - startTime) + " ms"); + return totalSize; + } else { + long start = getStartOffset(); + long end = Math.min(getEndOffset(), getMaxEndOffset(options)); + return end - start; + } + } + + @Override + public final List> splitIntoShards(long desiredShardSizeBytes, + PipelineOptions options) throws Exception { + // This implementation of method splitIntoShards is provided to simplify subclasses. Here we + // split a FileBasedSource based on a file pattern to FileBasedSources based on full single + // files. For files that can be efficiently seeked, we further split FileBasedSources based on + // those files to FileBasedSources based on sub ranges of single files. + + if (mode == Mode.FILEPATTERN) { + long startTime = System.currentTimeMillis(); + List> splitResults = new ArrayList<>(); + for (String file : expandFilePattern()) { + splitResults.addAll(createForSubrangeOfFile(file, 0, Long.MAX_VALUE).splitIntoShards( + desiredShardSizeBytes, options)); + } + LOG.debug("Splitting the source based on file pattern " + fileOrPatternSpec + " took " + + (System.currentTimeMillis() - startTime) + " ms"); + return splitResults; + } else { + // We split a file-based source into subranges only if the file is seekable. If a file is not + // seekable it will be highly inefficient to create and read a source based on a subrange of + // that file. 
+ IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec); + if (factory.isReadSeekEfficient(fileOrPatternSpec)) { + List> splitResults = new ArrayList<>(); + for (ByteOffsetBasedSource split : + super.splitIntoShards(desiredShardSizeBytes, options)) { + splitResults.add((FileBasedSource) split); + } + return splitResults; + } else { + LOG.debug("The source for file " + fileOrPatternSpec + + " is not split into sub-range based sources since the file is not seekable"); + return ImmutableList.of(this); + } + } + } + + @Override + protected final Reader createBasicReader(PipelineOptions options, Coder coder, + ExecutionContext executionContext) throws IOException { + if (mode == Mode.FILEPATTERN) { + long startTime = System.currentTimeMillis(); + Collection files = expandFilePattern(); + List> fileReaders = new ArrayList<>(); + for (String fileName : files) { + fileReaders.add(createForSubrangeOfFile(fileName, 0, Long.MAX_VALUE).createSingleFileReader( + options, coder, executionContext)); + } + LOG.debug("Creating a reader for file pattern " + fileOrPatternSpec + " took " + + (System.currentTimeMillis() - startTime) + " ms"); + return new FilePatternReader(fileReaders.iterator()); + } else { + return createSingleFileReader(options, coder, executionContext); + } + } + + @Override + public final long getMaxEndOffset(PipelineOptions options) throws Exception { + if (mode == Mode.FILEPATTERN) { + throw new IllegalArgumentException("Cannot determine the exact end offset of a file pattern"); + } + if (getEndOffset() == Long.MAX_VALUE) { + IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec); + return factory.getSizeBytes(fileOrPatternSpec); + } else { + return getEndOffset(); + } + } + + private Collection expandFilePattern() throws IOException { + if (mode != Mode.FILEPATTERN) { + throw new IllegalArgumentException("Not a file pattern"); + } + IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec); + return factory.match(fileOrPatternSpec); + } + + /** + * A {@link Source.Reader reader} that implements code common to readers of + * {@code FileBasedSource}s. + * + *

    Seekability

    + * + *

+   * This reader uses a {@link ReadableByteChannel} created for the file represented by the
+   * corresponding source to efficiently move to the correct starting position defined in the
+   * source. Subclasses of this reader should implement {@link #startReading} to get access to
+   * this channel. If the source corresponding to the reader is for a subrange of a file, the
+   * {@code ReadableByteChannel} provided is guaranteed to be an instance of the type
+   * {@link SeekableByteChannel}, which may be used by the subclass to traverse back in the
+   * channel to determine the correct starting position.
+   *

    Split Points

    + * + *

    Simple record-based formats (such as reading lines, reading CSV etc.), where each record can + * be identified by a unique offset, should interpret a range [A, B) as "read from the first + * record starting at or after offset A, up to but not including the first record starting at or + * after offset B". + * + *
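A concrete, made-up illustration of this rule, for a format whose records are all 4 bytes long and therefore start at offsets 0, 4, 8, 12, 16, and so on:

    // Reading the range [5, 13):
    //   - the first record starting at or after offset 5 is the one at offset 8 (included);
    //   - the first record starting at or after offset 13 is the one at offset 16 (excluded).
    // So [5, 13) yields exactly the records at offsets 8 and 12, and reading [5, 13) followed
    // by [13, 21) produces the same records as reading [5, 21) in one go.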

    More complex formats, such as some block-based formats, may have records which are not + * directly addressable: i.e. for some records, there is no way to describe the location of a + * record using a single offset number. For example, imagine a file format consisting of a + * sequence of blocks, where each block is compressed using some block compression algorithm. Then + * blocks have offsets, but individual records don't. More complex cases are also possible. + * + *

    Many such formats still admit reading a range of offsets in a way consistent with the + * semantics of {@code ByteOffsetBasedReader}, i.e. reading [A, B) and [B, C) is equivalent to + * reading [A, C). E.g., for the compressed block-based format discussed above, reading [A, B) + * would mean "read all the records in all blocks whose starting offset is in [A, B)". + * + *

    To support such complex formats in {@code FileBasedReader}, we introduce the notion of + * split points. We say that a record is a split point if there exists an offset A such + * that the record is the first one to be read for a range [A, {@code Long.MAX_VALUE}). E.g. for + * the block-based format above, the only split points would be the first records in each block. + * + *

    With the above definition of split points an extended definition of the offset of a record + * can be specified. For a record which is at a split point, its offset is defined to be the + * largest A such that reading a source with the range [A, Long.MAX_VALUE) includes this record; + * offsets of other records are only required to be non-strictly increasing. Offsets of records of + * a {@code FileBasedReader} should be set based on this definition. + * + *
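To illustrate these definitions, the fragment below sketches how a reader for a hypothetical block-compressed format could report offsets and split points. The class name and format are made up, record decoding is omitted, and only the bookkeeping relevant to split points is shown.

    // Sketch (hypothetical block format): offsets and split points for records inside blocks.
    abstract class BlockReader<T> extends FileBasedSource.FileBasedReader<T> {
      // Byte offset at which the block containing the current record starts.
      private long currentBlockOffset = 0;
      // True iff the current record is the first record of its block.
      private boolean firstRecordOfBlock = false;

      BlockReader(FileBasedSource<T> source) {
        super(source);
      }

      // readNextRecord() (not shown) would decode the next record and, on a block boundary,
      // update currentBlockOffset and set firstRecordOfBlock for the record it just produced.

      @Override
      protected long getCurrentOffset() {
        // Records inside a block are not individually addressable, so every record reports the
        // starting offset of its block: the largest A such that reading [A, Long.MAX_VALUE)
        // still includes the record.
        return currentBlockOffset;
      }

      @Override
      protected boolean isAtSplitPoint() {
        // Only the first record of each block is a split point.
        return firstRecordOfBlock;
      }
    }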

    Reading Records

    + * + *

    Sequential reading is implemented using {@link #readNextRecord}. + * + *

    Then {@code FileBasedReader} implements "reading a range [A, B)" in the following way. + *

      + *
    1. {@code start()} opens the file + *
    2. {@code start()} seeks the {@code SeekableByteChannel} to A (reading offset ranges for + * non-seekable files is not supported) and calls {@code startReading()} + *
    3. the subclass must do whatever is needed to move to the first split point at or after this + * position in the channel + *
    4. {@code start()} calls {@code advance()} once + *
    5. if the previous advance call returned {@code true} sequential reading starts and + * {@code advance()} will be called repeatedly + *
    + * {@code advance()} calls {@code readNextRecord()} on the subclass, and stops (returns false) if + * the new record is at a split point AND the offset of the new record is at or after B. + * + *
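The loop below sketches the caller's side of this protocol, using only the start()/advance()/getCurrent()/close() calls defined in this file; the helper method and its arguments are placeholders, and the printout stands in for real per-record processing.

    // Sketch: reading every record of one (sub-)source through its reader.
    static void readFully(FileBasedSource<String> source, PipelineOptions options,
        Coder<String> coder, ExecutionContext executionContext) throws IOException {
      FileBasedSource.FileBasedReader<String> reader =
          source.createSingleFileReader(options, coder, executionContext);
      try {
        // start() opens the file, aligns to the first split point at or after the range start,
        // and loads the first record; advance() stops at the first split point at or after the
        // range end.
        for (boolean more = reader.start(); more; more = reader.advance()) {
          System.out.println(reader.getCurrent());
        }
      } finally {
        reader.close();
      }
    }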

    Thread Safety

    + * + * Since this class implements {@link Source.Reader} it guarantees thread safety. Abstract methods + * defined here will not be accessed by more than one thread concurrently. + */ + public abstract static class FileBasedReader extends ByteOffsetBasedReader { + private ReadableByteChannel channel = null; + private boolean finished = false; // Reader has finished advancing. + private boolean endPositionReached = false; // If true, records have been read up to the ending + // offset but the last split point may not have been + // reached. + private boolean startCalled = false; + private FileBasedSource source = null; + + /** + * Subclasses should not perform IO operations at the constructor. All IO operations should be + * delayed until the {@link #startReading} method is invoked. + */ + public FileBasedReader(FileBasedSource source) { + super(source); + Preconditions.checkArgument(source.getMode() != Mode.FILEPATTERN, + "FileBasedReader does not support reading file patterns"); + this.source = source; + } + + protected final FileBasedSource getSource() { + return source; + } + + @Override + public final boolean start() throws IOException { + Preconditions.checkState(!startCalled, "start() should only be called once"); + IOChannelFactory factory = IOChannelUtils.getFactory(source.getFileOrPatternSpec()); + this.channel = factory.open(source.getFileOrPatternSpec()); + + if (channel instanceof SeekableByteChannel) { + SeekableByteChannel seekChannel = (SeekableByteChannel) channel; + seekChannel.position(source.getStartOffset()); + } else { + // Channel is not seekable. Must not be a subrange. + Preconditions.checkArgument(source.mode != Mode.SUBRANGE_OF_SINGLE_FILE, + "Subrange-based sources must only be defined for file types that support seekable " + + " read channels"); + Preconditions.checkArgument(source.getStartOffset() == 0, "Start offset " + + source.getStartOffset() + + " is not zero but channel for reading the file is not seekable."); + } + + startReading(channel); + startCalled = true; + + // Advance once to load the first record. + return advance(); + } + + @Override + public final boolean advance() throws IOException { + Preconditions.checkState(startCalled, "advance() called before calling start()"); + if (finished) { + return false; + } + + if (!readNextRecord()) { + // End of the stream reached. + finished = true; + return false; + } + if (getCurrentOffset() >= source.getEndOffset()) { + // Current record is at or after the end position defined by the source. The reader should + // continue reading until the next split point is reached. + endPositionReached = true; + } + + // If the current record is at or after the end position defined by the source and if the + // current record is at a split point, then the current record, and any record after that + // does not belong to the offset range of the source. + if (endPositionReached && isAtSplitPoint()) { + finished = true; + return false; + } + + return true; + } + + /** + * Closes any {@link ReadableByteChannel} created for the current reader. This implementation is + * idempotent. Any {@code close()} method introduced by a subclass must be idempotent and must + * call the {@code close()} method in the {@code FileBasedReader}. + */ + @Override + public void close() throws IOException { + if (channel != null) { + channel.close(); + } + } + + /** + * Specifies if the current record of the reader is at a split point. + * + *

    This returns {@code true} if {@link #readNextRecord} was invoked at least once and the + * last record returned by {@link #readNextRecord} is at a split point, {@code false} otherwise. + * Please refer to {@link FileBasedSource.FileBasedReader FileBasedReader} for the definition of + * split points. + */ + protected abstract boolean isAtSplitPoint(); + + /** + * Performs any initialization of the subclass of {@code FileBasedReader} that involves IO + * operations. Will only be invoked once and before that invocation the base class will seek the + * channel to the source's starting offset. + * + *

+     * The provided {@link ReadableByteChannel} is for the file represented by the source of this
+     * reader. A subclass may use the {@code channel} to build a higher level IO abstraction, e.g.,
+     * a BufferedReader or an XML parser.
+     *

    A subclass may additionally use this to adjust the starting position prior to reading + * records. For example, the channel of a reader that reads text lines may point to the middle + * of a line after the position adjustment done at {@code FileBasedReader}. In this case the + * subclass could adjust the position of the channel to the beginning of the next line. If the + * corresponding source is for a subrange of a file, {@code channel} is guaranteed to be an + * instance of the type {@link SeekableByteChannel} in which case the subclass may traverse back + * in the channel to determine if the channel is already at the correct starting position (e.g., + * to check if the previous character was a newline). + * + *
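As a sketch of that adjustment for a line-oriented reader (essentially what the TestReader in FileBasedSourceTest further below does), assuming a hypothetical readUntilNewline() helper that consumes bytes up to and including the next newline:

    @Override
    protected void startReading(ReadableByteChannel channel) throws IOException {
      if (getSource().getMode() == FileBasedSource.Mode.SUBRANGE_OF_SINGLE_FILE) {
        // For a subrange the channel is guaranteed to be seekable and is already positioned at
        // the source's starting offset.
        SeekableByteChannel seekChannel = (SeekableByteChannel) channel;
        if (seekChannel.position() > 0) {
          // We may have landed in the middle of a line: back up one byte and skip to the next
          // newline so that the first record returned starts at or after the starting offset.
          seekChannel.position(seekChannel.position() - 1);
          readUntilNewline(seekChannel);  // hypothetical helper, not part of the patch
        }
      }
      // From here on, wrap 'channel' in whatever higher-level decoder the format needs.
    }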

    After this method is invoked the base class will not be reading data from the channel or + * adjusting the position of the channel. But the base class is responsible for properly closing + * the channel. + * + * @param channel a byte channel representing the file backing the reader. + */ + protected abstract void startReading(ReadableByteChannel channel) throws IOException; + + /** + * Reads the next record from the channel provided by {@link #startReading}. Methods + * {@link #getCurrent}, {@link #getCurrentOffset}, and {@link #isSplitPoint} should return the + * corresponding information about the record read by the last invocation of this method. + * + * @return {@code true} if a record was successfully read, {@code false} if the end of the + * channel was reached before successfully reading a new record. + */ + protected abstract boolean readNextRecord() throws IOException; + } + + // An internal Reader implementation that concatenates a sequence of FileBasedReaders. + private class FilePatternReader implements Reader { + final Iterator> fileReaders; + FileBasedReader currentReader = null; + + public FilePatternReader(Iterator> fileReaders) { + this.fileReaders = fileReaders; + } + + @Override + public boolean start() throws IOException { + return startNextNonemptyReader(); + } + + @Override + public boolean advance() throws IOException { + Preconditions.checkState(currentReader != null, "Call start() before advance()"); + if (currentReader.advance()) { + return true; + } + return startNextNonemptyReader(); + } + + private boolean startNextNonemptyReader() throws IOException { + while (fileReaders.hasNext()) { + currentReader = fileReaders.next(); + if (currentReader.start()) { + return true; + } + currentReader.close(); + } + return false; + } + + @Override + public T getCurrent() throws NoSuchElementException { + // A NoSuchElement will be thrown by the last FileBasedReader if getCurrent() is called after + // advance() returns false. + return currentReader.getCurrent(); + } + + @Override + public void close() throws IOException { + // Close all readers that may have not yet been closed. + currentReader.close(); + while (fileReaders.hasNext()) { + fileReaders.next().close(); + } + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSourceTest.java new file mode 100644 index 0000000000000..d46530a081d45 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSourceTest.java @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** + * Tests code common to all offset-based sources. + */ +@RunWith(JUnit4.class) +public class ByteOffsetBasedSourceTest { + + class TestByteOffsetBasedSource extends ByteOffsetBasedSource { + + private static final long serialVersionUID = 85539250; + + public TestByteOffsetBasedSource(long startOffset, long endOffset, long minShardSize) { + super(startOffset, endOffset, minShardSize); + } + + @Override + public ByteOffsetBasedSource createSourceForSubrange(long start, long end) { + return new TestByteOffsetBasedSource(start, end, 1024); + } + + @Override + public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { + return 0; + } + + @Override + public boolean producesSortedKeys(PipelineOptions options) throws Exception { + return false; + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return null; + } + + @Override + public long getMaxEndOffset(PipelineOptions options) { + return getEndOffset(); + } + } + + public static void assertSplitsAre(List> splits, + long[] expectedBoundaries) { + assertEquals(splits.size(), expectedBoundaries.length - 1); + int i = 0; + for (ByteOffsetBasedSource split : splits) { + assertEquals(split.getStartOffset(), expectedBoundaries[i]); + assertEquals(split.getEndOffset(), expectedBoundaries[i + 1]); + i++; + } + } + + @Test + public void testSplitPositionsZeroStart() throws Exception { + long start = 0; + long end = 1000; + long minShardSize = 50; + long desiredShardSize = 150; + TestByteOffsetBasedSource testSource = new TestByteOffsetBasedSource(start, end, minShardSize); + long[] boundaries = {0, 150, 300, 450, 600, 750, 900, 1000}; + assertSplitsAre(testSource.splitIntoShards(desiredShardSize, null), boundaries); + } + + @Test + public void testSplitPositionsNonZeroStart() throws Exception { + long start = 300; + long end = 1000; + long minShardSize = 50; + long desiredShardSize = 150; + TestByteOffsetBasedSource testSource = new TestByteOffsetBasedSource(start, end, minShardSize); + long[] boundaries = {300, 450, 600, 750, 900, 1000}; + assertSplitsAre(testSource.splitIntoShards(desiredShardSize, null), boundaries); + } + + @Test + public void testMinShardSize() throws Exception { + long start = 300; + long end = 1000; + long minShardSize = 150; + long desiredShardSize = 100; + TestByteOffsetBasedSource testSource = new TestByteOffsetBasedSource(start, end, minShardSize); + long[] boundaries = {300, 450, 600, 750, 1000}; + assertSplitsAre(testSource.splitIntoShards(desiredShardSize, null), boundaries); + } + + @Test + public void testSplitPositionsCollapseEndShard() throws Exception { + long start = 0; + long end = 1000; + long minShardSize = 50; + long desiredShardSize = 110; + TestByteOffsetBasedSource testSource = new TestByteOffsetBasedSource(start, end, minShardSize); + // Last 10 bytes should collapse to the previous shard. 
+ long[] boundaries = {0, 110, 220, 330, 440, 550, 660, 770, 880, 1000}; + assertSplitsAre(testSource.splitIntoShards(desiredShardSize, null), boundaries); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java new file mode 100644 index 0000000000000..d995f9ba5ce60 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java @@ -0,0 +1,689 @@ +/* + * Copyright (C) 2014 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.when; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.io.FileBasedSource.FileBasedReader; +import com.google.cloud.dataflow.sdk.io.FileBasedSource.Mode; +import com.google.cloud.dataflow.sdk.io.Source.Reader; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.DirectPipeline; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.EvaluationResults; +import com.google.cloud.dataflow.sdk.testing.TestDataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; +import com.google.cloud.dataflow.sdk.util.IOChannelFactory; +import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.TestCredential; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.collect.ImmutableList; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mockito; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Random; + +/** + * Tests code common to all file-based sources. + */ +@RunWith(JUnit4.class) +public class FileBasedSourceTest { + + Random random = new Random(System.currentTimeMillis()); + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + /** + * If {@code splitHeader} is null, this is just a simple line-based reader. Otherwise, the file is + * considered to consist of blocks beginning with {@code splitHeader}. The header itself is not + * returned as a record. 
The first record after the header is considered to be a split point. + * + *

    E.g., if {@code splitHeader} is "h" and the lines of the file are: h, a, b, h, h, c, then + * the records in this source are a,b,c, and records a and c are split points. + */ + class TestFileBasedSource extends FileBasedSource { + + private static final long serialVersionUID = 85539251; + + ReadableByteChannel channel = null; + final String splitHeader; + + public TestFileBasedSource(boolean isFilePattern, String fileOrPattern, long minShardSize, + String splitHeader) { + super(isFilePattern, fileOrPattern, minShardSize, 0L, Long.MAX_VALUE); + this.splitHeader = splitHeader; + } + + public TestFileBasedSource(String fileOrPattern, long minShardSize, long startOffset, + long endOffset, String splitHeader) { + super(false, fileOrPattern, minShardSize, startOffset, endOffset); + this.splitHeader = splitHeader; + } + + @Override + public boolean producesSortedKeys(PipelineOptions options) throws Exception { + return false; + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return StringUtf8Coder.of(); + } + + @Override + public FileBasedSource createForSubrangeOfFile(String fileName, long start, long end) { + return new TestFileBasedSource(fileName, getMinShardSize(), start, end, splitHeader); + } + + @Override + public FileBasedReader createSingleFileReader(PipelineOptions options, + Coder coder, ExecutionContext executionContext) { + if (splitHeader == null) { + return new TestReader(this); + } else { + return new TestReaderWithSplits(this); + } + } + } + + /** + * A reader that can read lines of text from a {@link TestFileBasedSource}. This reader does not + * consider {@code splitHeader} defined by {@code TestFileBasedSource} hence every line can be the + * first line of a split. + */ + class TestReader extends FileBasedReader { + private ReadableByteChannel channel = null; + private final byte boundary; + private long nextOffset = 0; + private long currentOffset = 0; + private boolean isAtSplitPoint = false; + private final ByteBuffer buf; + private static final int BUF_SIZE = 1024; + private String currentValue = null; + + public TestReader(TestFileBasedSource source) { + super(source); + boundary = '\n'; + buf = ByteBuffer.allocate(BUF_SIZE); + buf.flip(); + } + + private int readNextLine(ByteArrayOutputStream out) throws IOException { + int byteCount = 0; + while (true) { + if (!buf.hasRemaining()) { + buf.clear(); + int read = channel.read(buf); + if (read < 0) { + break; + } + buf.flip(); + } + byte b = buf.get(); + byteCount++; + if (b == boundary) { + break; + } + out.write(b); + } + return byteCount; + } + + @Override + protected void startReading(ReadableByteChannel channel) throws IOException { + boolean removeLine = false; + if (getSource().getMode() == Mode.SUBRANGE_OF_SINGLE_FILE) { + SeekableByteChannel seekChannel = (SeekableByteChannel) channel; + // If we are not at the beginning of a line, we should ignore the current line. + if (seekChannel.position() > 0) { + // Start from one character back and read till we find a new line. 
+ seekChannel.position(seekChannel.position() - 1); + removeLine = true; + } + nextOffset = seekChannel.position(); + } + this.channel = channel; + if (removeLine) { + nextOffset += readNextLine(new ByteArrayOutputStream()); + } + } + + @Override + protected boolean readNextRecord() throws IOException { + currentOffset = nextOffset; + + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + int offsetAdjustment = readNextLine(buf); + if (offsetAdjustment == 0) { + // EOF + return false; + } + nextOffset += offsetAdjustment; + isAtSplitPoint = true; + currentValue = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), buf.toByteArray()); + return true; + } + + @Override + protected boolean isAtSplitPoint() { + return isAtSplitPoint; + } + + @Override + protected long getCurrentOffset() { + return currentOffset; + } + + @Override + public String getCurrent() throws NoSuchElementException { + return currentValue; + } + } + + /** + * A reader that can read lines of text from a {@link TestFileBasedSource}. This reader considers + * {@code splitHeader} defined by {@code TestFileBasedSource} hence only lines that immediately + * follow a {@code splitHeader} are split points. + */ + class TestReaderWithSplits extends TestReader { + private final String splitHeader; + private boolean isAtSplitPoint = false; + + public TestReaderWithSplits(TestFileBasedSource source) { + super(source); + this.splitHeader = source.splitHeader; + } + + @Override + protected void startReading(ReadableByteChannel channel) throws IOException { + super.startReading(channel); + + // Ignore all lines until next header. + if (!super.readNextRecord()) { + return; + } + String current = super.getCurrent(); + while (current == null || !current.equals(splitHeader)) { + if (!super.readNextRecord()) { + return; + } + current = super.getCurrent(); + } + } + + @Override + protected boolean readNextRecord() throws IOException { + // Get next record. If next record is a header read up to the next non-header record (ignoring + // any empty splits that does not have any records). 
+ + isAtSplitPoint = false; + while (true) { + if (!super.readNextRecord()) { + return false; + } + String current = super.getCurrent(); + if (current == null || !current.equals(splitHeader)) { + return true; + } + isAtSplitPoint = true; + } + } + + @Override + protected boolean isAtSplitPoint() { + return isAtSplitPoint; + } + } + + public File createFileWithData(String fileName, List data) throws IOException { + File file = tempFolder.newFile(fileName); + Files.write(file.toPath(), data, StandardCharsets.UTF_8); + return file; + } + + private String createRandomString(int length) { + char[] chars = "abcdefghijklmnopqrstuvwxyz".toCharArray(); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < length; i++) { + builder.append(chars[random.nextInt(chars.length)]); + } + return builder.toString(); + } + + public List createStringDataset(int dataItemLength, int numItems) { + List list = new ArrayList(); + for (int i = 0; i < numItems; i++) { + list.add(createRandomString(dataItemLength)); + } + return list; + } + + private List readEverythingFromReader(Reader reader) throws IOException { + List results = new ArrayList(); + for (boolean available = reader.start(); available; available = reader.advance()) { + results.add(reader.getCurrent()); + } + return results; + } + + @Test + public void testFullyReadSingleFile() throws IOException { + List data = createStringDataset(3, 5000); + + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(false, file.getPath(), 1024, null); + assertEquals(data, readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testFullyReadFilePattern() throws IOException { + List data1 = createStringDataset(3, 1000); + File file1 = createFileWithData("file1", data1); + + List data2 = createStringDataset(3, 1000); + createFileWithData("file2", data2); + + List data3 = createStringDataset(3, 1000); + createFileWithData("file3", data3); + + List data4 = createStringDataset(3, 1000); + createFileWithData("otherfile", data4); + + TestFileBasedSource source = + new TestFileBasedSource(true, file1.getParent() + "/" + "file*", 1024, null); + List expectedResults = new ArrayList(); + expectedResults.addAll(data1); + expectedResults.addAll(data2); + expectedResults.addAll(data3); + assertThat(expectedResults, containsInAnyOrder( + readEverythingFromReader(source.createBasicReader(null, null, null)).toArray())); + } + + @Test + public void testFullyReadFilePatternFirstRecordEmpty() throws IOException { + File file1 = createFileWithData("file1", new ArrayList()); + + IOChannelFactory mockIOFactory = Mockito.mock(IOChannelFactory.class); + String parent = file1.getParent(); + String pattern = "mocked://test"; + when(mockIOFactory.match(pattern)).thenReturn( + ImmutableList.of(parent + "/" + "file1", parent + "/" + "file2", parent + "/" + "file3")); + IOChannelUtils.setIOFactory("mocked", mockIOFactory); + + List data2 = createStringDataset(3, 1000); + createFileWithData("file2", data2); + + List data3 = createStringDataset(3, 1000); + createFileWithData("file3", data3); + + List data4 = createStringDataset(3, 1000); + createFileWithData("otherfile", data4); + + TestFileBasedSource source = new TestFileBasedSource(true, pattern, 1024, null); + + List expectedResults = new ArrayList(); + expectedResults.addAll(data2); + expectedResults.addAll(data3); + assertThat(expectedResults, containsInAnyOrder( + 
readEverythingFromReader(source.createBasicReader(null, null, null)).toArray())); + } + + @Test + public void testReadRangeAtStart() throws IOException { + List data = createStringDataset(3, 1000); + + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 1024, 0, 102, null); + + // Each line represents 4 bytes (3 random characters + new line + // character). + // So offset range 0-102 include 26 lines. + assertEquals(data.subList(0, 26), + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadEverythingFromFileWithSplits() throws IOException { + String header = ""; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(header); + data.addAll(createStringDataset(3, 9)); + } + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = + new TestFileBasedSource(file.getPath(), 1024, 0, Long.MAX_VALUE, header); + + List expectedResults = new ArrayList(); + expectedResults.addAll(data); + // Remove all occurrences of header from expected results. + expectedResults.removeAll(Arrays.asList(header)); + + assertEquals(expectedResults, + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadRangeFromFileWithSplitsFromStart() throws IOException { + String header = ""; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(header); + data.addAll(createStringDataset(3, 9)); + } + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 1024, 0, 60, header); + + List expectedResults = new ArrayList(); + expectedResults.addAll(data.subList(0, 20)); + // Remove all occurrences of header from expected results. + expectedResults.removeAll(Arrays.asList(header)); + + assertEquals(expectedResults, + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadRangeFromFileWithSplitsFromMiddle() throws IOException { + String header = ""; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(header); + data.addAll(createStringDataset(3, 9)); + } + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 1024, 502, 702, header); + + List expectedResults = new ArrayList(); + + // Each line represents 4 bytes (3 random characters + new line + // character). + // First 126 lines take 504 bytes of space. So record starting at next split point (130) + // should be the first line that belongs to the split. + // Similarly, record at index 179 should be the last record in the split. + expectedResults.addAll(data.subList(130, 180)); + // Remove all occurrences of header from expected results. 
+ expectedResults.removeAll(Arrays.asList(header)); + + assertEquals(expectedResults, + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadRangeFromFileWithSplitsFromMiddleOfHeader() throws IOException { + String header = ""; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(header); + data.addAll(createStringDataset(3, 9)); + } + String fileName = "file"; + File file = createFileWithData(fileName, data); + + List expectedResults = new ArrayList(); + expectedResults.addAll(data.subList(10, 20)); + // Remove all occurrences of header from expected results. + expectedResults.removeAll(Arrays.asList(header)); + + // Split starts after "<" of the header + TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 1024, 1, 60, header); + assertEquals(expectedResults, + readEverythingFromReader(source.createBasicReader(null, null, null))); + + // Split starts after "" of the header + source = new TestFileBasedSource(file.getPath(), 1024, 3, 60, header); + assertEquals(expectedResults, + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadRangeAtMiddle() throws IOException { + List data = createStringDataset(3, 1000); + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 1024, 502, 702, null); + + // Each line represents 4 bytes (3 random characters + new line + // character). + // First 126 lines take 504 bytes of space. So 127th line (index 126) + // should be the first line that belongs to the split. + // Similarly, 176th line (index 175) should be the last line of the + // split. (Note that end index of data.subList() is exclusive). + assertEquals(data.subList(126, 176), + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadRangeAtEnd() throws IOException { + List data = createStringDataset(3, 1000); + + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = + new TestFileBasedSource(file.getPath(), 1024, 802, Long.MAX_VALUE, null); + + // Each line represents 4 bytes (3 random characters + new line + // character). + // First 201 lines take 804 bytes so line 202 (index 201) should be the + // first line of the split. + assertEquals(data.subList(201, data.size()), + readEverythingFromReader(source.createBasicReader(null, null, null))); + } + + @Test + public void testReadAllSplitsOfSingleFile() throws Exception { + List data = createStringDataset(3, 10000); + + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(false, file.getPath(), 1024, null); + + List> sources = source.splitIntoShards(4096, null); + // Each line is 4 bytes (3 random characters + new line character) we write + // 10,000 lines so the total size of the file is 40,000 bytes. Because of + // this above call produces 10 (40000/4096) splits. 
+ assertEquals(sources.size(), 10); + + List results = new ArrayList(); + for (Source split : sources) { + results.addAll(readEverythingFromReader(split.createBasicReader(null, null, null))); + } + + assertEquals(data, results); + } + + @Test + public void testDataflowFile() throws IOException { + TestDataflowPipelineOptions options = + PipelineOptionsFactory.as(TestDataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + DirectPipeline p = DirectPipeline.createForTest(); + List data = createStringDataset(3, 10000); + + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(false, file.getPath(), 1024, null); + + PCollection output = p.apply(ReadSource.from(source).named("ReadFileData")); + + EvaluationResults results = p.run(); + List readData = results.getPCollection(output); + + // Need to sort here since we have no control over the order of files returned from a file + // pattern expansion. + Collections.sort(data); + Collections.sort(readData); + + assertEquals(data, readData); + } + + @Test + public void testDataflowFilePattern() throws IOException { + TestDataflowPipelineOptions options = + PipelineOptionsFactory.as(TestDataflowPipelineOptions.class); + options.setGcpCredential(new TestCredential()); + + DirectPipeline p = DirectPipeline.createForTest(); + + List data1 = createStringDataset(3, 1000); + File file1 = createFileWithData("file1", data1); + + List data2 = createStringDataset(3, 1000); + createFileWithData("file2", data2); + + List data3 = createStringDataset(3, 1000); + createFileWithData("file3", data3); + + List data4 = createStringDataset(3, 1000); + createFileWithData("otherfile", data4); + + TestFileBasedSource source = + new TestFileBasedSource(true, file1.getParent() + "/" + "file*", 1024, null); + + PCollection output = p.apply(ReadSource.from(source).named("ReadFileData")); + + EvaluationResults pipelineResults = p.run(); + List results = pipelineResults.getPCollection(output); + + List expectedResults = new ArrayList(); + expectedResults.addAll(data1); + expectedResults.addAll(data2); + expectedResults.addAll(data3); + + // Need to sort here since we have no control over the order of files returned from a file + // pattern expansion. 
+ Collections.sort(expectedResults); + Collections.sort(results); + + assertEquals(expectedResults, results); + } + + @Test + public void testEstimatedSizeOfFile() throws Exception { + List data = createStringDataset(3, 1000); + String fileName = "file"; + File file = createFileWithData(fileName, data); + + TestFileBasedSource source = new TestFileBasedSource(false, file.getPath(), 1024, null); + + // Size of the file should be 4*1000 + assertEquals(4000, source.getEstimatedSizeBytes(null)); + + } + + @Test + public void testEstimatedSizeOfFilePattern() throws Exception { + List data1 = createStringDataset(3, 500); + File file1 = createFileWithData("file1", data1); + + List data2 = createStringDataset(3, 1000); + createFileWithData("file2", data2); + + List data3 = createStringDataset(3, 1500); + createFileWithData("file3", data3); + + List data4 = createStringDataset(3, 600); + createFileWithData("otherfile", data4); + + List data5 = createStringDataset(3, 700); + createFileWithData("anotherfile", data5); + + TestFileBasedSource source = + new TestFileBasedSource(true, file1.getParent() + "/" + "file*", 1024, null); + + // Size of the pattern should be 4*(500+1000+1500) + assertEquals(12000, source.getEstimatedSizeBytes(null)); + } + + @Test + public void testReadAllSplitsOfFilePattern() throws Exception { + List data1 = createStringDataset(3, 10000); + File file1 = createFileWithData("file1", data1); + + List data2 = createStringDataset(3, 10000); + createFileWithData("file2", data2); + + List data3 = createStringDataset(3, 10000); + createFileWithData("file3", data3); + + List data4 = createStringDataset(3, 10000); + createFileWithData("otherfile", data4); + + TestFileBasedSource source = + new TestFileBasedSource(true, file1.getParent() + "/" + "file*", 1024, null); + List> sources = source.splitIntoShards(4096, null); + assertEquals(sources.size(), 30); + + List results = new ArrayList(); + for (Source split : sources) { + results.addAll(readEverythingFromReader(split.createBasicReader(null, null, null))); + } + + List expectedResults = new ArrayList(); + expectedResults.addAll(data1); + expectedResults.addAll(data2); + expectedResults.addAll(data3); + + assertThat(expectedResults, containsInAnyOrder(results.toArray())); + } +} From a791bed4845bbfe675d41608b05c5d39d2e2b590 Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 13 Mar 2015 14:00:44 -0700 Subject: [PATCH 0268/1541] Make pom.xml compatible with Eclipse and m2e plugin. 
----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88577082 --- pom.xml | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ sdk/pom.xml | 24 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/pom.xml b/pom.xml index e5da472cd1257..b51b7f27d4e3f 100644 --- a/pom.xml +++ b/pom.xml @@ -196,6 +196,63 @@ + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.avro + avro-maven-plugin + [1.7.7,) + + schema + + + + + false + + + + + + org.apache.maven.plugins + maven-jar-plugin + [2.5,) + + jar + test-jar + + + + + + + + + org.jacoco + jacoco-maven-plugin + [0.7.1,) + + report + prepare-agent + + + + + + + + + + diff --git a/sdk/pom.xml b/sdk/pom.xml index a1babd81f7fee..5cd5c0ab046ba 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -182,6 +182,30 @@ + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.9.1 + + + add-test-source + generate-test-sources + + add-test-source + + + + ${project.build.directory}/generated-test-sources/java + + + + + From d6d4a343e164e8a31bee3bd76ff995550a46c630 Mon Sep 17 00:00:00 2001 From: bchambers Date: Fri, 13 Mar 2015 14:35:42 -0700 Subject: [PATCH 0269/1541] Pull some helpful functions for extracting and formatting Classes and Methods. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88580191 --- .../sdk/options/PipelineOptionsFactory.java | 32 ++------ .../sdk/util/common/ReflectHelpers.java | 72 ++++++++++++++++++ .../options/PipelineOptionsFactoryTest.java | 2 +- .../sdk/util/common/ReflectHelpersTest.java | 76 +++++++++++++++++++ 4 files changed, 154 insertions(+), 28 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpersTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 633e8095ca797..1e765470c4b46 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -19,6 +19,7 @@ import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar; import com.google.cloud.dataflow.sdk.runners.worker.DataflowWorkerHarness; +import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers; import com.google.common.base.Equivalence; import com.google.common.base.Function; import com.google.common.base.Preconditions; @@ -728,10 +729,10 @@ private static void validateClass(Class iface, Iterable getterClassNames = FluentIterable.from(getters) .transform(MethodToDeclaringClassFunction.INSTANCE) - .transform(ClassNameFunction.INSTANCE); + .transform(ReflectHelpers.CLASS_NAME); Iterable gettersWithJsonIgnoreClassNames = FluentIterable.from(gettersWithJsonIgnore) .transform(MethodToDeclaringClassFunction.INSTANCE) - .transform(ClassNameFunction.INSTANCE); + .transform(ReflectHelpers.CLASS_NAME); Preconditions.checkArgument(gettersWithJsonIgnore.isEmpty() || getters.size() == gettersWithJsonIgnore.size(), @@ -746,7 +747,7 @@ private static void validateClass(Class iface, Iterable settersWithJsonIgnoreClassNames = FluentIterable.from(settersWithJsonIgnore) .transform(MethodToDeclaringClassFunction.INSTANCE) - 
.transform(ClassNameFunction.INSTANCE); + .transform(ReflectHelpers.CLASS_NAME); Preconditions.checkArgument(settersWithJsonIgnore.isEmpty(), "Expected setter for property [%s] to not be marked with @JsonIgnore on %s", @@ -777,7 +778,7 @@ private static void validateClass(Class iface, Set unknownMethods = Sets.difference(Sets.newHashSet(klass.getMethods()), methods); Preconditions.checkArgument(unknownMethods.isEmpty(), "Methods %s on [%s] do not conform to being bean properties.", - FluentIterable.from(unknownMethods).transform(MethodFormatterFunction.INSTANCE), + FluentIterable.from(unknownMethods).transform(ReflectHelpers.METHOD_FORMATTER), iface.getName()); } @@ -808,29 +809,6 @@ public Class apply(Method input) { } } - /** A {@link Function} which turns a method into a simple method signature. */ - private static class MethodFormatterFunction implements Function { - static final MethodFormatterFunction INSTANCE = new MethodFormatterFunction(); - @Override - public String apply(Method input) { - String parameterTypes = FluentIterable.of(input.getParameterTypes()) - .transform(ClassNameFunction.INSTANCE) - .toSortedList(String.CASE_INSENSITIVE_ORDER) - .toString(); - return ClassNameFunction.INSTANCE.apply(input.getReturnType()) + " " + input.getName() - + "(" + parameterTypes.substring(1, parameterTypes.length() - 1) + ")"; - } - } - - /** A {@link Function} with returns the classes name. */ - private static class ClassNameFunction implements Function, String> { - static final ClassNameFunction INSTANCE = new ClassNameFunction(); - @Override - public String apply(Class input) { - return input.getName(); - } - } - /** A {@link Function} with returns the declaring class for the method. */ private static class MethodToDeclaringClassFunction implements Function> { static final MethodToDeclaringClassFunction INSTANCE = new MethodToDeclaringClassFunction(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java new file mode 100644 index 0000000000000..b2f5f17a43a9c --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.util.common; + +import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.collect.FluentIterable; + +import java.lang.reflect.Method; + +/** + * Utilities for working with with {@link Class Classes} and {@link Method Methods}. + */ +public class ReflectHelpers { + + private static final Joiner COMMA_SEPARATOR = Joiner.on(", "); + + /** A {@link Function} which turns a method into a simple method signature. 
*/ + public static final Function METHOD_FORMATTER = new Function() { + @Override + public String apply(Method input) { + String parameterTypes = FluentIterable.of(input.getParameterTypes()) + .transform(CLASS_SIMPLE_NAME) + .join(COMMA_SEPARATOR); + return String.format("%s(%s)", + input.getName(), + parameterTypes); + } + }; + + /** A {@link Function} which turns a method into the declaring class + method signature. */ + public static final Function CLASS_AND_METHOD_FORMATTER = + new Function() { + @Override + public String apply(Method input) { + return String.format("%s#%s", + CLASS_NAME.apply(input.getDeclaringClass()), + METHOD_FORMATTER.apply(input)); + } + }; + + /** A {@link Function} with returns the classes name. */ + public static final Function, String> CLASS_NAME = + new Function, String>(){ + @Override + public String apply(Class input) { + return input.getName(); + } + }; + + /** A {@link Function} with returns the classes name. */ + public static final Function, String> CLASS_SIMPLE_NAME = + new Function, String>(){ + @Override + public String apply(Class input) { + return input.getSimpleName(); + } + }; +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index a7ff30c12fe19..0068713ef24f1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -167,7 +167,7 @@ public static interface ExtraneousMethod extends PipelineOptions { public void testHavingExtraneousMethodThrows() throws Exception { expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage( - "Methods [java.lang.String extraneousMethod(int, java.lang.String)] on " + "Methods [extraneousMethod(int, String)] on " + "[com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$ExtraneousMethod] " + "do not conform to being bean properties."); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpersTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpersTest.java new file mode 100644 index 0000000000000..db4b8179af5ec --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpersTest.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.util.common; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** + * Tests for {@link ReflectHelpers}. 
+ */ +@RunWith(JUnit4.class) +public class ReflectHelpersTest { + + @Test + public void testClassName() { + assertEquals(getClass().getName(), ReflectHelpers.CLASS_NAME.apply(getClass())); + } + + @Test + public void testClassSimpleName() { + assertEquals(getClass().getSimpleName(), + ReflectHelpers.CLASS_SIMPLE_NAME.apply(getClass())); + } + + @Test + public void testMethodFormatter() throws Exception { + assertEquals("testMethodFormatter()", + ReflectHelpers.METHOD_FORMATTER.apply(getClass().getMethod("testMethodFormatter"))); + + assertEquals("oneArg(int)", + ReflectHelpers.METHOD_FORMATTER.apply(getClass().getDeclaredMethod("oneArg", int.class))); + assertEquals("twoArg(String, List)", + ReflectHelpers.METHOD_FORMATTER.apply( + getClass().getDeclaredMethod("twoArg", String.class, List.class))); + } + + @Test + public void testClassMethodFormatter() throws Exception { + assertEquals( + getClass().getName() + "#testMethodFormatter()", + ReflectHelpers.CLASS_AND_METHOD_FORMATTER + .apply(getClass().getMethod("testMethodFormatter"))); + + assertEquals( + getClass().getName() + "#oneArg(int)", + ReflectHelpers.CLASS_AND_METHOD_FORMATTER + .apply(getClass().getDeclaredMethod("oneArg", int.class))); + assertEquals( + getClass().getName() + "#twoArg(String, List)", + ReflectHelpers.CLASS_AND_METHOD_FORMATTER.apply( + getClass().getDeclaredMethod("twoArg", String.class, List.class))); + } + + @SuppressWarnings("unused") + void oneArg(int n) {} + @SuppressWarnings("unused") + void twoArg(String foo, List bar) {} +} From 4e0ecff4da9f6f49bd24eb05e9a7f975ada2e882 Mon Sep 17 00:00:00 2001 From: peihe Date: Fri, 13 Mar 2015 14:56:12 -0700 Subject: [PATCH 0270/1541] FIX: windmill uses kint64max usec as the timer max timestamp, keep the Java side in sync. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88582344 --- .../cloud/dataflow/sdk/runners/worker/ShuffleSink.java | 3 ++- .../dataflow/sdk/transforms/windowing/BoundedWindow.java | 9 +++++++++ .../dataflow/sdk/transforms/windowing/GlobalWindow.java | 2 +- .../com/google/cloud/dataflow/sdk/util/DoFnContext.java | 2 +- .../google/cloud/dataflow/sdk/util/WindowedValue.java | 4 ++-- .../sdk/runners/worker/GroupingShuffleReaderTest.java | 3 ++- 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java index 351f4ae811f3e..12486766bc612 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ShuffleSink.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.coders.InstantCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder; @@ -194,7 +195,7 @@ public long add(WindowedValue windowedElem) throws IOException { } else if (groupValues) { // Sort values by timestamp so that GroupAlsoByWindows can run efficiently. 
- if (windowedElem.getTimestamp().getMillis() == Long.MIN_VALUE) { + if (windowedElem.getTimestamp().equals(BoundedWindow.TIMESTAMP_MIN_VALUE)) { // Empty secondary keys sort before all other secondary keys, so we // can omit this common value here for efficiency. secondaryKeyBytes = null; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java index 42f039ee33dd8..6547efbc9ff61 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java @@ -18,6 +18,8 @@ import org.joda.time.Instant; +import java.util.concurrent.TimeUnit; + /** * A {@code BoundedWindow} represents a finite grouping of elements, with an * upper bound (larger timestamps represent more recent data) on the timestamps @@ -30,6 +32,13 @@ * be treated as equal by {@code equals()} and {@code hashCode()}. */ public abstract class BoundedWindow { + // The min and max timestmaps that won't overflow when they are converted to + // usec. + public static final Instant TIMESTAMP_MIN_VALUE = + new Instant(TimeUnit.MICROSECONDS.toMillis(Long.MIN_VALUE)); + public static final Instant TIMESTAMP_MAX_VALUE = + new Instant(TimeUnit.MICROSECONDS.toMillis(Long.MAX_VALUE)); + /** * Returns the upper bound of timestamps for values in this window. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java index 7d788e7e63676..6cbb66beb5a4e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java @@ -32,7 +32,7 @@ public class GlobalWindow extends BoundedWindow { @Override public Instant maxTimestamp() { - return new Instant(Long.MAX_VALUE); + return TIMESTAMP_MAX_VALUE; } private GlobalWindow() {} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java index 451fe76f4dad1..4a6fd1f2a9bb5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnContext.java @@ -123,7 +123,7 @@ WindowedValue makeWindowedValue( final Instant inputTimestamp = timestamp; if (timestamp == null) { - timestamp = new Instant(Long.MIN_VALUE); + timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE; } if (windows == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java index 2147ef4867364..57bad7db8c648 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java @@ -71,7 +71,7 @@ public static WindowedValue of( */ public static WindowedValue valueInGlobalWindow(V value) { return new WindowedValue<>(value, - new Instant(Long.MIN_VALUE), + BoundedWindow.TIMESTAMP_MIN_VALUE, Arrays.asList(GlobalWindow.INSTANCE)); } @@ -80,7 +80,7 @@ public static WindowedValue valueInGlobalWindow(V value) { */ public static WindowedValue valueInEmptyWindows(V value) { return new WindowedValue(value, - new Instant(Long.MIN_VALUE), + BoundedWindow.TIMESTAMP_MIN_VALUE, 
Collections.emptyList()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java index c20626e70e59c..522f318649005 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java @@ -37,6 +37,7 @@ import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CoderUtils; @@ -150,7 +151,7 @@ private void runTestReadFromShuffle( WindowedValue>> windowedValue = iter.next(); // Verify value is in an empty windows. - assertEquals(Long.MIN_VALUE, windowedValue.getTimestamp().getMillis()); + assertEquals(BoundedWindow.TIMESTAMP_MIN_VALUE, windowedValue.getTimestamp()); assertEquals(0, windowedValue.getWindows().size()); KV> elem = windowedValue.getValue(); From b20519f77f739e6319502410440fc52f507b03ce Mon Sep 17 00:00:00 2001 From: davor Date: Fri, 13 Mar 2015 15:03:10 -0700 Subject: [PATCH 0271/1541] Rename Eclipse launch files to make it compatible with certain tools. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88582893 --- eclipse/starter/.classpath | 20 ++++++ eclipse/starter/.project | 23 +++++++ eclipse/starter/.settings/LOCAL.launch | 15 +++++ eclipse/starter/.settings/SERVICE.launch | 16 +++++ eclipse/starter/pom.xml | 17 +++++ .../dataflow/starter/StarterPipeline.java | 66 +++++++++++++++++++ 6 files changed, 157 insertions(+) create mode 100644 eclipse/starter/.classpath create mode 100644 eclipse/starter/.project create mode 100644 eclipse/starter/.settings/LOCAL.launch create mode 100644 eclipse/starter/.settings/SERVICE.launch create mode 100644 eclipse/starter/pom.xml create mode 100644 eclipse/starter/src/main/java/com/google/cloud/dataflow/starter/StarterPipeline.java diff --git a/eclipse/starter/.classpath b/eclipse/starter/.classpath new file mode 100644 index 0000000000000..9f9ff59805559 --- /dev/null +++ b/eclipse/starter/.classpath @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/eclipse/starter/.project b/eclipse/starter/.project new file mode 100644 index 0000000000000..64c4186fcb661 --- /dev/null +++ b/eclipse/starter/.project @@ -0,0 +1,23 @@ + + + google-cloud-dataflow-starter + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/eclipse/starter/.settings/LOCAL.launch b/eclipse/starter/.settings/LOCAL.launch new file mode 100644 index 0000000000000..74145d0378c9e --- /dev/null +++ b/eclipse/starter/.settings/LOCAL.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/eclipse/starter/.settings/SERVICE.launch b/eclipse/starter/.settings/SERVICE.launch new file mode 100644 index 0000000000000..4dce6b7bb0333 --- /dev/null +++ b/eclipse/starter/.settings/SERVICE.launch @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/eclipse/starter/pom.xml b/eclipse/starter/pom.xml new file 
mode 100644 index 0000000000000..1f5553e74576c --- /dev/null +++ b/eclipse/starter/pom.xml @@ -0,0 +1,17 @@ + + 4.0.0 + + com.google.cloud.dataflow + google-cloud-dataflow-starter + 0.0.1-SNAPSHOT + + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + LATEST + + + diff --git a/eclipse/starter/src/main/java/com/google/cloud/dataflow/starter/StarterPipeline.java b/eclipse/starter/src/main/java/com/google/cloud/dataflow/starter/StarterPipeline.java new file mode 100644 index 0000000000000..db2ed6ae675a4 --- /dev/null +++ b/eclipse/starter/src/main/java/com/google/cloud/dataflow/starter/StarterPipeline.java @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.starter; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.ParDo; + +/** + * A starter example for writing Google Cloud Dataflow programs. + * + *

<p>The example takes two strings, converts them to their upper-case + * representation and prints them on the console. + * + *

<p>To run this starter example locally using DirectPipelineRunner, just + * execute it without any additional parameters from your favorite development + * environment. In Eclipse, this corresponds to the existing 'LOCAL' run + * configuration. + * + *
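+ * <p>Editorial sketch (not part of the original patch): outside Eclipse, the same local run
+ * can be started from a shell, for example via the exec-maven-plugin (assuming it is
+ * available to the build):
+ *   mvn compile exec:java -Dexec.mainClass=com.google.cloud.dataflow.starter.StarterPipeline
+ * The pipeline then prints the upper-cased elements "HELLO" and "WORLD" to the console.
+ *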

    To run this starter example using managed resource in Google Cloud + * Platform, you should specify the following command-line options: + * --project= + * --stagingLocation= + * --runner=BlockingDataflowPipelineRunner + * In Eclipse, you can just modify the existing 'SERVICE' run configuration. + */ +@SuppressWarnings("serial") +public class StarterPipeline { + + public static void main(String[] args) { + Pipeline p = Pipeline.create( + PipelineOptionsFactory.fromArgs(args).withValidation().create()); + + p.apply(Create.of("Hello", "World")) + .apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + c.output(c.element().toUpperCase()); + } + })) + .apply(ParDo.of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + System.out.println(c.element()); + } + })); + + p.run(); + } +} From 11597b46ca605071fb41ec70d0eefd87d415d14a Mon Sep 17 00:00:00 2001 From: Davor Bonaci Date: Fri, 13 Mar 2015 16:43:39 -0700 Subject: [PATCH 0272/1541] Add serialversionuid member to several classes that are serializable in Java 8. --- .../com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java | 1 + .../java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java | 1 + 2 files changed, 2 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java index 4d48d210a5464..cde606063293e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ByteOffsetBasedSource.java @@ -45,6 +45,7 @@ * @param Type of records represented by the source. */ public abstract class ByteOffsetBasedSource extends Source { + private static final long serialVersionUID = 0; private final long startOffset; private final long endOffset; private final long minShardSize; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java index 865d923713de5..3b89fe7e39c35 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java @@ -54,6 +54,7 @@ * @param Type of records represented by the source. */ public abstract class FileBasedSource extends ByteOffsetBasedSource { + private static final long serialVersionUID = 0; private static final Logger LOG = LoggerFactory.getLogger(FileBasedSource.class); private final String fileOrPatternSpec; From 1d352f9fc0e3083aa5b3ff03682777c9ac33f0f3 Mon Sep 17 00:00:00 2001 From: Chikanaga Tomoyuki Date: Mon, 16 Mar 2015 16:07:57 +0900 Subject: [PATCH 0273/1541] Fix javadoc markups and descriptions according to WordCount. --- .../dataflow/examples/WindowingWordCount.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java index 72e005b2c50bc..ef2eac97836b8 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java @@ -41,18 +41,19 @@ * example see: * https://cloud.google.com/dataflow/java-sdk/wordcount-example * - * To execute this pipeline locally, specify general pipeline configuration: + *

    To execute this pipeline locally, specify general pipeline configuration: * --project= - * and example configuration: - * --output=[ | gs://] + * and a local output file or output prefix on GCS: + * --output=[ | gs://] * - * To execute this pipeline using the Dataflow service, specify pipeline configuration: - * --project= --stagingLocation=gs:// + *

    To execute this pipeline using the Dataflow service, specify pipeline configuration: + * --project= + * --stagingLocation=gs:// * --runner=BlockingDataflowPipelineRunner - * and example configuration: - * --output=gs:// + * and an output prefix on GCS: + * --output=gs:// * - * The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be + *

    The input file defaults to gs://dataflow-samples/shakespeare/kinglear.txt and can be * overridden with --input. */ public class WindowingWordCount { From 6adbd754670717da80f61c69cb51c268cc23451a Mon Sep 17 00:00:00 2001 From: Chikanaga Tomoyuki Date: Mon, 16 Mar 2015 16:53:27 +0900 Subject: [PATCH 0274/1541] fix a typo. setNumShard -> setNumShards. --- .../com/google/cloud/dataflow/examples/WindowingWordCount.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java index ef2eac97836b8..578bc96847202 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowingWordCount.java @@ -136,7 +136,7 @@ private interface Options extends PipelineOptions { @Description("Number of output shards (0 if the system should choose automatically)") @Default.Integer(0) int getNumShards(); - void setNumShard(int value); + void setNumShards(int value); } private static String getOutputLocation(Options options) { From d625f34681f978c858593ed97d0fc2f641c03418 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 13 Mar 2015 15:25:59 -0700 Subject: [PATCH 0275/1541] Add support for printing help via the PipelineOptionsFactory either by programmatic calling printHelp or passing in --help as an argument. ----Release Notes---- [] ------------- Created by MOE: http://code.google.com/p/moe-java MOE_MIGRATED_REVID=88584969 --- .../sdk/options/ApplicationNameOptions.java | 8 +- .../dataflow/sdk/options/BigQueryOptions.java | 6 +- .../BlockingDataflowPipelineOptions.java | 3 + .../sdk/options/CloudDebuggerOptions.java | 6 +- .../options/DataflowPipelineDebugOptions.java | 84 ++++- .../sdk/options/DataflowPipelineOptions.java | 53 ++- .../DataflowPipelineWorkerPoolOptions.java | 53 ++- .../options/DataflowWorkerHarnessOptions.java | 9 +- .../options/DataflowWorkerLoggingOptions.java | 15 +- .../dataflow/sdk/options/Description.java | 8 +- .../dataflow/sdk/options/GcpOptions.java | 75 +++-- .../dataflow/sdk/options/GcsOptions.java | 20 +- .../sdk/options/GoogleApiDebugOptions.java | 10 +- .../cloud/dataflow/sdk/options/Hidden.java | 31 ++ .../dataflow/sdk/options/PipelineOptions.java | 4 +- .../sdk/options/PipelineOptionsFactory.java | 307 ++++++++++++++++-- .../sdk/options/StreamingOptions.java | 4 +- .../options/PipelineOptionsFactoryTest.java | 156 +++++++++ 18 files changed, 723 insertions(+), 129 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java index edbc57af3c2b3..d6d7db8cdf9fb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java @@ -23,11 +23,11 @@ public interface ApplicationNameOptions extends PipelineOptions { /** * Name of application, for display purposes. *

    - * Defaults to the name of the class which constructs the - * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}. + * Defaults to the name of the class which constructs the {@link PipelineOptions} + * via the {@link PipelineOptionsFactory}. */ - @Description("Application name. Defaults to the name of the class which " - + "constructs the Pipeline.") + @Description("Name of application for display purposes. Defaults to the name of the class which " + + "constructs the PipelineOptions via the PipelineOptionsFactory.") String getAppName(); void setAppName(String value); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java index 076ec5cc41d0d..d240d1c4b4076 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java @@ -19,10 +19,12 @@ /** * Properties needed when using BigQuery with the Dataflow SDK. */ +@Description("Options which are used to configure BigQuery. See " + + "https://cloud.google.com/bigquery/what-is-bigquery for details on BigQuery.") public interface BigQueryOptions extends ApplicationNameOptions, GcpOptions, PipelineOptions, StreamingOptions { - @Description("Temporary staging dataset ID for BigQuery " - + "table operations") + @Description("Temporary dataset for BigQuery table operations. " + + "Supported values are \"bigquery.googleapis.com/{dataset}\"") @Default.String("bigquery.googleapis.com/cloud_dataflow") String getTempDatasetId(); void setTempDatasetId(String value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java index ca30ea2c20e9c..d65f38c6c6b58 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java @@ -25,11 +25,14 @@ /** * Options which are used to configure the {@link BlockingDataflowPipelineRunner}. */ +@Description("Configure options on the BlockingDataflowPipelineRunner.") public interface BlockingDataflowPipelineOptions extends DataflowPipelineOptions { /** * Output stream for job status messages. */ + @Description("Where messages generated during execution of the Dataflow job will be output.") @JsonIgnore + @Hidden @Default.InstanceFactory(StandardOutputFactory.class) PrintStream getJobMessageOutput(); void setJobMessageOutput(PrintStream value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java index 3fc2b6a5ff271..84710df258451 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java @@ -22,13 +22,17 @@ /** * Options for controlling Cloud Debugger. These options are experimental and subject to change. */ +@Description("[Experimental] Used to configure the Cloud Debugger") public interface CloudDebuggerOptions { /** * User defined application version. Cloud Debugger uses it to group all - * running debuggee processes. Version should be different if users have + * running debugged processes. 
Version should be different if users have * multiple parallel runs of the same application with different inputs. */ + @Description("User defined application version. Cloud Debugger uses it to group all " + + "running debugged processes. cdbgVersion should be different if users have " + + "multiple parallel runs of the same application with different inputs.") String getCdbgVersion(); void setCdbgVersion(String value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java index a5bc999be534e..937af6cf40bf7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java @@ -16,19 +16,25 @@ package com.google.cloud.dataflow.sdk.options; +import com.google.api.services.dataflow.Dataflow; import com.google.cloud.dataflow.sdk.util.DataflowPathValidator; import com.google.cloud.dataflow.sdk.util.GcsStager; import com.google.cloud.dataflow.sdk.util.InstanceBuilder; import com.google.cloud.dataflow.sdk.util.PathValidator; import com.google.cloud.dataflow.sdk.util.Stager; +import com.google.cloud.dataflow.sdk.util.Transport; import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.List; /** - * Options used for testing and debugging the Dataflow SDK. + * Internal. Options used to control execution of the Dataflow SDK for + * debugging and testing purposes. */ +@Description("[Internal] Options used to control execution of the Dataflow SDK for " + + "debugging and testing purposes.") +@Hidden public interface DataflowPipelineDebugOptions extends PipelineOptions { /** * Dataflow endpoint to use. @@ -40,7 +46,8 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { * otherwise {@link #getApiRootUrl()} is used as the root * url. */ - @Description("Cloud Dataflow Endpoint") + @Description("The URL for the Dataflow API. If the string contains \"://\"" + + " will be treated as the entire URL, otherwise will be treated relative to apiRootUrl.") @Default.String("v1b3/projects/") String getDataflowEndpoint(); void setDataflowEndpoint(String value); @@ -51,62 +58,105 @@ public interface DataflowPipelineDebugOptions extends PipelineOptions { *

    Dataflow provides a number of experimental features that can be enabled * with this flag. * - *

    Please sync with the Dataflow team when enabling any experiments. + *

    Please sync with the Dataflow team before enabling any experiments. */ - @Description("Backend experiments to enable.") + @Description("[Experimental] Dataflow provides a number of experimental features that can " + + "be enabled with this flag. Please sync with the Dataflow team before enabling any " + + "experiments.") List getExperiments(); void setExperiments(List value); /** - * The API endpoint to use when communicating with the Dataflow service. + * The endpoint to use with the Dataflow API. dataflowEndpoint can override this value + * if it contains an absolute URL, otherwise apiRootUrl will be combined with dataflowEndpoint + * to generate the full URL to communicate with the Dataflow API. */ - @Description("Google Cloud root API") + @Description("The endpoint to use with the Dataflow API. dataflowEndpoint can override this " + + "value if it contains an absolute URL, otherwise apiRootUrl will be combined with " + + "dataflowEndpoint to generate the full URL to communicate with the Dataflow API.") @Default.String("https://dataflow.googleapis.com/") String getApiRootUrl(); void setApiRootUrl(String value); /** - * The path to write the translated Dataflow specification out to - * at job submission time. + * The path to write the translated Dataflow job specification out to + * at job submission time. The Dataflow job specification will be represented in JSON + * format. */ - @Description("File for writing dataflow job descriptions") + @Description("The path to write the translated Dataflow job specification out to " + + "at job submission time. The Dataflow job specification will be represented in JSON " + + "format.") String getDataflowJobFile(); void setDataflowJobFile(String value); /** - * The name of the validator class used to validate path names. + * The class of the validator that should be created and used to validate paths. + * If pathValidator has not been set explicitly, an instance of this class will be + * constructed and used as the path validator. */ - @Description("The validator class used to validate path names.") + @Description("The class of the validator that should be created and used to validate paths. " + + "If pathValidator has not been set explicitly, an instance of this class will be " + + "constructed and used as the path validator.") @Default.Class(DataflowPathValidator.class) Class getPathValidatorClass(); void setPathValidatorClass(Class validatorClass); /** - * The validator class used to validate path names. + * The path validator instance that should be created and used to validate paths. + * If no path validator has been set explicitly, the default is to use the instance factory which + * constructs a path validator based upon the currently set pathValidatorClass. */ @JsonIgnore - @Description("The validator class used to validate path names.") + @Description("The path validator instance that should be created and used to validate paths. " + + "If no path validator has been set explicitly, the default is to use the instance factory " + + "which constructs a path validator based upon the currently set pathValidatorClass.") @Default.InstanceFactory(PathValidatorFactory.class) PathValidator getPathValidator(); void setPathValidator(PathValidator validator); /** - * The class used to stage files. + * The class responsible for staging resources to be accessible by workers + * during job execution. */ - @Description("The class used to stage files.") + @Description("The class of the stager that should be created and used to stage resources. 
" + + "If stager has not been set explicitly, an instance of this class will be " + + "constructed and used as the resource stager.") @Default.Class(GcsStager.class) Class getStagerClass(); void setStagerClass(Class stagerClass); /** - * The stager instance used to stage files. + * The resource stager instance that should be created and used to stage resources. + * If no stager has been set explicitly, the default is to use the instance factory + * which constructs a resource stager based upon the currently set stagerClass. */ @JsonIgnore - @Description("The class use to stage packages.") + @Description("The resource stager instance that should be created and used to stage resources. " + + "If no stager has been set explicitly, the default is to use the instance factory " + + "which constructs a resource stager based upon the currently set stagerClass.") @Default.InstanceFactory(StagerFactory.class) Stager getStager(); void setStager(Stager stager); + /** + * An instance of the Dataflow client. Defaults to creating a Dataflow client + * using the current set of options. + */ + @JsonIgnore + @Description("An instance of the Dataflow client. Defaults to creating a Dataflow client " + + "using the current set of options.") + @Default.InstanceFactory(DataflowClientFactory.class) + Dataflow getDataflowClient(); + void setDataflowClient(Dataflow value); + + /** Returns the default Dataflow client built from the passed in PipelineOptions. */ + public static class DataflowClientFactory implements DefaultValueFactory { + @Override + public Dataflow create(PipelineOptions options) { + return Transport.newDataflowClient(options.as(DataflowPipelineOptions.class)).build(); + } + } + /** * Creates a {@link PathValidator} object using the class specified in * {@link #getPathValidatorClass()}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index 5dec12031b2f5..bfcc47c1a8912 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -16,13 +16,9 @@ package com.google.cloud.dataflow.sdk.options; -import com.google.api.services.dataflow.Dataflow; import com.google.cloud.dataflow.sdk.runners.DataflowPipeline; -import com.google.cloud.dataflow.sdk.util.Transport; import com.google.common.base.MoreObjects; -import com.fasterxml.jackson.annotation.JsonIgnore; - import org.joda.time.DateTimeUtils; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; @@ -31,13 +27,14 @@ /** * Options which can be used to configure the {@link DataflowPipeline}. */ +@Description("Options which configure the Dataflow pipeline.") public interface DataflowPipelineOptions extends PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions, DataflowPipelineWorkerPoolOptions, BigQueryOptions, GcsOptions, StreamingOptions, CloudDebuggerOptions, DataflowWorkerLoggingOptions { /** - * GCS path for temporary files. + * GCS path for temporary files, e.g. gs://bucket/object *

    * Must be a valid Cloud Storage url, beginning with the prefix "gs://" *
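* <p>For example (editorial placeholder value, not part of the original patch):
*   --tempLocation=gs://my-bucket/dataflow/tmp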

    @@ -45,31 +42,37 @@ public interface DataflowPipelineOptions extends * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using * {@link #getStagingLocation()}. */ - @Description("GCS path for temporary files, eg \"gs://bucket/object\". " - + "Defaults to stagingLocation.") + @Description("GCS path for temporary files, eg \"gs://bucket/object\". " + + "Must be a valid Cloud Storage url, beginning with the prefix \"gs://\". " + + "At least one of tempLocation or stagingLocation must be set. If tempLocation is unset, " + + "defaults to using stagingLocation.") String getTempLocation(); void setTempLocation(String value); /** - * GCS path for staging local files. + * GCS path for staging local files, e.g. gs://bucket/object *

    - * If {@link #getStagingLocation()} is not set, then the Dataflow pipeline defaults to a staging - * directory within {@link #getTempLocation}. + * Must be a valid Cloud Storage url, beginning with the prefix "gs://" *

    - * At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. + * At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If + * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using + * {@link #getStagingLocation()}. */ - @Description("GCS staging path. Defaults to a staging directory" - + " with the tempLocation") + @Description("GCS path for staging local files, e.g. \"gs://bucket/object\". " + + "Must be a valid Cloud Storage url, beginning with the prefix \"gs://\". " + + "At least one of stagingLocation or tempLocation must be set. If stagingLocation is unset, " + + "defaults to using tempLocation.") String getStagingLocation(); void setStagingLocation(String value); /** - * The job name is used as an idempotence key within the Dataflow service. If there - * is an existing job which is currently active, another job with the same name will - * not be able to be created. + * The Dataflow job name is used as an idempotence key within the Dataflow service. + * If there is an existing job which is currently active, another active job with the same + * name will not be able to be created. Defaults to using the ApplicationName-UserName-Date. */ - @Description("Dataflow job name, to uniquely identify active jobs. " - + "Defaults to using the ApplicationName-UserName-Date.") + @Description("The Dataflow job name is used as an idempotence key within the Dataflow service. " + + "If there is an existing job which is currently active, another active job with the same " + + "name will not be able to be created. Defaults to using the ApplicationName-UserName-Date.") @Default.InstanceFactory(JobNameFactory.class) String getJobName(); void setJobName(String value); @@ -101,18 +104,4 @@ public String create(PipelineOptions options) { return normalizedAppName + "-" + normalizedUserName + "-" + datePart; } } - - /** Alternative Dataflow client. */ - @JsonIgnore - @Default.InstanceFactory(DataflowClientFactory.class) - Dataflow getDataflowClient(); - void setDataflowClient(Dataflow value); - - /** Returns the default Dataflow client built from the passed in PipelineOptions. */ - public static class DataflowClientFactory implements DefaultValueFactory { - @Override - public Dataflow create(PipelineOptions options) { - return Transport.newDataflowClient(options.as(DataflowPipelineOptions.class)).build(); - } - } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index 09571f1d8beec..9c06e6f3acb29 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -21,19 +21,23 @@ /** * Options which are used to configure the Dataflow pipeline worker pool. */ +@Description("Options which are used to configure the Dataflow pipeline worker pool.") public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions { /** * Disk source image to use by VMs for jobs. * @see Compute Engine Images */ - @Description("Dataflow VM disk image.") + @Description("Disk source image to use by VMs for jobs. See " + + "https://developers.google.com/compute/docs/images for further details.") String getDiskSourceImage(); void setDiskSourceImage(String value); /** - * Number of workers to use in remote execution. 
+ * Number of workers to use when executing the Dataflow job. */ - @Description("Number of workers, when using remote execution") + @Description("Number of workers to use when executing the Dataflow job. Note that " + + "selection of an autoscaling algorithm other then \"NONE\" will effect the " + + "size of the worker pool.") @Default.Integer(3) int getNumWorkers(); void setNumWorkers(int value); @@ -60,16 +64,19 @@ public String getAlgorithm() { } } - @Description("(experimental) The autoscaling algorithm to use for the workerpool.") + @Description("[Experimental] The autoscaling algorithm to use for the workerpool. " + + "NONE: does not change the size of the worker pool. " + + "BASIC: autoscale the worker pool size up to maxNumWorkers until the job completes.") @Default.Enum("NONE") AutoscalingAlgorithmType getAutoscalingAlgorithm(); void setAutoscalingAlgorithm(AutoscalingAlgorithmType value); /** - * Max number of workers to use when using workerpool autoscaling. + * The maximum number of workers to use when using workerpool autoscaling. * This option is experimental and subject to change. */ - @Description("Max number of workers to use, when using autoscaling") + @Description("[Experimental] The maximum number of workers to use when using workerpool " + + "autoscaling.") @Default.Integer(20) int getMaxNumWorkers(); void setMaxNumWorkers(int value); @@ -85,15 +92,15 @@ public String getAlgorithm() { * GCE availability zone for launching workers. * - *

    Default is up to the service. + *

    Default is up to the Dataflow service. */ @Description("GCE availability zone for launching workers. " - + "Default is up to the service") + + "Default is up to the Dataflow service.") String getZone(); void setZone(String value); /** - * Type of API for handling cluster management,i.e. resizing, healthchecking, etc. + * Type of API for handling cluster management, i.e. resizing, healthchecking, etc. */ public enum ClusterManagerApiType { COMPUTE_ENGINE("compute.googleapis.com"), @@ -110,15 +117,25 @@ public String getApiServiceName() { } } - @Description("Type of API for handling cluster management,i.e. resizing, healthchecking, etc.") + /** + * Type of API for handling cluster management, i.e. resizing, healthchecking, etc. + */ + @Description("Type of API for handling cluster management, i.e. resizing, healthchecking, etc.") @Default.Enum("COMPUTE_ENGINE") ClusterManagerApiType getClusterManagerApi(); void setClusterManagerApi(ClusterManagerApiType value); /** - * Machine type to create worker VMs as. + * Machine type to create Dataflow worker VMs as. + *

+ * See <a href="https://cloud.google.com/compute/docs/machine-types">GCE machine types</a> + * for a list of valid options. + *
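+ * <p>For example (editorial placeholder value, not part of the original patch):
+ *   --workerMachineType=n1-standard-4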

    + * If unset, the Dataflow service will choose a reasonable default. */ - @Description("Dataflow VM machine type for workers.") + @Description("Machine type to create Dataflow worker VMs as. See " + + "https://cloud.google.com/compute/docs/machine-types for a list of valid options. " + + "If unset, the Dataflow service will choose a reasonable default.") String getWorkerMachineType(); void setWorkerMachineType(String value); @@ -141,23 +158,25 @@ public String getTeardownPolicyName() { } /** - * Teardown policy for the VMs. + * The teardown policy for the VMs. * *

    By default this is left unset and the service sets the default policy. */ - @Description("The teardown policy for the VMs.") + @Description("The teardown policy for the VMs. By default this is left unset " + + "and the service sets the default policy.") TeardownPolicy getTeardownPolicy(); void setTeardownPolicy(TeardownPolicy value); /** * List of local files to make available to workers. *

    - * Jars are placed on the worker's classpath. + * Files are placed on the worker's classpath. *
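* <p>For example (editorial sketch, not part of the original patch; assumes list-valued options
* accept comma-separated values): --filesToStage=/path/to/pipeline.jar,/path/to/extra-lib.jar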

    * The default value is the list of jars from the main program's classpath. */ - @Description("Files to stage on GCS and make available to " - + "workers. The default value is all files from the classpath.") + @Description("Files to stage on GCS and make available to workers. " + + "Files are placed on the worker's classpath. " + + "The default value is all files from the classpath.") List getFilesToStage(); void setFilesToStage(List value); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java index adb45c8a7481b..6f69db26f96c1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java @@ -20,16 +20,21 @@ * Options which are used exclusively within the Dataflow worker harness. * These options have no effect at pipeline creation time. */ +@Description("[Internal] Options which are used exclusively within the Dataflow worker harness. " + + "These options have no effect at pipeline creation time.") +@Hidden public interface DataflowWorkerHarnessOptions extends DataflowPipelineOptions { /** - * ID of the worker running this pipeline. + * The identity of the worker running this pipeline. */ + @Description("The identity of the worker running this pipeline.") String getWorkerId(); void setWorkerId(String value); /** - * ID of the job this pipeline represents. + * The identity of the Dataflow job. */ + @Description("The identity of the Dataflow job.") String getJobId(); void setJobId(String value); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java index 3f59c8365495c..2ba61c6ce42b0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java @@ -25,6 +25,7 @@ /** * Options which are used to control logging configuration on the Dataflow worker. */ +@Description("Options which are used to control logging configuration on the Dataflow worker.") public interface DataflowWorkerLoggingOptions extends PipelineOptions { /** * The set of log levels which can be used on the Dataflow worker. @@ -34,9 +35,9 @@ public enum Level { } /** - * This option controls the default log level of all loggers without a - * log level override. + * This option controls the default log level of all loggers without a log level override. */ + @Description("Controls the default log level of all loggers without a log level override.") @Default.Enum("INFO") Level getDefaultWorkerLogLevel(); void setDefaultWorkerLogLevel(Level level); @@ -51,6 +52,16 @@ public enum Level { * the expected format is {@code Name#Level}, further details on * {@link WorkerLogLevelOverride#create(String)}. */ + @Description("This option controls the log levels for specifically named loggers. " + + "The expected format is Name#Level. The Dataflow worker uses java.util.logging which " + + "supports a logging hierarchy based off of names which are \".\" separated. " + + "For example, by specifying the value \"a.b.c.Foo#DEBUG\", the logger for the class " + + "\"a.b.c.Foo\" will be configured to output logs at the DEBUG level. 
Similarly, " + + "by specifying the value \"a.b.c#WARN\", all loggers underneath the \"a.b.c\" package " + + "will be configured to output logs at the WARN level. Note that multiple overrides can " + + "be specified and that later values with equivalent names override earlier values. Also, " + + "note that when multiple overrides are specified, the exact name followed by the closest " + + "parent takes precedence.") WorkerLogLevelOverride[] getWorkerLogLevelOverrides(); void setWorkerLogLevelOverrides(WorkerLogLevelOverride... workerLogLevelOverrides); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java index b02d3b9f13984..eaf7aac7d1fa6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java @@ -22,9 +22,13 @@ import java.lang.annotation.Target; /** - * Machine-readable description for options in {@link PipelineOptions}. + * Descriptions are used to generate human readable output when the --help + * command is specified. Description annotations placed on interfaces which extend + * {@link PipelineOptions} to describe groups of related options. Description annotations + * placed on getter methods will be used to provide human readable information + * for the specific option. */ -@Target(value = ElementType.METHOD) +@Target({ElementType.METHOD, ElementType.TYPE}) @Retention(RetentionPolicy.RUNTIME) public @interface Description { String value(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index a884d3ec7ebb4..7da5277e48777 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -58,23 +58,16 @@ * application default credentials falling back to gcloud. The other options can be * used by setting the corresponding properties. */ +@Description("Options used to configure Google Cloud Platform project and credentials.") public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { /** * Project id to use when launching jobs. */ - @Description("Project id. Required when running a Dataflow in the cloud.") + @Description("Project id. Required when running a Dataflow in the cloud. " + + "See https://cloud.google.com/storage/docs/projects for further details.") String getProject(); void setProject(String value); - /** - * This option controls which file to use when attempting to create the credentials using the - * OAuth 2 webflow. - */ - @JsonIgnore - @Description("Path to a file containing Google API secret") - String getSecretsFile(); - void setSecretsFile(String value); - /** * This option controls which file to use when attempting to create the credentials using the * service account method. @@ -83,7 +76,9 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { * {@link GcpOptions#getServiceAccountName() serviceAccountName}. */ @JsonIgnore - @Description("Path to a file containing the P12 service credentials") + @Description("Controls which file to use when attempting to create the credentials " + + "using the service account method. 
This option if specified, needs to be combined with " + + "the serviceAccountName option.") String getServiceAccountKeyfile(); void setServiceAccountKeyfile(String value); @@ -95,7 +90,9 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { * {@link GcpOptions#getServiceAccountKeyfile() serviceAccountKeyfile}. */ @JsonIgnore - @Description("Name of the service account for Google APIs") + @Description("Controls which service account to use when attempting to create the credentials " + + "using the service account method. This option if specified, needs to be combined with " + + "the serviceAccountKeyfile option.") String getServiceAccountName(); void setServiceAccountName(String value); @@ -106,9 +103,33 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions { void setGCloudPath(String value); /** - * Directory for storing dataflow credentials. + * This option controls which file to use when attempting to create the credentials + * using the OAuth 2 webflow. After the OAuth2 webflow, the credentials will be stored + * within credentialDir. */ - @Description("Directory for storing dataflow credentials") + @JsonIgnore + @Description("This option controls which file to use when attempting to create the credentials " + + "using the OAuth 2 webflow. After the OAuth2 webflow, the credentials will be stored " + + "within credentialDir.") + String getSecretsFile(); + void setSecretsFile(String value); + + /** + * This option controls which credential store to use when creating the credentials + * using the OAuth 2 webflow. + */ + @Description("This option controls which credential store to use when creating the credentials " + + "using the OAuth 2 webflow.") + @Default.String("cloud_dataflow") + String getCredentialId(); + void setCredentialId(String value); + + /** + * Directory for storing dataflow credentials after execution of the OAuth 2 webflow. Defaults + * to using the $HOME/.store/data-flow directory. + */ + @Description("Directory for storing dataflow credentials after execution of the OAuth 2 webflow. " + + "Defaults to using the $HOME/.store/data-flow directory.") @Default.InstanceFactory(CredentialDirFactory.class) String getCredentialDir(); void setCredentialDir(String value); @@ -126,22 +147,30 @@ public String create(PipelineOptions options) { } } - @Description("The credential identifier when using a persistent" - + " credential store") - @Default.String("cloud_dataflow") - String getCredentialId(); - void setCredentialId(String value); - - @Description("The factory class used to create oauth credentials") + /** + * The class of the credential factory that should be created and used to create + * credentials. If gcpCredential has not been set explicitly, an instance of this class will + * be constructed and used as a credential factory. + */ + @Description("The class of the credential factory that should be created and used to create " + + "credentials. If gcpCredential has not been set explicitly, an instance of this class will " + + "be constructed and used as a credential factory. The default credential factory will") @Default.Class(GcpCredentialFactory.class) Class getCredentialFactoryClass(); void setCredentialFactoryClass( Class credentialFactoryClass); - /** Alternative Google Cloud Platform Credential. */ + /** + * The credential instance that should be used to authenticate against GCP services. 
+ * If no credential has been set explicitly, the default is to use the instance factory + * which constructs a credential based upon the currently set credentialFactoryClass. + */ @JsonIgnore - @Description("Google Cloud Platform user credentials.") + @Description("The credential instance that should be used to authenticate against GCP services. " + + "If no credential has been set explicitly, the default is to use the instance factory " + + "which constructs a credential based upon the currently set credentialFactoryClass.") @Default.InstanceFactory(GcpUserCredentialsFactory.class) + @Hidden Credential getGcpCredential(); void setGcpCredential(Credential value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java index 3136a8a42985e..9c52d5a3f97d7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java @@ -33,17 +33,29 @@ */ public interface GcsOptions extends ApplicationNameOptions, GcpOptions, PipelineOptions { - /** Alternative GcsUtil instance. */ + /** + * The GcsUtil instance that should be used to communicate with Google Cloud Storage. + */ @JsonIgnore + @Description("The GcsUtil instance that should be used to communicate with Google Cloud Storage.") @Default.InstanceFactory(GcsUtil.GcsUtilFactory.class) + @Hidden GcsUtil getGcsUtil(); void setGcsUtil(GcsUtil value); - //////////////////////////////////////////////////////////////////////////// - // Allows the user to provide an alternative ExecutorService if their - // environment does not support the default implementation. + /** + * The ExecutorService instance to use to create threads, can be overridden to specify an + * ExecutorService which is compatible with the users environment. If unset, the + * default is to create an ExecutorService with an unbounded number of threads which + * is compatible with Google AppEngine. + */ @JsonIgnore + @Description("The ExecutorService instance to use to create multiple threads. can be overridden " + + "to specify an ExecutorService which is compatible with the users environment. If unset, " + + "the default is to create an ExecutorService with an unbounded number of threads which " + + "is compatible with Google AppEngine.") @Default.InstanceFactory(ExecutorServiceFactory.class) + @Hidden ExecutorService getExecutorService(); void setExecutorService(ExecutorService value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java index 4b3d69c87a9e8..3af9b000aebf0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java @@ -29,7 +29,7 @@ import java.util.regex.Pattern; /** - * These options configure debug settings for Google API clients generated by the Dataflow SDK. + * These options configure debug settings for Google API clients created within the Dataflow SDK. */ public interface GoogleApiDebugOptions extends PipelineOptions { /** @@ -37,6 +37,14 @@ public interface GoogleApiDebugOptions extends PipelineOptions { * A tracing token must be requested from Google to be able to use this option. * An invalid tracing token will result in 400 errors from Google when the API is invoked. 
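*
* <p>Illustrative usage (editorial, not part of the original patch; the token is a placeholder):
*   --googleApiTrace=Dataflow#PlaceholderTracingToken
* traces all Dataflow API calls made by the SDK using that token.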
*/ + @Description("This option enables tracing of API calls to Google services used within the " + + "Dataflow SDK. Values are expected in the format \"ApiName#TracingToken\" where the " + + "ApiName represents the request classes canonical name. The TracingToken must be requested " + + "from Google to be able to use this option. An invalid tracing token will result in HTTP " + + "400 errors from Google when the API is invoked. Note, that by enabling this option, the " + + "contents of the requests to and from Google Cloud services will be made available to " + + "Google. For example, by specifiying \"Dataflow#TracingToken\", all calls to the Dataflow " + + "service will be made available to Google.") GoogleApiTracer[] getGoogleApiTrace(); void setGoogleApiTrace(GoogleApiTracer... commands); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java new file mode 100644 index 0000000000000..8cfa6b7ba2f0f --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.options; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Methods and/or interfaces annotated with {@code @Hidden} will be suppressed from + * being output when {@code --help} is specified on the command-line. + */ +@Target({ElementType.METHOD, ElementType.TYPE}) +@Retention(RetentionPolicy.RUNTIME) +public @interface Hidden { +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java index 1fb5d9aabb63c..9e75875ebfa0b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java @@ -56,7 +56,9 @@ public interface PipelineOptions { T as(Class kls); @Validation.Required - @Description("The runner which will be used when executing the pipeline.") + @Description("The pipeline runner which will be used to execute the pipeline. 
" + + "For registered runners, the class name can be specified, otherwise the fully" + + "qualified name needs to be specified.") @Default.Class(DirectPipelineRunner.class) Class> getRunner(); void setRunner(Class> kls); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index 1e765470c4b46..e8afb3452aeed 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -22,12 +22,15 @@ import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers; import com.google.common.base.Equivalence; import com.google.common.base.Function; +import com.google.common.base.Joiner; +import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Throwables; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Collections2; import com.google.common.collect.FluentIterable; import com.google.common.collect.HashMultimap; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -52,6 +55,8 @@ import java.beans.Introspector; import java.beans.PropertyDescriptor; import java.io.IOException; +import java.io.PrintStream; +import java.lang.annotation.Annotation; import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.lang.reflect.Proxy; @@ -62,11 +67,13 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Queue; import java.util.ServiceLoader; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; +import java.util.TreeSet; /** * Constructs a {@link PipelineOptions} or any derived interface which is composable to any other @@ -136,6 +143,15 @@ public static T as(Class klass) { * By default, strict parsing is enabled and arguments must conform to be either * {@code --booleanArgName} or {@code --argName=argValue}. Strict parsing can be disabled with * {@link Builder#withoutStrictParsing()}. + *

    + * Help information can be output to {@link System#out} by specifying {@code --help} as an + * argument. After help is printed, the application will exit. Specifying only {@code --help} + * will print out the list of + * {@link PipelineOptionsFactory#getRegisteredOptions() registered options} + * by invoking {@link PipelineOptionsFactory#printHelp(PrintStream)}. Specifying + * {@code --help=PipelineOptionsClassName} will print out detailed usage information about the + * specifically requested PipelineOptions by invoking + * {@link PipelineOptionsFactory#printHelp(PrintStream, Class)}. */ public static Builder fromArgs(String[] args) { return new Builder().fromArgs(args); @@ -193,6 +209,15 @@ private Builder(String[] args, boolean validation, * By default, strict parsing is enabled and arguments must conform to be either * {@code --booleanArgName} or {@code --argName=argValue}. Strict parsing can be disabled with * {@link Builder#withoutStrictParsing()}. + *

    + * Help information can be output to {@link System#out} by specifying {@code --help} as an + * argument. After help is printed, the application will exit. Specifying only {@code --help} + * will print out the list of + * {@link PipelineOptionsFactory#getRegisteredOptions() registered options} + * by invoking {@link PipelineOptionsFactory#printHelp(PrintStream)}. Specifying + * {@code --help=PipelineOptionsClassName} will print out detailed usage information about the + * specifically requested PipelineOptions by invoking + * {@link PipelineOptionsFactory#printHelp(PrintStream, Class)}. */ public Builder fromArgs(String[] args) { Preconditions.checkNotNull(args, "Arguments should not be null."); @@ -243,6 +268,7 @@ public T as(Class klass) { if (args != null) { ListMultimap options = parseCommandLine(args, strictParsing); LOG.debug("Provided Arguments: {}", options); + printHelpUsageAndExitIfNeeded(options, System.out, true /* exit */); initialOptions = parseObjects(klass, options, strictParsing); } @@ -263,6 +289,73 @@ public T as(Class klass) { } } + /** + * Determines whether the generic {@code --help} was requested or help was + * requested for a specific class and invokes the appropriate + * {@link PipelineOptionsFactory#printHelp(PrintStream)} and + * {@link PipelineOptionsFactory#printHelp(PrintStream, Class)} variant. + * Prints to the specified {@link PrintStream}, and exits if requested. + *

    + * Visible for testing. + * {@code printStream} and {@code exit} used for testing. + */ + @SuppressWarnings("unchecked") + static boolean printHelpUsageAndExitIfNeeded(ListMultimap options, + PrintStream printStream, boolean exit) { + if (options.containsKey("help")) { + final String helpOption = Iterables.getOnlyElement(options.get("help")); + + // Print the generic help if only --help was specified. + if (Boolean.TRUE.toString().equals(helpOption)) { + printHelp(printStream); + if (exit) { + System.exit(0); + } else { + return true; + } + } + + // Otherwise attempt to print the specific help option. + try { + Class klass = Class.forName(helpOption); + if (!PipelineOptions.class.isAssignableFrom(klass)) { + throw new ClassNotFoundException("PipelineOptions of type " + klass + " not found."); + } + printHelp(printStream, (Class) klass); + } catch (ClassNotFoundException e) { + // If we didn't find an exact match, look for any that match the class name. + Iterable> matches = Iterables.filter( + getRegisteredOptions(), + new Predicate>() { + @Override + public boolean apply(Class input) { + if (helpOption.contains(".")) { + return input.getName().endsWith(helpOption); + } else { + return input.getSimpleName().equals(helpOption); + } + } + }); + try { + printHelp(printStream, Iterables.getOnlyElement(matches)); + } catch (NoSuchElementException exception) { + printStream.format("Unable to find option %s.%n", helpOption); + printHelp(printStream); + } catch (IllegalArgumentException exception) { + printStream.format("Multiple matches found for %s: %s.%n", helpOption, + Iterables.transform(matches, ReflectHelpers.CLASS_NAME)); + printHelp(printStream); + } + } + if (exit) { + System.exit(0); + } else { + return true; + } + } + return false; + } + /** * Returns the simple name of the calling class using the current threads stack. */ @@ -341,6 +434,9 @@ Class getProxyClass() { private static final Map>, Registration> COMBINED_CACHE = Maps.newConcurrentMap(); + /** The width at which options should be output. */ + private static final int TERMINAL_WIDTH = 80; + static { try { IGNORED_METHODS = ImmutableSet.builder() @@ -479,6 +575,162 @@ public static Set> getRegisteredOptions() { return Collections.unmodifiableSet(REGISTERED_OPTIONS); } + /** + * Outputs the set of registered options with the PipelineOptionsFactory + * with a description for each one if available to the output stream. This output + * is pretty printed and meant to be human readable. This method will attempt to + * format its output to be compatible with a terminal window. + */ + public static void printHelp(PrintStream out) { + Preconditions.checkNotNull(out); + out.println("The set of registered options are:"); + Set> sortedOptions = + new TreeSet<>(ClassNameComparator.INSTANCE); + sortedOptions.addAll(REGISTERED_OPTIONS); + for (Class kls : sortedOptions) { + out.format(" %s%n", kls.getName()); + } + out.format("%nUse --help= for detailed help. For example:%n" + + " --help=DataflowPipelineOptions %n" + + " --help=com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions%n"); + } + + /** + * Outputs the set of options available to be set for the passed in {@link PipelineOptions} + * interface. The output is in a human readable format. The format is: + *

    +   * OptionGroup:
    +   *     ... option group description ...
    +   *
+   *  --option1={@code <type>} or list of valid enum choices
    +   *     Default: value (if available, see {@link Default})
    +   *     ... option description ... (if available, see {@link Description})
+   *  --option2={@code <type>} or list of valid enum choices
    +   *     Default: value (if available, see {@link Default})
    +   *     ... option description ... (if available, see {@link Description})
    +   * 
    + * This method will attempt to format its output to be compatible with a terminal window. + */ + public static void printHelp(PrintStream out, Class iface) { + Preconditions.checkNotNull(out); + Preconditions.checkNotNull(iface); + validateWellFormed(iface, REGISTERED_OPTIONS); + + Iterable methods = getClosureOfMethodsOnInterface(iface); + ListMultimap, Method> ifaceToMethods = ArrayListMultimap.create(); + for (Method method : methods) { + // Process only methods which are not marked as hidden. + if (method.getAnnotation(Hidden.class) == null) { + ifaceToMethods.put(method.getDeclaringClass(), method); + } + } + SortedSet> ifaces = new TreeSet<>(ClassNameComparator.INSTANCE); + // Keep interfaces which are not marked as hidden. + ifaces.addAll(Collections2.filter(ifaceToMethods.keySet(), new Predicate>() { + @Override + public boolean apply(Class input) { + return input.getAnnotation(Hidden.class) == null; + } + })); + for (Class currentIface : ifaces) { + Map propertyNamesToGetters = + getPropertyNamesToGetters(ifaceToMethods.get(currentIface)); + + // Don't output anything if there are no defined options + if (propertyNamesToGetters.isEmpty()) { + continue; + } + + out.format("%s:%n", currentIface.getName()); + prettyPrintDescription(out, currentIface.getAnnotation(Description.class)); + + out.println(); + + List lists = Lists.newArrayList(propertyNamesToGetters.keySet()); + Collections.sort(lists, String.CASE_INSENSITIVE_ORDER); + for (String propertyName : lists) { + Method method = propertyNamesToGetters.get(propertyName); + String printableType = method.getReturnType().getSimpleName(); + if (method.getReturnType().isEnum()) { + printableType = Joiner.on(" | ").join(method.getReturnType().getEnumConstants()); + } + out.format(" --%s=<%s>%n", propertyName, printableType); + Optional defaultValue = getDefaultValueFromAnnotation(method); + if (defaultValue.isPresent()) { + out.format(" Default: %s%n", defaultValue.get()); + } + prettyPrintDescription(out, method.getAnnotation(Description.class)); + } + out.println(); + } + } + + /** + * Outputs the value of the description, breaking up long lines on white space characters + * and attempting to honor a line limit of {@code TERMINAL_WIDTH}. + */ + private static void prettyPrintDescription(PrintStream out, Description description) { + final String spacing = " "; + if (description == null || description.value() == null) { + return; + } + + String[] words = description.value().split("\\s+"); + if (words.length == 0) { + return; + } + + out.print(spacing); + int lineLength = spacing.length(); + for (int i = 0; i < words.length; ++i) { + out.print(" "); + out.print(words[i]); + lineLength += 1 + words[i].length(); + + // If the next word takes us over the terminal width, then goto the next line. + if (i + 1 != words.length && words[i + 1].length() + lineLength + 1 > TERMINAL_WIDTH) { + out.println(); + out.print(spacing); + lineLength = spacing.length(); + } + } + out.println(); + } + + /** + * Returns a string representation of the {@link Default} value on the passed in method. 
+ */ + private static Optional getDefaultValueFromAnnotation(Method method) { + for (Annotation annotation : method.getAnnotations()) { + if (annotation instanceof Default.Class) { + return Optional.of(((Default.Class) annotation).value().getSimpleName()); + } else if (annotation instanceof Default.String) { + return Optional.of(((Default.String) annotation).value()); + } else if (annotation instanceof Default.Boolean) { + return Optional.of(Boolean.toString(((Default.Boolean) annotation).value())); + } else if (annotation instanceof Default.Character) { + return Optional.of(Character.toString(((Default.Character) annotation).value())); + } else if (annotation instanceof Default.Byte) { + return Optional.of(Byte.toString(((Default.Byte) annotation).value())); + } else if (annotation instanceof Default.Short) { + return Optional.of(Short.toString(((Default.Short) annotation).value())); + } else if (annotation instanceof Default.Integer) { + return Optional.of(Integer.toString(((Default.Integer) annotation).value())); + } else if (annotation instanceof Default.Long) { + return Optional.of(Long.toString(((Default.Long) annotation).value())); + } else if (annotation instanceof Default.Float) { + return Optional.of(Float.toString(((Default.Float) annotation).value())); + } else if (annotation instanceof Default.Double) { + return Optional.of(Double.toString(((Default.Double) annotation).value())); + } else if (annotation instanceof Default.Enum) { + return Optional.of(((Default.Enum) annotation).value()); + } else if (annotation instanceof Default.InstanceFactory) { + return Optional.of(((Default.InstanceFactory) annotation).value().getSimpleName()); + } + } + return Optional.absent(); + } + static Map>> getRegisteredRunners() { return SUPPORTED_PIPELINE_RUNNERS; } @@ -582,7 +834,7 @@ public Iterable apply(Class input) { static Iterable getClosureOfMethodsOnInterface(Class iface) { Preconditions.checkNotNull(iface); Preconditions.checkArgument(iface.isInterface()); - ImmutableList.Builder builder = ImmutableList.builder(); + ImmutableSet.Builder builder = ImmutableSet.builder(); Queue> interfacesToProcess = Queues.newArrayDeque(); interfacesToProcess.add(iface); while (!interfacesToProcess.isEmpty()) { @@ -604,21 +856,7 @@ private static List getPropertyDescriptors(Class beanClas // The sorting is important to make this method stable. SortedSet methods = Sets.newTreeSet(MethodComparator.INSTANCE); methods.addAll(Arrays.asList(beanClass.getMethods())); - // Build a map of property names to getters. - SortedMap propertyNamesToGetters = Maps.newTreeMap(); - for (Method method : methods) { - String methodName = method.getName(); - if ((!methodName.startsWith("get") - && !methodName.startsWith("is")) - || method.getParameterTypes().length != 0 - || method.getReturnType() == void.class) { - continue; - } - String propertyName = Introspector.decapitalize( - methodName.startsWith("is") ? methodName.substring(2) : methodName.substring(3)); - propertyNamesToGetters.put(propertyName, method); - } - + SortedMap propertyNamesToGetters = getPropertyNamesToGetters(methods); List descriptors = Lists.newArrayList(); /* @@ -645,6 +883,28 @@ private static List getPropertyDescriptors(Class beanClas return descriptors; } + /** + * Returns a map of the property name to the getter method it represents. + * If there are duplicate methods with the same bean name, then it is indeterminate + * as to which method will be returned. 
+ */ + private static SortedMap getPropertyNamesToGetters(Iterable methods) { + SortedMap propertyNamesToGetters = Maps.newTreeMap(); + for (Method method : methods) { + String methodName = method.getName(); + if ((!methodName.startsWith("get") + && !methodName.startsWith("is")) + || method.getParameterTypes().length != 0 + || method.getReturnType() == void.class) { + continue; + } + String propertyName = Introspector.decapitalize( + methodName.startsWith("is") ? methodName.substring(2) : methodName.substring(3)); + propertyNamesToGetters.put(propertyName, method); + } + return propertyNamesToGetters; + } + /** * Validates that a given class conforms to the following properties: *

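For orientation, here is a rough sketch (not part of any patch in this series) of how the --help handling added above looks from a pipeline author's point of view. The HelpExample class, the MyOptions interface, and the gs:// path are hypothetical; PipelineOptionsFactory.fromArgs(...).as(...), @Description, and @Default.String are the APIs touched by the diffs above.

import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class HelpExample {
  /** A hypothetical options interface, declared here only for this sketch. */
  public interface MyOptions extends PipelineOptions {
    @Description("Path of the file to read from")
    @Default.String("gs://my-bucket/input.txt")
    String getInputFile();
    void setInputFile(String value);
  }

  public static void main(String[] args) {
    // Passing --help prints the registered options and exits before the pipeline is
    // constructed; --help=DataflowPipelineOptions (or a fully qualified PipelineOptions
    // class name) prints detailed usage, including @Description text and @Default values.
    MyOptions options = PipelineOptionsFactory.fromArgs(args).as(MyOptions.class);
    System.out.println("inputFile = " + options.getInputFile());
  }
}

Running the same program with --help=DataflowPipelineOptions, for example, lists each option of that interface with its default and description, wrapped to the 80-column terminal width used by printHelp.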
%s)", PROJECT_ID_REGEXP, @@ -220,6 +221,7 @@ public class BigQueryIO { private static final Pattern TABLE_SPEC = Pattern.compile(DATASET_TABLE_REGEXP); + // TODO: make this private and remove improper access from BigQueryIOTranslator. public static final String SET_PROJECT_FROM_OPTIONS_WARNING = "No project specified for BigQuery table \"%1$s.%2$s\". Assuming it is in \"%3$s\". If the" + " table is in a different project please specify it as a part of the BigQuery table" @@ -237,7 +239,7 @@ public class BigQueryIO { /** * Parse a table specification in the form - * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]". + * {@code "[project_id]:[dataset_id].[table_id]"} or {@code "[dataset_id].[table_id]"}. * *

If the project id is omitted, the default project id is used. */ @@ -256,7 +258,7 @@ public static TableReference parseTableSpec(String tableSpec) { } /** - * Returns a canonical string representation of the TableReference. + * Returns a canonical string representation of the {@link TableReference}. */ public static String toTableSpec(TableReference ref) { StringBuilder sb = new StringBuilder(); @@ -273,11 +275,10 @@ public static String toTableSpec(TableReference ref) { * A {@link PTransform} that reads from a BigQuery table and returns a * {@link PCollection} of {@link TableRow TableRows} containing each of the rows of the table. * - *

Each TableRow record contains values indexed by column name. Here is a + *

Each {@link TableRow} contains values indexed by column name. Here is a * sample processing function that processes a "line" column from rows: - *


-   * static class ExtractWordsFn extends DoFn{@literal } {
-   *   {@literal @}Override
+   * 
{@code
+   * static class ExtractWordsFn extends DoFn {
    *   public void processElement(ProcessContext c) {
    *     // Get the "line" field of the TableRow object, split it into words, and emit them.
    *     TableRow row = c.element();
@@ -288,18 +289,20 @@ public static String toTableSpec(TableReference ref) {
    *       }
    *     }
    *   }
-   * }
-   * 
+ * }}
*/ public static class Read { + /** + * Returns a {@link Read.Bound} with the given name. The BigQuery table or query to be read + * from has not yet been configured. + */ public static Bound named(String name) { return new Bound().named(name); } /** - * Reads a BigQuery table specified as - * "[project_id]:[dataset_id].[table_id]" or "[dataset_id].[table_id]" for - * tables within the current project. + * Reads a BigQuery table specified as {@code "[project_id]:[dataset_id].[table_id]"} or + * {@code "[dataset_id].[table_id]"} for tables within the current project. */ public static Bound from(String tableSpec) { return new Bound().from(tableSpec); @@ -313,7 +316,7 @@ public static Bound fromQuery(String query) { } /** - * Reads a BigQuery table specified as a TableReference object. + * Reads a BigQuery table specified as a {@link TableReference} object. */ public static Bound from(TableReference table) { return new Bound().from(table); @@ -339,13 +342,11 @@ public static class Bound extends PTransform> { "Validation of query \"%1$s\" failed. If the query depends on an earlier stage of the" + " pipeline, This validation can be disabled using #withoutValidation."; - Bound() { - query = null; - table = null; - this.validate = true; + private Bound() { + this(null, null, null, true); } - Bound(String name, String query, TableReference reference, boolean validate) { + private Bound(String name, String query, TableReference reference, boolean validate) { super(name); this.table = reference; this.query = query; @@ -353,32 +354,39 @@ public static class Bound extends PTransform> { } /** - * Sets the name associated with this transformation. + * Returns a copy of this transform using the name associated with this transformation. + * + *

Does not modify this object. */ public Bound named(String name) { return new Bound(name, query, table, validate); } /** - * Sets the table specification. + * Returns a copy of this transform that reads from the specified table. Refer to + * {@link #parseTableSpec(String)} for the specification format. * - *

Refer to {@link #parseTableSpec(String)} for the specification format. + *

Does not modify this object. */ public Bound from(String tableSpec) { return from(parseTableSpec(tableSpec)); } /** - * Sets the BigQuery query to be used. + * Returns a copy of this transform that reads from the specified table. + * + *

Does not modify this object. */ - public Bound fromQuery(String query) { + public Bound from(TableReference table) { return new Bound(name, query, table, validate); } /** - * Sets the table specification. + * Returns a copy of this transform that reads the results of the specified query. + * + *

Does not modify this object. */ - public Bound from(TableReference table) { + public Bound fromQuery(String query) { return new Bound(name, query, table, validate); } @@ -471,12 +479,15 @@ public void evaluate( } /** - * Returns the table to write. + * Returns the table to write, or {@code null} if reading from a query instead. */ public TableReference getTable() { return table; } + /** + * Returns the query to be read, or {@code null} if reading from a table instead. + */ public String getQuery() { return query; } @@ -488,6 +499,9 @@ public boolean getValidate() { return validate; } } + + /** Disallow construction of utility class. */ + private Read() {} } ///////////////////////////////////////////////////////////////////////////// @@ -496,36 +510,36 @@ public boolean getValidate() { * A {@link PTransform} that writes a {@link PCollection} containing {@link TableRow TableRows} * to a BigQuery table. * - *

By default, tables will be created if they do not exist, which - * corresponds to a {@code CreateDisposition.CREATE_IF_NEEDED} disposition - * that matches the default of BigQuery's Jobs API. A schema must be - * provided (via {@link BigQueryIO.Write#withSchema}), or else the transform may fail - * at runtime with an {@link java.lang.IllegalArgumentException}. + *

In BigQuery, each table has an enclosing dataset. The dataset being written must already + * exist. * - *

The dataset being written must already exist. + *

By default, tables will be created if they do not exist, which corresponds to a + * {@link CreateDisposition#CREATE_IF_NEEDED} disposition that matches the default of BigQuery's + * Jobs API. A schema must be provided (via {@link BigQueryIO.Write#withSchema(TableSchema)}), + * or else the transform may fail at runtime with an {@link IllegalArgumentException}. * *

By default, writes require an empty table, which corresponds to - * a {@code WriteDisposition.WRITE_EMPTY} disposition that matches the + * a {@link WriteDisposition#WRITE_EMPTY} disposition that matches the * default of BigQuery's Jobs API. * *

Here is a sample transform that produces TableRow values containing * "word" and "count" columns: - *


-   * static class FormatCountsFn extends DoFnP{@literal , TableRow>} {
-   *   {@literal @}Override
+   * 
{@code
+   * static class FormatCountsFn extends DoFn, TableRow> {
    *   public void processElement(ProcessContext c) {
    *     TableRow row = new TableRow()
    *         .set("word", c.element().getKey())
    *         .set("count", c.element().getValue().intValue());
    *     c.output(row);
    *   }
-   * }
-   * 
+ * }}
*/ public static class Write { /** - * An enumeration type for the BigQuery create disposition strings publicly - * documented as {@code CREATE_NEVER}, and {@code CREATE_IF_NEEDED}. + * An enumeration type for the BigQuery create disposition strings. + * + * @see + * configuration.query.createDisposition in the BigQuery Jobs API */ public enum CreateDisposition { /** @@ -552,9 +566,10 @@ public enum CreateDisposition { } /** - * An enumeration type for the BigQuery write disposition strings publicly - * documented as {@code WRITE_TRUNCATE}, {@code WRITE_APPEND}, and - * {@code WRITE_EMPTY}. + * An enumeration type for the BigQuery write disposition strings. + * + * @see + * configuration.query.writeDisposition in the BigQuery Jobs API */ public enum WriteDisposition { /** @@ -562,7 +577,7 @@ public enum WriteDisposition { * *

The replacement may occur in multiple steps - for instance by first * removing the existing table, then creating a replacement, then filling - * it in. This is not an atomic operation, and external programs may + * it in. This is not an atomic operation, and external programs may * see the table in any of these intermediate steps. */ WRITE_TRUNCATE, @@ -579,16 +594,17 @@ public enum WriteDisposition { *

If the output table is not empty, the write fails at runtime. * *

This check may occur long before data is written, and does not - * guarantee exclusive access to the table. If two programs are run + * guarantee exclusive access to the table. If two programs are run * concurrently, each specifying the same output table and - * a {@link WriteDisposition} of {@code WRITE_EMPTY}, it is possible + * a {@link WriteDisposition} of {@link WriteDisposition#WRITE_EMPTY}, it is possible * for both to succeed. */ WRITE_EMPTY } /** - * Sets the name associated with this transformation. + * Creates a write transformation with the given transform name. The BigQuery table to be + * written has not yet been configured. */ public static Bound named(String name) { return new Bound().named(name); @@ -608,20 +624,27 @@ public static Bound to(TableReference table) { return new Bound().to(table); } - /** Creates a write transformation from a function that maps windows to table specifications. + /** + * Creates a write transformation from a function that maps windows to table specifications. * Each time a new window is encountered, this function will be called and the resulting table * will be created. Records within that window will be written to the associated table. * - *

See {@link #parseTableSpec(String)} for the format that tableSpecFunction should return. + *

See {@link #parseTableSpec(String)} for the format that {@code tableSpecFunction} should + * return. * - *

tableSpecFunction should be determinstic. When given the same window, it should always - * return the same table specification. + *

{@code tableSpecFunction} should be deterministic. When given the same window, it should + * always return the same table specification. */ public static Bound to(SerializableFunction tableSpecFunction) { return new Bound().to(tableSpecFunction); } - /** Creates a write transformation from a function that maps windows to TableReference objects. + /** + * Creates a write transformation from a function that maps windows to {@link TableReference} + * objects. + * + *

{@code tableRefFunction} should be deterministic. When given the same window, it should + * always return the same table reference. */ public static Bound toTableReference( SerializableFunction tableRefFunction) { @@ -629,28 +652,28 @@ public static Bound toTableReference( } /** - * Specifies a table schema to use in table creation. + * Creates a write transformation with the specified schema to use in table creation. * - *

The schema is required only if writing to a table that does not already - * exist, and {@link BigQueryIO.Write.CreateDisposition} is set to - * {@code CREATE_IF_NEEDED}. + *

The schema is required only if writing to a table that does not already + * exist, and {@link CreateDisposition} is set to + * {@link CreateDisposition#CREATE_IF_NEEDED}. */ public static Bound withSchema(TableSchema schema) { return new Bound().withSchema(schema); } - /** Specifies options for creating the table. */ + /** Creates a write transformation with the specified options for creating the table. */ public static Bound withCreateDisposition(CreateDisposition disposition) { return new Bound().withCreateDisposition(disposition); } - /** Specifies options for writing to the table. */ + /** Creates a write transformation with the specified options for writing to the table. */ public static Bound withWriteDisposition(WriteDisposition disposition) { return new Bound().withWriteDisposition(disposition); } /** - * Disables BigQuery table validation, which is enabled by default. + * Creates a write transformation with BigQuery table validation disabled. */ public static Bound withoutValidation() { return new Bound().withoutValidation(); @@ -693,16 +716,18 @@ public TableReference apply(BoundedWindow value) { } } + /** + * @deprecated Should be private. Instead, use one of the factory methods in + * {@link BigQueryIO.Write}, such as {@link BigQueryIO.Write#to(String)}, to create an + * instance of this class. + */ + @Deprecated public Bound() { - this.table = null; - this.tableRefFunction = null; - this.schema = null; - this.createDisposition = CreateDisposition.CREATE_IF_NEEDED; - this.writeDisposition = WriteDisposition.WRITE_EMPTY; - this.validate = true; + this(null, null, null, null, CreateDisposition.CREATE_IF_NEEDED, + WriteDisposition.WRITE_EMPTY, true); } - Bound(String name, TableReference ref, + private Bound(String name, TableReference ref, SerializableFunction tableRefFunction, TableSchema schema, CreateDisposition createDisposition, WriteDisposition writeDisposition, boolean validate) { @@ -716,7 +741,9 @@ public Bound() { } /** - * Sets the name associated with this transformation. + * Returns a copy of this write transformation, but with the specified transform name. + * + *

Does not modify this object. */ public Bound named(String name) { return new Bound(name, table, tableRefFunction, schema, createDisposition, @@ -724,27 +751,48 @@ public Bound named(String name) { } /** - * Specifies the table specification. + * Returns a copy of this write transformation, but writing to the specified table. Refer to + * {@link #parseTableSpec(String)} for the specification format. * - *

Refer to {@link #parseTableSpec(String)} for the specification format. + *

Does not modify this object. */ public Bound to(String tableSpec) { return to(parseTableSpec(tableSpec)); } /** - * Specifies the table to be written to. + * Returns a copy of this write transformation, but writing to the specified table. + * + *

Does not modify this object. */ public Bound to(TableReference table) { return new Bound(name, table, tableRefFunction, schema, createDisposition, writeDisposition, validate); } + /** + * Returns a copy of this write transformation, but using the specified function to determine + * which table to write to for each window. + * + *

Does not modify this object. + * + *

{@code tableSpecFunction} should be deterministic. When given the same window, it + * should always return the same table specification. + */ public Bound to( SerializableFunction tableSpecFunction) { return toTableReference(new TranslateTableSpecFunction(tableSpecFunction)); } + /** + * Returns a copy of this write transformation, but using the specified function to determine + * which table to write to for each window. + * + *

Does not modify this object. + * + *

{@code tableRefFunction} should be deterministic. When given the same window, it should + * always return the same table reference. + */ public Bound toTableReference( SerializableFunction tableRefFunction) { return new Bound(name, table, tableRefFunction, schema, createDisposition, @@ -752,27 +800,40 @@ public Bound toTableReference( } /** - * Specifies the table schema, used if the table is created. + * Returns a copy of this write transformation, but using the specified schema for rows + * to be written. + * + *

Does not modify this object. */ public Bound withSchema(TableSchema schema) { return new Bound(name, table, tableRefFunction, schema, createDisposition, writeDisposition, validate); } - /** Specifies options for creating the table. */ + /** + * Returns a copy of this write transformation, but using the specified create disposition. + * + *

Does not modify this object. + */ public Bound withCreateDisposition(CreateDisposition createDisposition) { return new Bound(name, table, tableRefFunction, schema, createDisposition, writeDisposition, validate); } - /** Specifies options for writing the table. */ + /** + * Returns a copy of this write transformation, but using the specified write disposition. + * + *

Does not modify this object. + */ public Bound withWriteDisposition(WriteDisposition writeDisposition) { return new Bound(name, table, tableRefFunction, schema, createDisposition, writeDisposition, validate); } /** - * Disable table validation. + * Returns a copy of this write transformation, but without BigQuery table validation. + * + *

Does not modify this object. */ public Bound withoutValidation() { return new Bound(name, table, tableRefFunction, schema, createDisposition, @@ -894,19 +955,22 @@ public TableSchema getSchema() { return schema; } - /** Returns the table reference. */ + /** Returns the table reference, or {@code null} if a . */ public TableReference getTable() { return table; } - /** Returns true if table validation is enabled. */ + /** Returns {@code true} if table validation is enabled. */ public boolean getValidate() { return validate; } } + + /** Disallow construction of utility class. */ + private Write() {} } - public static void verifyDatasetPresence(BigQueryOptions options, TableReference table) { + private static void verifyDatasetPresence(BigQueryOptions options, TableReference table) { try { Bigquery client = Transport.newBigQueryClient(options).build(); BigQueryTableRowIterator.executeWithBackOff( @@ -927,7 +991,7 @@ public static void verifyDatasetPresence(BigQueryOptions options, TableReference } } - public static void verifyTablePresence(BigQueryOptions options, TableReference table) { + private static void verifyTablePresence(BigQueryOptions options, TableReference table) { try { Bigquery client = Transport.newBigQueryClient(options).build(); BigQueryTableRowIterator.executeWithBackOff( @@ -954,7 +1018,7 @@ public static void verifyTablePresence(BigQueryOptions options, TableReference t */ private static class StreamingWriteFn extends DoFn, TableRowInfo>, Void> { - /** TableSchema in JSON. Use String to make the class Serializable. */ + /** TableSchema in JSON. Use String to make the class Serializable. */ private final String jsonTableSchema; /** JsonTableRows to accumulate BigQuery rows in order to batch writes. */ @@ -1067,9 +1131,9 @@ public int getShardNumber() { } /** - * A {@link Coder} for {@code ShardedKey}, using a wrapped key {@code Coder}. + * A {@link Coder} for {@link ShardedKey}, using a wrapped key {@link Coder}. */ - public static class ShardedKeyCoder + private static class ShardedKeyCoder extends StandardCoder> { public static ShardedKeyCoder of(Coder keyCoder) { return new ShardedKeyCoder<>(keyCoder); @@ -1177,7 +1241,7 @@ private static class TagWithUniqueIdsAndTable /** TableSpec to write to. */ private final String tableSpec; - /** User function mapping windows to TableReference in JSON. */ + /** User function mapping windows to {@link TableReference} in JSON. */ private final SerializableFunction tableRefFunction; private transient String randomUUID; @@ -1281,7 +1345,7 @@ public PDone apply(PCollection input) { .apply(ParDo.of(new StreamingWriteFn(tableSchema))); // Note that the implementation to return PDone here breaks the - // implicit assumption about the job execution order. If a user + // implicit assumption about the job execution order. If a user // implements a PTransform that takes PDone returned here as its // input, the transform may not necessarily be executed after // the BigQueryIO.Write. @@ -1292,6 +1356,9 @@ public PDone apply(PCollection input) { ///////////////////////////////////////////////////////////////////////////// + /** Disallow construction of utility class. */ + private BigQueryIO() {} + /** * Direct mode read evaluator. 
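The BigQueryIO.Read and BigQueryIO.Write javadoc above describes the fluent Bound builders and the CREATE_IF_NEEDED and WRITE_EMPTY defaults. As a rough usage sketch (not part of this patch series: the project, dataset, table, rows, and schema are invented, and TableRowJsonCoder is assumed to be the SDK's coder for TableRow), the defaults could be spelled out explicitly like this:

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
import com.google.cloud.dataflow.sdk.io.BigQueryIO;
import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;

import java.util.Arrays;

public class BigQueryWriteExample {
  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).as(PipelineOptions.class);
    Pipeline p = Pipeline.create(options);

    // Hypothetical schema for the destination table.
    TableSchema schema = new TableSchema().setFields(Arrays.asList(
        new TableFieldSchema().setName("word").setType("STRING"),
        new TableFieldSchema().setName("count").setType("INTEGER")));

    p.apply(Create.of(
            new TableRow().set("word", "hello").set("count", 1),
            new TableRow().set("word", "world").set("count", 2)))
        .setCoder(TableRowJsonCoder.of())
        .apply(BigQueryIO.Write.named("WriteExample")
            // "[project_id]:[dataset_id].[table_id]"; the dataset must already exist.
            .to("my-project:my_dataset.my_table")
            .withSchema(schema)
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withWriteDisposition(WriteDisposition.WRITE_EMPTY));

    p.run();
  }
}

Reading follows the same pattern: BigQueryIO.Read.named("ReadRows").from("my-project:my_dataset.my_table") yields a PCollection of TableRow values, or fromQuery(...) can be used to read query results instead.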
* From 0416263a0d8aac778fffef0856b620213d3dba1b Mon Sep 17 00:00:00 2001 From: dhalperi Date: Wed, 18 Nov 2015 16:47:18 -0800 Subject: [PATCH 1179/1541] PubSubIO: improve Javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108196067 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 560 +++++++++++------- 1 file changed, 335 insertions(+), 225 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 951e6e4141262..99c802964baf7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -28,10 +28,14 @@ import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; +import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.Transport; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; @@ -43,7 +47,6 @@ import org.joda.time.Duration; import org.joda.time.Instant; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,24 +57,24 @@ import java.util.List; import java.util.Map; import java.util.Random; - import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nullable; /** - * Read and Write {@link PTransform}s for Pub/Sub streams. These transforms create - * and consume unbounded {@link com.google.cloud.dataflow.sdk.values.PCollection}s. + * Read and Write {@link PTransform}s for Cloud Pub/Sub streams. These transforms create + * and consume unbounded {@link PCollection PCollections}. * - *

Permissions

- * Permission requirements depend on the - * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner PipelineRunner} that is - * used to execute the Dataflow job. Please refer to the documentation of corresponding - * {@code PipelineRunner}s for more details. + *

Permissions

+ *

Permission requirements depend on the {@link PipelineRunner} that is used to execute the + * Dataflow job. Please refer to the documentation of corresponding + * {@link PipelineRunner PipelineRunners} for more details. */ public class PubsubIO { private static final Logger LOG = LoggerFactory.getLogger(PubsubIO.class); + + /** The default {@link Coder} used to translate to/from Cloud Pub/Sub messages. */ public static final Coder DEFAULT_PUBSUB_CODER = StringUtf8Coder.of(); /** @@ -86,17 +89,14 @@ public class PubsubIO { private static final Pattern SUBSCRIPTION_REGEXP = Pattern.compile("projects/([^/]+)/subscriptions/(.+)"); - private static final Pattern TOPIC_REGEXP = - Pattern.compile("projects/([^/]+)/topics/(.+)"); + private static final Pattern TOPIC_REGEXP = Pattern.compile("projects/([^/]+)/topics/(.+)"); private static final Pattern V1BETA1_SUBSCRIPTION_REGEXP = Pattern.compile("/subscriptions/([^/]+)/(.+)"); - private static final Pattern V1BETA1_TOPIC_REGEXP = - Pattern.compile("/topics/([^/]+)/(.+)"); + private static final Pattern V1BETA1_TOPIC_REGEXP = Pattern.compile("/topics/([^/]+)/(.+)"); - private static final Pattern PUBSUB_NAME_REGEXP = - Pattern.compile("[a-zA-Z][-._~%+a-zA-Z0-9]+"); + private static final Pattern PUBSUB_NAME_REGEXP = Pattern.compile("[a-zA-Z][-._~%+a-zA-Z0-9]+"); private static final int PUBSUB_NAME_MAX_LENGTH = 255; @@ -119,20 +119,18 @@ private static void validatePubsubName(String name) { } if (name.startsWith("goog")) { - throw new IllegalArgumentException( - "Pubsub object name cannot start with goog: " + name); + throw new IllegalArgumentException("Pubsub object name cannot start with goog: " + name); } Matcher match = PUBSUB_NAME_REGEXP.matcher(name); if (!match.matches()) { - throw new IllegalArgumentException( - "Illegal Pubsub object name specified: " + name + throw new IllegalArgumentException("Illegal Pubsub object name specified: " + name + " Please see Javadoc for naming rules."); } } /** - * Class representing a Pubsub Subscription. + * Class representing a Cloud Pub/Sub Subscription. */ public static class PubsubSubscription implements Serializable { private enum Type { NORMAL, FAKE } @@ -147,6 +145,23 @@ private PubsubSubscription(Type type, String project, String subscription) { this.subscription = subscription; } + /** + * Creates a class representing a Pub/Sub subscription from the specified subscription path. + * + *

Cloud Pub/Sub subscription names should be of the form + * {@code projects//subscriptions/}, where {@code } is the name + * of the project the subscription belongs to. The {@code } component must comply + * with the following requirements: + * + *

    + *
  • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods + * ('.').
  • + *
  • Must be between 3 and 255 characters.
  • + *
  • Must begin with a letter.
  • + *
  • Must end with a letter or a number.
  • + *
  • Cannot begin with {@code 'goog'} prefix.
  • + *
+ */ public static PubsubSubscription fromPath(String path) { if (path.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX) || path.startsWith(SUBSCRIPTION_STARTING_SIGNAL)) { @@ -164,8 +179,7 @@ public static PubsubSubscription fromPath(String path) { } else { Matcher match = SUBSCRIPTION_REGEXP.matcher(path); if (!match.matches()) { - throw new IllegalArgumentException( - "Pubsub subscription is not in " + throw new IllegalArgumentException("Pubsub subscription is not in " + "projects//subscriptions/ format: " + path); } projectName = match.group(1); @@ -177,6 +191,13 @@ public static PubsubSubscription fromPath(String path) { return new PubsubSubscription(Type.NORMAL, projectName, subscriptionName); } + /** + * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub + * v1beta1 API. + * + * @deprecated the v1beta1 API for Cloud Pub/Sub is deprecated. + */ + @Deprecated public String asV1Beta1Path() { if (type == Type.NORMAL) { return "/subscriptions/" + project + "/" + subscription; @@ -185,6 +206,13 @@ public String asV1Beta1Path() { } } + /** + * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub + * v1beta2 API. + * + * @deprecated the v1beta2 API for Cloud Pub/Sub is deprecated. + */ + @Deprecated public String asV1Beta2Path() { if (type == Type.NORMAL) { return "projects/" + project + "/subscriptions/" + subscription; @@ -192,10 +220,22 @@ public String asV1Beta2Path() { return subscription; } } + + /** + * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub + * API. + */ + public String asPath() { + if (type == Type.NORMAL) { + return "projects/" + project + "/subscriptions/" + subscription; + } else { + return subscription; + } + } } /** - * Class representing a Pubsub Topic. + * Class representing a Cloud Pub/Sub Topic. */ public static class PubsubTopic implements Serializable { private enum Type { NORMAL, FAKE } @@ -204,12 +244,29 @@ private enum Type { NORMAL, FAKE } private final String project; private final String topic; - public PubsubTopic(Type type, String project, String topic) { + private PubsubTopic(Type type, String project, String topic) { this.type = type; this.project = project; this.topic = topic; } + /** + * Creates a class representing a Cloud Pub/Sub topic from the specified topic path. + * + *

Cloud Pub/Sub topic names should be of the form + * {@code /topics//}, where {@code } is the name of + * the publishing project. The {@code } component must comply with + * the following requirements: + * + *

    + *
  • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods + * ('.').
  • + *
  • Must be between 3 and 255 characters.
  • + *
  • Must begin with a letter.
  • + *
  • Must end with a letter or a number.
  • + *
  • Cannot begin with 'goog' prefix.
  • + *
+ */ public static PubsubTopic fromPath(String path) { if (path.equals(TOPIC_DEV_NULL_TEST_NAME)) { return new PubsubTopic(Type.FAKE, "", path); @@ -227,8 +284,7 @@ public static PubsubTopic fromPath(String path) { Matcher match = TOPIC_REGEXP.matcher(path); if (!match.matches()) { throw new IllegalArgumentException( - "Pubsub topic is not in projects//topics/ format: " - + path); + "Pubsub topic is not in projects//topics/ format: " + path); } projectName = match.group(1); topicName = match.group(2); @@ -239,6 +295,13 @@ public static PubsubTopic fromPath(String path) { return new PubsubTopic(Type.NORMAL, projectName, topicName); } + /** + * Returns the string representation of this topic as a path used in the Cloud Pub/Sub + * v1beta1 API. + * + * @deprecated the v1beta1 API for Cloud Pub/Sub is deprecated. + */ + @Deprecated public String asV1Beta1Path() { if (type == Type.NORMAL) { return "/topics/" + project + "/" + topic; @@ -247,6 +310,13 @@ public String asV1Beta1Path() { } } + /** + * Returns the string representation of this topic as a path used in the Cloud Pub/Sub + * v1beta2 API. + * + * @deprecated the v1beta2 API for Cloud Pub/Sub is deprecated. + */ + @Deprecated public String asV1Beta2Path() { if (type == Type.NORMAL) { return "projects/" + project + "/topics/" + topic; @@ -254,39 +324,46 @@ public String asV1Beta2Path() { return topic; } } + + /** + * Returns the string representation of this topic as a path used in the Cloud Pub/Sub + * API. + */ + public String asPath() { + if (type == Type.NORMAL) { + return "projects/" + project + "/topics/" + topic; + } else { + return topic; + } + } } /** - * A {@link PTransform} that continuously reads from a Pubsub stream and - * returns a {@code PCollection} containing the items from + * A {@link PTransform} that continuously reads from a Cloud Pub/Sub stream and + * returns a {@link PCollection} of {@link String Strings} containing the items from * the stream. * - *

When running with a runner that only supports bounded {@code PCollection}s - * (such as DirectPipelineRunner or DataflowPipelineRunner without --streaming), only a - * bounded portion of the input Pubsub stream can be processed. As such, either - * {@link Bound#maxNumRecords} or {@link Bound#maxReadTime} must be set. + *

When running with a {@link PipelineRunner} that only supports bounded + * {@link PCollection PCollections} (such as {@link DirectPipelineRunner} or + * {@link DataflowPipelineRunner} without {@code --streaming}), only a bounded portion of the + * input Pub/Sub stream can be processed. As such, either {@link Bound#maxNumRecords(int)} or + * {@link Bound#maxReadTime(Duration)} must be set. */ public static class Read { + /** + * Creates and returns a transform for reading from Cloud Pub/Sub with the specified transform + * name. + */ public static Bound named(String name) { return new Bound<>(DEFAULT_PUBSUB_CODER).named(name); } /** - * Creates and returns a PubsubIO.Read PTransform for reading from - * a Pubsub topic with the specified publisher topic. Format for - * Cloud Pubsub topic names should be of the form - * {@code /topics//}, where {@code } is the name of - * the publishing project. The {@code } component must comply with - * the below requirements. + * Creates and returns a transform for reading from a Cloud Pub/Sub topic. Mutually exclusive + * with {@link #subscription(String)}. * - *

    - *
  • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods - * ('.').
  • - *
  • Must be between 3 and 255 characters.
  • - *
  • Must begin with a letter.
  • - *
  • Must end with a letter or a number.
  • - *
  • Cannot begin with 'goog' prefix.
  • - *
+ *

See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format + * of the {@code topic} string. * *

Dataflow will start reading data published on this topic from the time the pipeline is * started. Any data published on the topic before the pipeline is started will not be read by @@ -297,42 +374,31 @@ public static Bound topic(String topic) { } /** - * Creates and returns a PubsubIO.Read PTransform for reading from - * a specific Pubsub subscription. Mutually exclusive with - * PubsubIO.Read.topic(). - * Cloud Pubsub subscription names should be of the form - * {@code projects//subscriptions/}, - * where {@code } is the name of the project the subscription belongs to. - * The {@code } component must comply with the below requirements. + * Creates and returns a transform for reading from a specific Cloud Pub/Sub subscription. + * Mutually exclusive with {@link #topic(String)}. * - *

    - *
  • Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods - * ('.').
  • - *
  • Must be between 3 and 255 characters.
  • - *
  • Must begin with a letter.
  • - *
  • Must end with a letter or a number.
  • - *
  • Cannot begin with 'goog' prefix.
  • - *
+ *

See {@link PubsubIO.PubsubSubscription#fromPath(String)} for more details on the format + * of the {@code subscription} string. */ public static Bound subscription(String subscription) { return new Bound<>(DEFAULT_PUBSUB_CODER).subscription(subscription); } /** - * Creates and returns a PubsubIO.Read PTransform where record timestamps are expected - * to be provided using the PubSub labeling API. The {@code } parameter - * specifies the label name. The label value sent to PubsSub is a numerical value representing - * the number of milliseconds since the Unix epoch. For example, if using the joda time classes, - * org.joda.time.Instant.getMillis() returns the correct value for this label. + * Creates and returns a transform reading from Cloud Pub/Sub where record timestamps are + * expected to be provided as Pub/Sub message attributes. The {@code timestampLabel} + * parameter specifies the name of the attribute that contains the timestamp. The value of the + * attribute should be a numerical value representing the number of milliseconds since the Unix + * epoch. For example, if using the Joda time classes, + * {@link Instant#getMillis()} returns the correct value for this label. * - *

If {@code } is not provided, the system will generate record timestamps + *

If {@code timestampLabel} is not provided, the system will generate record timestamps * the first time it sees each record. All windowing will be done relative to these timestamps. * - *

By default windows are emitted based on an estimate of when this source is likely + *

By default, windows are emitted based on an estimate of when this source is likely * done producing data for a given timestamp (referred to as the Watermark; see - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark} for more details). - * Any late data will be handled by the trigger specified with the windowing strategy -- by - * default it will be output immediately. + * {@link AfterWatermark} for more details). Any late data will be handled by the trigger + * specified with the windowing strategy – by default it will be output immediately. * *

Note that the system can guarantee that no late data will ever be seen when it assigns * timestamps by arrival time (i.e. {@code timestampLabel} is not provided). @@ -342,22 +408,22 @@ public static Bound timestampLabel(String timestampLabel) { } /** - * Creates and returns a PubSubIO.Read PTransform where unique record identifiers are - * expected to be provided using the PubSub labeling API. The {@code } parameter - * specifies the label name. The label value sent to PubSub can be any string value that - * uniquely identifies this record. + * Creates and returns a transform for reading from Cloud Pub/Sub where unique record + * identifiers are expected to be provided as Pub/Sub message attributes. The {@code idLabel} + * parameter specifies the attribute name. The value of the attribute can be any string + * that uniquely identifies this record. * - *

If idLabel is not provided, Dataflow cannot guarantee that no duplicate data will be - * delivered on the PubSub stream. In this case, deduplication of the stream will be - * stricly best effort. + *

If {@code idLabel} is not provided, Dataflow cannot guarantee that no duplicate data will + * be delivered on the Pub/Sub stream. In this case, deduplication of the stream will be + * strictly best effort. */ public static Bound idLabel(String idLabel) { return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel); } - /** - * Creates and returns a PubsubIO.Read PTransform that uses the given - * {@code Coder} to decode PubSub record into a value of type {@code T}. + /** + * Creates and returns a transform for reading from Cloud Pub/Sub that uses the given + * {@link Coder} to decode Pub/Sub messages into a value of type {@code T}. * *

By default, uses {@link StringUtf8Coder}, which just * returns the text lines as Java strings. @@ -370,60 +436,64 @@ public static Bound withCoder(Coder coder) { } /** - * Sets the maximum number of records that will be read from Pubsub. + * Creates and returns a transform for reading from Cloud Pub/Sub with a maximum number of + * records that will be read. The transform produces a bounded {@link PCollection}. * - *

Either this or {@link #maxReadTime} must be set for use as a bounded - * {@code PCollection}. + *

Either this option or {@link #maxReadTime(Duration)} must be set in order to create a + * bounded source. */ public static Bound maxNumRecords(int maxNumRecords) { return new Bound<>(DEFAULT_PUBSUB_CODER).maxNumRecords(maxNumRecords); } /** - * Sets the maximum duration during which records will be read from Pubsub. + * Creates and returns a transform for reading from Cloud Pub/Sub with a maximum + * duration during which records will be read. The transform produces a bounded + * {@link PCollection}. * - *

Either this or {@link #maxNumRecords} must be set for use as a bounded - * {@code PCollection}. + *

Either this option or {@link #maxNumRecords(int)} must be set in order to create a bounded + * source. */ public static Bound maxReadTime(Duration maxReadTime) { return new Bound<>(DEFAULT_PUBSUB_CODER).maxReadTime(maxReadTime); } /** - * A {@link PTransform} that reads from a PubSub source and returns - * a unbounded PCollection containing the items from the stream. + * A {@link PTransform} that reads from a Cloud Pub/Sub source and returns + * a unbounded {@link PCollection} containing the items from the stream. */ public static class Bound extends PTransform> { - /** The Pubsub topic to read from. */ - PubsubTopic topic; - /** The Pubsub subscription to read from. */ - PubsubSubscription subscription; - /** The Pubsub label to read timestamps from. */ - String timestampLabel; - /** The Pubsub label to read ids from. */ - String idLabel; + /** The Cloud Pub/Sub topic to read from. */ + @Nullable private final PubsubTopic topic; + + /** The Cloud Pub/Sub subscription to read from. */ + @Nullable private final PubsubSubscription subscription; + + /** The name of the message attribute to read timestamps from. */ + @Nullable private final String timestampLabel; + + /** The name of the message attribute to read unique message IDs from. */ + @Nullable private final String idLabel; + /** The coder used to decode each record. */ - @Nullable - final Coder coder; + @Nullable private final Coder coder; + /** Stop after reading this many records. */ - int maxNumRecords; + private final int maxNumRecords; + /** Stop after reading for this much time. */ - Duration maxReadTime; + @Nullable private final Duration maxReadTime; - Bound(Coder coder) { - this.coder = coder; + private Bound(Coder coder) { + this(null, null, null, null, coder, null, 0, null); } - Bound(String name, PubsubSubscription subscription, PubsubTopic topic, String timestampLabel, - Coder coder, String idLabel, - int maxNumRecords, Duration maxReadTime) { + private Bound(String name, PubsubSubscription subscription, PubsubTopic topic, + String timestampLabel, Coder coder, String idLabel, int maxNumRecords, + Duration maxReadTime) { super(name); - if (subscription != null) { - this.subscription = subscription; - } - if (topic != null) { - this.topic = topic; - } + this.subscription = subscription; + this.topic = topic; this.timestampLabel = timestampLabel; this.coder = coder; this.idLabel = idLabel; @@ -432,21 +502,27 @@ public static class Bound extends PTransform> { } /** - * Returns a new PubsubIO.Read PTransform that's like this one but with the given - * step name. Does not modify the object. + * Returns a transform that's like this one but with the given step name. + * + *

Does not modify this object. */ public Bound named(String name) { - return new Bound<>(name, subscription, topic, timestampLabel, - coder, idLabel, maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } /** - * Returns a new PubsubIO.Read PTransform that's like this one but reading from the - * given subscription. Does not modify the object. + * Returns a transform that's like this one but reading from the + * given subscription. + * + *

See {@link PubsubIO.PubsubSubscription#fromPath(String)} for more details on the format + * of the {@code subscription} string. * *
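For example, a fully qualified subscription path takes roughly this form (a sketch; the project and subscription names are placeholders):

    PCollection<String> records = p.apply(
        PubsubIO.Read.subscription("projects/my-project/subscriptions/my-subscription"));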

Multiple readers reading from the same subscription will each receive - * some arbirary portion of the data. Most likely, separate readers should + * some arbitrary portion of the data. Most likely, separate readers should * use their own subscriptions. + * + *

Does not modify this object. */ public Bound subscription(String subscription) { return new Bound<>(name, PubsubSubscription.fromPath(subscription), topic, timestampLabel, @@ -454,77 +530,84 @@ public Bound subscription(String subscription) { } /** - * Returns a new PubsubIO.Read PTransform that's like this one but reading from the - * give topic. Does not modify the object. + * Returns a transform that's like this one but that reads from the specified topic. + * + *

See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the + * format of the {@code topic} string. + * + *

Does not modify this object. */ public Bound topic(String topic) { - return new Bound<>(name, subscription, PubsubTopic.fromPath(topic), timestampLabel, - coder, idLabel, maxNumRecords, maxReadTime); + return new Bound<>(name, subscription, PubsubTopic.fromPath(topic), timestampLabel, coder, + idLabel, maxNumRecords, maxReadTime); } /** - * Returns a new PubsubIO.Read PTransform that's like this one but reading timestamps - * from the given PubSub label. Does not modify the object. + * Returns a transform that's like this one but that reads message timestamps + * from the given message attribute. See {@link PubsubIO.Read#timestampLabel(String)} for + * more details on the format of the timestamp attribute. + * + *

Does not modify this object. */ public Bound timestampLabel(String timestampLabel) { - return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel, - maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } /** - * Returns a new PubsubIO.Read PTransform that's like this one but reading unique ids - * from the given PubSub label. Does not modify the object. + * Returns a transform that's like this one but that reads unique message IDs + * from the given message attribute. See {@link PubsubIO.Read#idLabel(String)} for more + * details on the format of the ID attribute. + * + *

Does not modify this object. */ public Bound idLabel(String idLabel) { - return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel, - maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } /** - * Returns a new PubsubIO.Read PTransform that's like this one but that uses the given - * {@code Coder} to decode each record into a value of type {@code X}. Does not modify - * this object. + * Returns a transform that's like this one but that uses the given + * {@link Coder} to decode each record into a value of type {@code X}. + * + *

Does not modify this object. * * @param the type of the decoded elements, and the * elements of the resulting PCollection. */ public Bound withCoder(Coder coder) { - return new Bound<>(name, subscription, topic, timestampLabel, coder, idLabel, - maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } /** - * Sets the maximum number of records that will be read from Pubsub. - * - *

Setting either this or {@link #maxReadTime} will cause the output {@code PCollection} - * to be bounded. + * Returns a transform that's like this one but will only read up to the specified + * maximum number of records from Cloud Pub/Sub. The transform produces a bounded + * {@link PCollection}. See {@link PubsubIO.Read#maxNumRecords(int)} for more details. */ public Bound maxNumRecords(int maxNumRecords) { - return new Bound<>(name, subscription, topic, timestampLabel, - coder, idLabel, maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } /** - * Sets the maximum duration during which records will be read from Pubsub. - * - *

Setting either this or {@link #maxNumRecords} will cause the output {@code PCollection} - * to be bounded. + * Returns a transform that's like this one but will only read during the specified + * duration from Cloud Pub/Sub. The transform produces a bounded {@link PCollection}. + * See {@link PubsubIO.Read#maxReadTime(Duration)} for more details. */ public Bound maxReadTime(Duration maxReadTime) { - return new Bound<>(name, subscription, topic, timestampLabel, - coder, idLabel, maxNumRecords, maxReadTime); + return new Bound<>( + name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime); } @Override public PCollection apply(PInput input) { if (topic == null && subscription == null) { - throw new IllegalStateException( - "need to set either the topic or the subscription for " + throw new IllegalStateException("need to set either the topic or the subscription for " + "a PubsubIO.Read transform"); } if (topic != null && subscription != null) { - throw new IllegalStateException( - "Can't set both the topic and the subscription for a " + throw new IllegalStateException("Can't set both the topic and the subscription for a " + "PubsubIO.Read transform"); } @@ -533,8 +616,7 @@ public PCollection apply(PInput input) { if (boundedOutput) { return input.getPipeline().begin() .apply(Create.of((Void) null)).setCoder(VoidCoder.of()) - .apply(ParDo.of(new PubsubReader())) - .setCoder(coder); + .apply(ParDo.of(new PubsubReader())).setCoder(coder); } else { return PCollection.createPrimitiveOutputInternal( input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED) @@ -586,23 +668,22 @@ public void processElement(ProcessContext c) throws IOException { String subscription; if (getSubscription() == null) { - String topic = getTopic().asV1Beta2Path(); + String topic = getTopic().asPath(); String[] split = topic.split("/"); - subscription = "projects/" + split[1] + "/subscriptions/" + split[3] - + "_dataflow_" + new Random().nextLong(); - Subscription subInfo = new Subscription() - .setAckDeadlineSeconds(60) - .setTopic(topic); + subscription = + "projects/" + split[1] + "/subscriptions/" + split[3] + "_dataflow_" + + new Random().nextLong(); + Subscription subInfo = new Subscription().setAckDeadlineSeconds(60).setTopic(topic); try { pubsubClient.projects().subscriptions().create(subscription, subInfo).execute(); } catch (Exception e) { throw new RuntimeException("Failed to create subscription: ", e); } } else { - subscription = getSubscription().asV1Beta2Path(); + subscription = getSubscription().asPath(); } - Instant endTime = getMaxReadTime() == null + Instant endTime = (getMaxReadTime() == null) ? new Instant(Long.MAX_VALUE) : Instant.now().plus(getMaxReadTime()); List messages = new ArrayList<>(); @@ -662,70 +743,82 @@ public void processElement(ProcessContext c) throws IOException { throw new RuntimeException( "Message from pubsub missing timestamp label: " + getTimestampLabel()); } - timestamp = new Instant(Long.parseLong( - message.getAttributes().get(getTimestampLabel()))); + timestamp = + new Instant(Long.parseLong(message.getAttributes().get(getTimestampLabel()))); } c.outputWithTimestamp( - CoderUtils.decodeFromByteArray(getCoder(), message.decodeData()), - timestamp); + CoderUtils.decodeFromByteArray(getCoder(), message.decodeData()), timestamp); } } } } + + /** Disallow construction of utility class. */ + private Read() {} } ///////////////////////////////////////////////////////////////////////////// + /** Disallow construction of utility class. 
*/ + private PubsubIO() {} + /** * A {@link PTransform} that continuously writes a - * {@code PCollection} to a Pubsub stream. + * {@link PCollection} of {@link String Strings} to a Cloud Pub/Sub stream. */ // TODO: Support non-String encodings. public static class Write { + /** + * Creates a transform that writes to Pub/Sub with the given step name. + */ public static Bound named(String name) { return new Bound<>(DEFAULT_PUBSUB_CODER).named(name); } - /** The topic to publish to. - * Cloud Pubsub topic names should be {@code /topics//}, - * where {@code } is the name of the publishing project. + /** + * Creates a transform that publishes to the specified topic. + * + *

See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format of the + * {@code topic} string. */ public static Bound topic(String topic) { return new Bound<>(DEFAULT_PUBSUB_CODER).topic(topic); } /** - * If specified, Dataflow will add a Pubsub label to each output record specifying the logical - * timestamp of the record. {@code } determines the label name. The label value - * is a numerical value representing the number of milliseconds since the Unix epoch. For - * example, if using the joda time classes, the org.joda.time.Instant(long) constructor can be - * used to parse this value. If the output from this sink is being read by another Dataflow - * source, then PubsubIO.Read.timestampLabel can be used to ensure that the other source reads - * these timestamps from the appropriate label. + * Creates a transform that writes to Pub/Sub, adds each record's timestamp to the published + * messages in an attribute with the specified name. The value of the attribute will be a number + * representing the number of milliseconds since the Unix epoch. For example, if using the Joda + * time classes, {@link Instant#Instant(long)} can be used to parse this value. + * + *

If the output from this sink is being read by another Dataflow source, then + * {@link PubsubIO.Read#timestampLabel(String)} can be used to ensure the other source reads + * these timestamps from the appropriate attribute. */ public static Bound timestampLabel(String timestampLabel) { return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel); } /** - * If specified, Dataflow will add a Pubsub label to each output record containing a unique - * identifier for that record. {@code } determines the label name. The label value - * is an opaque string value. This is useful if the the output from this sink is being read - * by another Dataflow source, in which case PubsubIO.Read.idLabel can be used to ensure that - * the other source reads these ids from the appropriate label. + * Creates a transform that writes to Pub/Sub, adding each record's unique identifier to the + * published messages in an attribute with the specified name. The value of the attribute is an + * opaque string. + * + *

If the output from this sink is being read by another Dataflow source, then + * {@link PubsubIO.Read#idLabel(String)} can be used to ensure that the other source reads + * these unique identifiers from the appropriate attribute. */ public static Bound idLabel(String idLabel) { return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel); } - /** - * Returns a TextIO.Write PTransform that uses the given - * {@code Coder} to encode each of the elements of the input - * {@code PCollection} into an output PubSub record. + /** + * Creates a transform that uses the given {@link Coder} to encode each of the + * elements of the input collection into an output message. * - *
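Put together, a write might be configured roughly as in this sketch (the step name, topic path, and attribute names are placeholders; events stands in for an input PCollection of Strings):

    events.apply(PubsubIO.Write.named("WriteToPubsub")
        .topic("projects/my-project/topics/my-topic")
        .timestampLabel("ts")
        .idLabel("id"));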

By default, uses {@link StringUtf8Coder}, which writes input - * Java strings directly as records. + *

By default, uses {@link StringUtf8Coder}, which writes input Java strings directly as + * records. * * @param the type of the elements of the input PCollection */ @@ -734,69 +827,85 @@ public static Bound withCoder(Coder coder) { } /** - * A {@link PTransform} that writes an unbounded {@code PCollection} - * to a PubSub stream. + * A {@link PTransform} that writes an unbounded {@link PCollection} of {@link String Strings} + * to a Cloud Pub/Sub stream. */ public static class Bound extends PTransform, PDone> { - /** The Pubsub topic to publish to. */ - PubsubTopic topic; - String timestampLabel; - String idLabel; - final Coder coder; + /** The Cloud Pub/Sub topic to publish to. */ + @Nullable private final PubsubTopic topic; + /** The name of the message attribute to publish message timestamps in. */ + @Nullable private final String timestampLabel; + /** The name of the message attribute to publish unique message IDs in. */ + @Nullable private final String idLabel; + private final Coder coder; - Bound(Coder coder) { - this.coder = coder; + private Bound(Coder coder) { + this(null, null, null, null, coder); } - Bound(String name, PubsubTopic topic, String timestampLabel, String idLabel, Coder coder) { + private Bound( + String name, PubsubTopic topic, String timestampLabel, String idLabel, Coder coder) { super(name); - if (topic != null) { - this.topic = topic; - } + this.topic = topic; this.timestampLabel = timestampLabel; this.idLabel = idLabel; this.coder = coder; } /** - * Returns a new PubsubIO.Write PTransform that's like this one but with the given step - * name. Does not modify the object. + * Returns a new transform that's like this one but with the specified step + * name. + * + *

Does not modify this object. */ public Bound named(String name) { return new Bound<>(name, topic, timestampLabel, idLabel, coder); } /** - * Returns a new PubsubIO.Write PTransform that's like this one but writing to the given - * topic. Does not modify the object. + * Returns a new transform that's like this one but that writes to the specified + * topic. + * + *

See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format of the + * {@code topic} string. + * + *

Does not modify this object. */ public Bound topic(String topic) { return new Bound<>(name, PubsubTopic.fromPath(topic), timestampLabel, idLabel, coder); } /** - * Returns a new PubsubIO.Write PTransform that's like this one but publishing timestamps - * to the given PubSub label. Does not modify the object. + * Returns a new transform that's like this one but that publishes record timestamps + * to a message attribute with the specified name. See + * {@link PubsubIO.Write#timestampLabel(String)} for more details. + * + *

Does not modify this object. */ public Bound timestampLabel(String timestampLabel) { return new Bound<>(name, topic, timestampLabel, idLabel, coder); } /** - * Returns a new PubsubIO.Write PTransform that's like this one but publishing record ids - * to the given PubSub label. Does not modify the object. + * Returns a new transform that's like this one but that publishes unique record IDs + * to a message attribute with the specified name. See {@link PubsubIO.Write#idLabel(String)} + * for more details. + * + *

Does not modify this object. */ - public Bound idLabel(String idLabel) { - return new Bound<>(name, topic, timestampLabel, idLabel, coder); + public Bound idLabel(String idLabel) { + return new Bound<>(name, topic, timestampLabel, idLabel, coder); } - /** - * Returns a new PubsubIO.Write PTransform that's like this one - * but that uses the given {@code Coder} to encode each of - * the elements of the input {@code PCollection} into an - * output record. Does not modify this object. + /** + * Returns a new transform that's like this one + * but that uses the given {@link Coder} to encode each of + * the elements of the input {@link PCollection} into an + * output record. + * + *

Does not modify this object. * - * @param the type of the elements of the input PCollection + * @param the type of the elements of the input {@link PCollection} */ public Bound withCoder(Coder coder) { return new Bound<>(name, topic, timestampLabel, idLabel, coder); @@ -805,8 +914,7 @@ public Bound withCoder(Coder coder) { @Override public PDone apply(PCollection input) { if (topic == null) { - throw new IllegalStateException( - "need to set the topic of a PubsubIO.Write transform"); + throw new IllegalStateException("need to set the topic of a PubsubIO.Write transform"); } input.apply(ParDo.of(new PubsubWriter())); return PDone.in(input.getPipeline()); @@ -838,7 +946,6 @@ private class PubsubWriter extends DoFn { private transient List output; private transient Pubsub pubsubClient; - @Override public void startBundle(Context c) { this.output = new ArrayList<>(); @@ -849,16 +956,15 @@ public void startBundle(Context c) { @Override public void processElement(ProcessContext c) throws IOException { - PubsubMessage message = new PubsubMessage().encodeData( - CoderUtils.encodeToByteArray(getCoder(), c.element())); + PubsubMessage message = + new PubsubMessage().encodeData(CoderUtils.encodeToByteArray(getCoder(), c.element())); if (getTimestampLabel() != null) { Map attributes = message.getAttributes(); if (attributes == null) { attributes = new HashMap<>(); message.setAttributes(attributes); } - attributes.put( - getTimestampLabel(), String.valueOf(c.timestamp().getMillis())); + attributes.put(getTimestampLabel(), String.valueOf(c.timestamp().getMillis())); } output.add(message); @@ -877,10 +983,14 @@ public void finishBundle(Context c) throws IOException { private void publish() throws IOException { PublishRequest publishRequest = new PublishRequest().setMessages(output); pubsubClient.projects().topics() - .publish(getTopic().asV1Beta2Path(), publishRequest).execute(); + .publish(getTopic().asPath(), publishRequest) + .execute(); output.clear(); } } } + + /** Disallow construction of utility class. */ + private Write() {} } } From 10dc3b2e20e8c0d1e90b2e60323783cd44caf443 Mon Sep 17 00:00:00 2001 From: tgroh Date: Wed, 18 Nov 2015 16:59:33 -0800 Subject: [PATCH 1180/1541] Fix WithTimestamps Java 8 Example ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108197218 --- .../google/cloud/dataflow/sdk/transforms/WithTimestamps.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java index 8b581b4ea47a3..85a93bfe18d43 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java @@ -59,7 +59,7 @@ public class WithTimestamps extends PTransform, PCollection *

Example of use in Java 8: *

{@code
    * PCollection<Record> timestampedRecords = records.apply(
-   *     TimestampElements.via((Record rec) -> rec.getInstant());
+   *     WithTimestamps.of((Record rec) -> rec.getInstant()));
    * }
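In full, the Java 8 usage reads roughly as follows (a sketch; Record is a hypothetical element class with a getInstant() accessor):

    PCollection<Record> records = ...;
    PCollection<Record> timestampedRecords =
        records.apply(WithTimestamps.of((Record rec) -> rec.getInstant()));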
*/ public static WithTimestamps of(SerializableFunction fn) { From 3ff65d008cda98312b117278962b0593c05e5de5 Mon Sep 17 00:00:00 2001 From: peihe Date: Thu, 19 Nov 2015 00:12:46 -0800 Subject: [PATCH 1181/1541] Deprecate the incorrect chaining in RetryHttpRequestInitializer Move to use ChainingHttpRequestInitializer for chaining. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108220465 --- .../cloud/dataflow/sdk/io/DatastoreIO.java | 4 +-- .../sdk/util/RetryHttpRequestInitializer.java | 35 +++++++++++++++++- .../cloud/dataflow/sdk/util/Transport.java | 36 ++++++++++++++----- 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java index a9d5c13c47b74..353cacac67d21 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java @@ -466,7 +466,7 @@ private long queryLatestStatisticsTimestamp(Datastore datastore) throws Datastor private Datastore getDatastore(PipelineOptions pipelineOptions) { DatastoreOptions.Builder builder = new DatastoreOptions.Builder().host(host).dataset(datasetId).initializer( - new RetryHttpRequestInitializer(null)); + new RetryHttpRequestInitializer()); Credential credential = pipelineOptions.as(GcpOptions.class).getGcpCredential(); if (credential != null) { @@ -606,7 +606,7 @@ public DatastoreWriter createWriter(PipelineOptions options) throws Exception { new DatastoreOptions.Builder() .host(sink.host) .dataset(sink.datasetId) - .initializer(new RetryHttpRequestInitializer(null)); + .initializer(new RetryHttpRequestInitializer()); Credential credential = options.as(GcpOptions.class).getGcpCredential(); if (credential != null) { builder.credential(credential); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java index 6259428649804..756dce0a99859 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java @@ -123,6 +123,7 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, } } + @Deprecated private final HttpRequestInitializer chained; private final HttpResponseInterceptor responseInterceptor; // response Interceptor to use @@ -133,22 +134,51 @@ public boolean handleResponse(HttpRequest request, HttpResponse response, private Set ignoredResponseCodes = new HashSet<>(DEFAULT_IGNORED_RESPONSE_CODES); + public RetryHttpRequestInitializer() { + this(Collections.emptyList()); + } + /** * @param chained a downstream HttpRequestInitializer, which will also be * applied to HttpRequest initialization. May be null. + * + * @deprecated use {@link #RetryHttpRequestInitializer}. */ + @Deprecated public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) { this(chained, Collections.emptyList()); } + /** + * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged. + */ + public RetryHttpRequestInitializer(Collection additionalIgnoredResponseCodes) { + this(additionalIgnoredResponseCodes, null); + } + + /** * @param chained a downstream HttpRequestInitializer, which will also be * applied to HttpRequest initialization. May be null. 
* @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged. + * + * @deprecated use {@link #RetryHttpRequestInitializer(Collection)}. */ + @Deprecated public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, Collection additionalIgnoredResponseCodes) { - this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT, additionalIgnoredResponseCodes, null); + this(chained, additionalIgnoredResponseCodes, null); + } + + /** + * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged. + * @param responseInterceptor HttpResponseInterceptor to be applied on all requests. May be null. + */ + public RetryHttpRequestInitializer( + Collection additionalIgnoredResponseCodes, + @Nullable HttpResponseInterceptor responseInterceptor) { + this(null, NanoClock.SYSTEM, Sleeper.DEFAULT, additionalIgnoredResponseCodes, + responseInterceptor); } /** @@ -156,7 +186,10 @@ public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained, * initialization. May be null. * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged. * @param responseInterceptor HttpResponseInterceptor to be applied on all requests. May be null. + * + * @deprecated use {@link #RetryHttpRequestInitializer(Collection, HttpResponseInterceptor)}. */ + @Deprecated public RetryHttpRequestInitializer( @Nullable HttpRequestInitializer chained, Collection additionalIgnoredResponseCodes, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java index 72febae67d239..7735a9e01fcc0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java @@ -16,7 +16,9 @@ package com.google.cloud.dataflow.sdk.util; +import com.google.api.client.auth.oauth2.Credential; import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; +import com.google.api.client.http.HttpRequestInitializer; import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; @@ -28,6 +30,7 @@ import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.GcsOptions; +import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer; import com.google.common.collect.ImmutableList; import java.io.IOException; @@ -95,8 +98,10 @@ private static ApiComponents apiComponentsFromUrl(String urlString) { public static Bigquery.Builder newBigQueryClient(BigQueryOptions options) { return new Bigquery.Builder(getTransport(), getJsonFactory(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(options.getGcpCredential(), ImmutableList.of(404))) + chainHttpRequestInitializer( + options.getGcpCredential(), + // Do not log 404. It clutters the output and is possibly even required by the caller. 
+ new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); } @@ -110,8 +115,10 @@ private static ApiComponents apiComponentsFromUrl(String urlString) { public static Pubsub.Builder newPubsubClient(DataflowPipelineOptions options) { return new Pubsub.Builder(getTransport(), getJsonFactory(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(options.getGcpCredential(), ImmutableList.of(404))) + chainHttpRequestInitializer( + options.getGcpCredential(), + // Do not log 404. It clutters the output and is possibly even required by the caller. + new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setRootUrl(options.getPubsubRootUrl()) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); @@ -131,8 +138,10 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options return new Dataflow.Builder(getTransport(), getJsonFactory(), - // Do not log 404. It clutters the output and is possibly even required by the caller. - new RetryHttpRequestInitializer(options.getGcpCredential(), ImmutableList.of(404))) + chainHttpRequestInitializer( + options.getGcpCredential(), + // Do not log 404. It clutters the output and is possibly even required by the caller. + new RetryHttpRequestInitializer(ImmutableList.of(404)))) .setApplicationName(options.getAppName()) .setRootUrl(components.rootUrl) .setServicePath(components.servicePath) @@ -160,10 +169,12 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options newStorageClient(GcsOptions options) { String servicePath = options.getGcsEndpoint(); Storage.Builder storageBuilder = new Storage.Builder(getTransport(), getJsonFactory(), - new RetryHttpRequestInitializer( + chainHttpRequestInitializer( + options.getGcpCredential(), // Do not log the code 404. Code up the stack will deal with 404's if needed, and // logging it by default clutters the output during file staging. - options.getGcpCredential(), ImmutableList.of(404), new UploadIdResponseInterceptor())) + new RetryHttpRequestInitializer( + ImmutableList.of(404), new UploadIdResponseInterceptor()))) .setApplicationName(options.getAppName()) .setGoogleClientRequestInitializer(options.getGoogleApiTrace()); if (servicePath != null) { @@ -173,4 +184,13 @@ public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options } return storageBuilder; } + + private static HttpRequestInitializer chainHttpRequestInitializer( + Credential credential, HttpRequestInitializer httpRequestInitializer) { + if (credential == null) { + return httpRequestInitializer; + } else { + return new ChainingHttpRequestInitializer(credential, httpRequestInitializer); + } + } } From 056279ed7a28b618ebb3af256bce33997086f98b Mon Sep 17 00:00:00 2001 From: tgroh Date: Thu, 19 Nov 2015 11:10:39 -0800 Subject: [PATCH 1182/1541] Extend input type of Flatten#iterables This allows any PCollection with a subclass of iterable as the type of element to be the input to a Flatten#iterables PTransform. 
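For instance, a collection of string lists can now be flattened directly, roughly as in the new tests (a sketch; p is the test pipeline and LINES stands in for a list of string lists):

    PCollection<List<String>> input = p.apply(
        Create.<List<String>>of(LINES).withCoder(ListCoder.of(StringUtf8Coder.of())));
    PCollection<String> output = input.apply(Flatten.<String>iterables());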
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108265658 --- .../dataflow/sdk/transforms/Flatten.java | 16 ++--- .../dataflow/sdk/transforms/FlattenTest.java | 58 ++++++++++++++++++- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index c186353b02097..2e1a3ab04f85e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -18,7 +18,7 @@ import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException; import com.google.cloud.dataflow.sdk.coders.Coder; -import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.IterableLikeCoder; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; @@ -164,17 +164,17 @@ protected Coder getDefaultOutputCoder(PCollectionList input) * the output {@code PCollection} */ public static class FlattenIterables - extends PTransform>, PCollection> { + extends PTransform>, PCollection> { @Override - public PCollection apply(PCollection> in) { - Coder> inCoder = in.getCoder(); - if (!(inCoder instanceof IterableCoder)) { + public PCollection apply(PCollection> in) { + Coder> inCoder = in.getCoder(); + if (!(inCoder instanceof IterableLikeCoder)) { throw new IllegalArgumentException( - "expecting the input Coder to be an IterableCoder"); + "expecting the input Coder to be an IterableLikeCoder"); } - IterableCoder iterableCoder = (IterableCoder) inCoder; - Coder elemCoder = iterableCoder.getElemCoder(); + @SuppressWarnings("unchecked") + Coder elemCoder = ((IterableLikeCoder) inCoder).getElemCoder(); return in.apply(ParDo.named("FlattenIterables").of( new DoFn, T>() { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java index b85ec021e8102..0c9d3315db686 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlattenTest.java @@ -24,7 +24,10 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CollectionCoder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; +import com.google.cloud.dataflow.sdk.coders.ListCoder; +import com.google.cloud.dataflow.sdk.coders.SetCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; @@ -36,6 +39,7 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionList; import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.common.collect.ImmutableSet; import org.joda.time.Duration; import org.junit.Assert; @@ -49,7 +53,9 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.Set; /** * Tests for Flatten. 
@@ -183,6 +189,57 @@ public void testFlattenIterables() { p.run(); } + @Test + @Category(RunnableOnService.class) + public void testFlattenIterablesLists() { + Pipeline p = TestPipeline.create(); + + PCollection> input = + p.apply(Create.>of(LINES).withCoder(ListCoder.of(StringUtf8Coder.of()))); + + PCollection output = input.apply(Flatten.iterables()); + + DataflowAssert.that(output).containsInAnyOrder(LINES_ARRAY); + + p.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testFlattenIterablesSets() { + Pipeline p = TestPipeline.create(); + + Set linesSet = ImmutableSet.copyOf(LINES); + + PCollection> input = + p.apply(Create.>of(linesSet).withCoder(SetCoder.of(StringUtf8Coder.of()))); + + PCollection output = input.apply(Flatten.iterables()); + + DataflowAssert.that(output).containsInAnyOrder(LINES_ARRAY); + + p.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testFlattenIterablesCollections() { + + Pipeline p = TestPipeline.create(); + + Set linesSet = ImmutableSet.copyOf(LINES); + + PCollection> input = + p.apply(Create.>of(linesSet) + .withCoder(CollectionCoder.of(StringUtf8Coder.of()))); + + PCollection output = input.apply(Flatten.iterables()); + + DataflowAssert.that(output).containsInAnyOrder(LINES_ARRAY); + + p.run(); + } + @Test @Category(RunnableOnService.class) public void testFlattenIterablesEmpty() { @@ -201,7 +258,6 @@ public void testFlattenIterablesEmpty() { p.run(); } - ///////////////////////////////////////////////////////////////////////////// @Test From cc6853579056e15a29681901ca469f388cf42fc3 Mon Sep 17 00:00:00 2001 From: markshields Date: Thu, 19 Nov 2015 12:48:24 -0800 Subject: [PATCH 1183/1541] Stream Iterable state through MergedBag This avoids OOMs on GBK results with many elements. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108275870 --- .../util/state/InMemoryStateInternals.java | 30 +++++++++---------- .../dataflow/sdk/util/state/MergedBag.java | 8 ++--- .../worker/StreamingDataflowWorkerTest.java | 10 +++++-- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java index da18405be3ddc..a710dd050cb5c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java @@ -31,7 +31,6 @@ * and for running tests that need state. 
*/ public class InMemoryStateInternals extends MergingStateInternals { - private interface InMemoryState { boolean isEmptyForTesting(); } @@ -40,7 +39,6 @@ private interface InMemoryState { @Override protected StateBinder binderForNamespace(final StateNamespace namespace) { return new StateBinder() { - @Override public ValueState bindValue(StateTag> address, Coder coder) { return new InMemoryValue(); @@ -52,11 +50,10 @@ public BagState bindBag(final StateTag> address, Coder ele } @Override - public - CombiningValueStateInternal bindCombiningValue( - StateTag> address, - Coder accumCoder, - final CombineFn combineFn) { + public CombiningValueStateInternal + bindCombiningValue( + StateTag> address, + Coder accumCoder, final CombineFn combineFn) { return new InMemoryCombiningValue(combineFn); } @@ -115,13 +112,12 @@ public void set(T input) { @Override public boolean isEmptyForTesting() { - return isCleared; + return isCleared; } } private final class WatermarkStateInternalImplementation implements WatermarkStateInternal, InMemoryState { - private Instant minimumHold = null; @Override @@ -150,7 +146,7 @@ public void add(Instant watermarkHold) { @Override public boolean isEmptyForTesting() { - return minimumHold == null; + return minimumHold == null; } @Override @@ -171,7 +167,6 @@ public String toString() { private final class InMemoryCombiningValue implements CombiningValueStateInternal, InMemoryState { - private boolean isCleared = true; private final CombineFn combineFn; private AccumT accum; @@ -233,18 +228,23 @@ public void addAccum(AccumT accum) { @Override public boolean isEmptyForTesting() { - return isCleared; + return isCleared; } } private static final class InMemoryBag implements BagState, InMemoryState { - private final List contents = new ArrayList<>(); + private List contents = new ArrayList<>(); @Override public void clear() { // Even though we're clearing we can't remove this from the in-memory state map, since // other users may already have a handle on this Bag. - contents.clear(); + // The result of get/read below must be stable for the lifetime of the bundle within which it + // was generated. In batch and direct runners the bundle lifetime can be + // greater than the window lifetime, in which case this method can be called while + // the result is still in use. We protect against this by hot-swapping instead of + // clearing the contents. + contents = new ArrayList<>(); } @Override @@ -264,7 +264,7 @@ public void add(T input) { @Override public boolean isEmptyForTesting() { - return contents.isEmpty(); + return contents.isEmpty(); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedBag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedBag.java index 76c8c67a15f3e..f39a681347b3e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedBag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedBag.java @@ -27,7 +27,6 @@ * @param the type of elements in the bag */ class MergedBag implements BagState { - private final Collection> sources; private final BagState result; @@ -61,12 +60,11 @@ public StateContents> get() { return new StateContents>() { @Override public Iterable read() { - // Can't use FluentIterables#toList because some values may be legitimately null. 
- List result = new ArrayList<>(); + List> allIterables = new ArrayList<>(); for (StateContents> future : futures) { - Iterables.addAll(result, future.read()); + allIterables.add(future.read()); } - return result; + return Iterables.concat(allIterables); } }; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java index 5394d8fb0e5eb..938975cd3e405 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java @@ -783,7 +783,11 @@ public void testMergeWindows() throws Exception { ByteString.copyFromUtf8(window + "+shold"); String stateFamily = "MergeWindows"; ByteString bufferData = ByteString.copyFromUtf8("\000data0"); - ByteString outputData = ByteString.copyFromUtf8("\000\000\000\001\005data0"); + // Encoded form for Iterable: -1, true, 'data0', false + ByteString outputData = ByteString.copyFrom( + new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, + 0x01, 0x05, 0x64, 0x61, 0x74, 0x61, 0x30, 0x00 }); + // These values are not essential to the change detector test long timerTimestamp = 999000L; @@ -881,8 +885,8 @@ public void testMergeWindows() throws Exception { actualOutput.getOutputMessages(0).getBundles(0).getKey().toStringUtf8()); assertEquals(0, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getTimestamp()); - assertEquals( - outputData, actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getData()); + assertEquals(outputData, + actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getData()); ByteString metadata = actualOutput.getOutputMessages(0).getBundles(0).getMessages(0).getMetadata(); From 0f3b8a52cc169840cf92c825da89f9f286f28787 Mon Sep 17 00:00:00 2001 From: tgroh Date: Thu, 19 Nov 2015 13:49:36 -0800 Subject: [PATCH 1184/1541] Condense PipelineOptionsFactory Exceptions This allows runtime failures to all be resolved at once rather than rerunning every time and fixing each failure individually. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108281549 --- .../sdk/options/PipelineOptionsFactory.java | 227 +++++++++++++++--- .../options/PipelineOptionsFactoryTest.java | 188 +++++++++++++++ 2 files changed, 381 insertions(+), 34 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java index c46c59e4177d2..bbdc8d5406808 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java @@ -61,6 +61,7 @@ import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.lang.reflect.Proxy; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -880,6 +881,7 @@ private static List getPropertyDescriptors(Class beanClas SortedMap propertyNamesToGetters = getPropertyNamesToGetters(methods); List descriptors = Lists.newArrayList(); + List mismatches = new ArrayList<>(); /* * Add all the getter/setter pairs to the list of descriptors removing the getter once * it has been paired up. 
@@ -898,15 +900,20 @@ private static List getPropertyDescriptors(Class beanClas if (getterMethod != null) { Class getterPropertyType = getterMethod.getReturnType(); Class setterPropertyType = method.getParameterTypes()[0]; - Preconditions.checkArgument(getterPropertyType == setterPropertyType, - "Type mismatch between getter and setter methods for property [%s]. " - + "Getter is of type [%s] whereas setter is of type [%s].", - propertyName, getterPropertyType.getName(), setterPropertyType.getName()); + if (getterPropertyType != setterPropertyType) { + TypeMismatch mismatch = new TypeMismatch(); + mismatch.propertyName = propertyName; + mismatch.getterPropertyType = getterPropertyType; + mismatch.setterPropertyType = setterPropertyType; + mismatches.add(mismatch); + continue; + } } descriptors.add(new PropertyDescriptor( propertyName, getterMethod, method)); } + throwForTypeMismatches(mismatches); // Add the remaining getters with missing setters. for (Map.Entry getterToMethod : propertyNamesToGetters.entrySet()) { @@ -916,6 +923,35 @@ private static List getPropertyDescriptors(Class beanClas return descriptors; } + private static class TypeMismatch { + private String propertyName; + private Class getterPropertyType; + private Class setterPropertyType; + } + + private static void throwForTypeMismatches(List mismatches) { + if (mismatches.size() == 1) { + TypeMismatch mismatch = mismatches.get(0); + throw new IllegalArgumentException(String.format( + "Type mismatch between getter and setter methods for property [%s]. " + + "Getter is of type [%s] whereas setter is of type [%s].", + mismatch.propertyName, + mismatch.getterPropertyType.getName(), + mismatch.setterPropertyType.getName())); + } else if (mismatches.size() > 1) { + StringBuilder builder = new StringBuilder( + String.format("Type mismatches between getters and setters detected:")); + for (TypeMismatch mismatch : mismatches) { + builder.append(String.format( + "%n - Property [%s]: Getter is of type [%s] whereas setter is of type [%s].", + mismatch.propertyName, + mismatch.getterPropertyType.getName(), + mismatch.setterPropertyType.getName())); + } + throw new IllegalArgumentException(builder.toString()); + } + } + /** * Returns a map of the property name to the getter method it represents. * If there are duplicate methods with the same bean name, then it is indeterminate @@ -1006,18 +1042,21 @@ private static List validateClass(Class multipleDefinitions = Lists.newArrayList(); for (Map.Entry> entry : methodNameToMethodMap.asMap().entrySet()) { Set> returnTypes = FluentIterable.from(entry.getValue()) .transform(ReturnTypeFetchingFunction.INSTANCE).toSet(); SortedSet collidingMethods = FluentIterable.from(entry.getValue()) .toSortedSet(MethodComparator.INSTANCE); - Preconditions.checkArgument(returnTypes.size() == 1, - "Method [%s] has multiple definitions %s with different return types for [%s].", - entry.getKey().getName(), - collidingMethods, - iface.getName()); + if (returnTypes.size() > 1) { + MultipleDefinitions defs = new MultipleDefinitions(); + defs.method = entry.getKey(); + defs.collidingMethods = collidingMethods; + multipleDefinitions.add(defs); + } } + throwForMultipleDefinitions(iface, multipleDefinitions); // Verify that there is no getter with a mixed @JsonIgnore annotation and verify // that no setter has @JsonIgnore. 
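For example, an options interface with two bad property pairs now surfaces both problems in a single exception instead of failing on the first one (a sketch; the interface is hypothetical):

    public interface MismatchedOptions extends PipelineOptions {
      boolean getValue();
      void setValue(int value);     // type differs from the getter

      long getOther();
      void setOther(String other);  // type differs from the getter
    }
    // PipelineOptionsFactory.as(MismatchedOptions.class) throws one
    // IllegalArgumentException that lists both the [value] and [other] mismatches.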
@@ -1033,6 +1072,9 @@ private static List validateClass(Class descriptors = getPropertyDescriptors(klass); + List incompletelyIgnoredGetters = new ArrayList<>(); + List ignoredSetters = new ArrayList<>(); + for (PropertyDescriptor descriptor : descriptors) { if (descriptor.getReadMethod() == null || descriptor.getWriteMethod() == null @@ -1050,44 +1092,58 @@ private static List validateClass(Class settersWithJsonIgnore = Sets.filter(methodNameToAllMethodMap.get(descriptor.getWriteMethod()), JsonIgnorePredicate.INSTANCE); Iterable settersWithJsonIgnoreClassNames = FluentIterable.from(settersWithJsonIgnore) - .transform(MethodToDeclaringClassFunction.INSTANCE) - .transform(ReflectHelpers.CLASS_NAME); - - Preconditions.checkArgument(settersWithJsonIgnore.isEmpty(), - "Expected setter for property [%s] to not be marked with @JsonIgnore on %s", - descriptor.getName(), settersWithJsonIgnoreClassNames); + .transform(MethodToDeclaringClassFunction.INSTANCE) + .transform(ReflectHelpers.CLASS_NAME); + + if (!settersWithJsonIgnore.isEmpty()) { + IgnoredSetter ignored = new IgnoredSetter(); + ignored.descriptor = descriptor; + ignored.settersWithJsonIgnoreClassNames = settersWithJsonIgnoreClassNames; + ignoredSetters.add(ignored); + } } + throwForGettersWithInconsistentJsonIgnore(incompletelyIgnoredGetters); + throwForSettersWithJsonIgnore(ignoredSetters); + List missingBeanMethods = new ArrayList<>(); // Verify that each property has a matching read and write method. for (PropertyDescriptor propertyDescriptor : descriptors) { - Preconditions.checkArgument( - IGNORED_METHODS.contains(propertyDescriptor.getWriteMethod()) - || propertyDescriptor.getReadMethod() != null, - "Expected getter for property [%s] of type [%s] on [%s].", - propertyDescriptor.getName(), - propertyDescriptor.getPropertyType().getName(), - iface.getName()); - Preconditions.checkArgument( - IGNORED_METHODS.contains(propertyDescriptor.getReadMethod()) - || propertyDescriptor.getWriteMethod() != null, - "Expected setter for property [%s] of type [%s] on [%s].", - propertyDescriptor.getName(), - propertyDescriptor.getPropertyType().getName(), - iface.getName()); + if (!(IGNORED_METHODS.contains(propertyDescriptor.getWriteMethod()) + || propertyDescriptor.getReadMethod() != null)) { + MissingBeanMethod method = new MissingBeanMethod(); + method.property = propertyDescriptor; + method.methodType = "getter"; + missingBeanMethods.add(method); + continue; + } + if (!(IGNORED_METHODS.contains(propertyDescriptor.getReadMethod()) + || propertyDescriptor.getWriteMethod() != null)) { + MissingBeanMethod method = new MissingBeanMethod(); + method.property = propertyDescriptor; + method.methodType = "setter"; + missingBeanMethods.add(method); + continue; + } methods.add(propertyDescriptor.getReadMethod()); methods.add(propertyDescriptor.getWriteMethod()); } + throwForMissingBeanMethod(iface, missingBeanMethods); // Verify that no additional methods are on an interface that aren't a bean property. 
SortedSet unknownMethods = new TreeSet<>(MethodComparator.INSTANCE); @@ -1100,6 +1156,109 @@ private static List validateClass(Class collidingMethods; + } + + private static void throwForMultipleDefinitions( + Class iface, List definitions) { + if (definitions.size() == 1) { + MultipleDefinitions errDef = definitions.get(0); + throw new IllegalArgumentException(String.format( + "Method [%s] has multiple definitions %s with different return types for [%s].", + errDef.method.getName(), errDef.collidingMethods, iface.getName())); + } else if (definitions.size() > 1) { + StringBuilder errorBuilder = new StringBuilder(String.format( + "Interface [%s] has Methods with multiple definitions with different return types:", + iface.getName())); + for (MultipleDefinitions errDef : definitions) { + errorBuilder.append(String.format( + "%n - Method [%s] has multiple definitions %s", + errDef.method.getName(), + errDef.collidingMethods)); + } + throw new IllegalArgumentException(errorBuilder.toString()); + } + } + + private static class InconsistentlyIgnoredGetters { + PropertyDescriptor descriptor; + Iterable getterClassNames; + Iterable gettersWithJsonIgnoreClassNames; + } + + private static void throwForGettersWithInconsistentJsonIgnore( + List getters) { + if (getters.size() == 1) { + InconsistentlyIgnoredGetters getter = getters.get(0); + throw new IllegalArgumentException(String.format( + "Expected getter for property [%s] to be marked with @JsonIgnore on all %s, " + + "found only on %s", + getter.descriptor.getName(), getter.getterClassNames, + getter.gettersWithJsonIgnoreClassNames)); + } else if (getters.size() > 1) { + StringBuilder errorBuilder = + new StringBuilder("Property getters are inconsistently marked with @JsonIgnore:"); + for (InconsistentlyIgnoredGetters getter : getters) { + errorBuilder.append( + String.format("%n - Expected for property [%s] to be marked on all %s, " + + "found only on %s", + getter.descriptor.getName(), getter.getterClassNames, + getter.gettersWithJsonIgnoreClassNames)); + } + throw new IllegalArgumentException(errorBuilder.toString()); + } + } + + private static class IgnoredSetter { + PropertyDescriptor descriptor; + Iterable settersWithJsonIgnoreClassNames; + } + + private static void throwForSettersWithJsonIgnore(List setters) { + if (setters.size() == 1) { + IgnoredSetter setter = setters.get(0); + throw new IllegalArgumentException( + String.format("Expected setter for property [%s] to not be marked with @JsonIgnore on %s", + setter.descriptor.getName(), setter.settersWithJsonIgnoreClassNames)); + } else if (setters.size() > 1) { + StringBuilder builder = new StringBuilder("Found setters marked with @JsonIgnore:"); + for (IgnoredSetter setter : setters) { + builder.append( + String.format("%n - Setter for property [%s] should not be marked with @JsonIgnore " + + "on %s", + setter.descriptor.getName(), setter.settersWithJsonIgnoreClassNames)); + } + throw new IllegalArgumentException(builder.toString()); + } + } + + private static class MissingBeanMethod { + String methodType; + PropertyDescriptor property; + } + + private static void throwForMissingBeanMethod( + Class iface, List missingBeanMethods) { + if (missingBeanMethods.size() == 1) { + MissingBeanMethod missingBeanMethod = missingBeanMethods.get(0); + throw new IllegalArgumentException( + String.format("Expected %s for property [%s] of type [%s] on [%s].", + missingBeanMethod.methodType, missingBeanMethod.property.getName(), + missingBeanMethod.property.getPropertyType().getName(), 
iface.getName())); + } else if (missingBeanMethods.size() > 1) { + StringBuilder builder = new StringBuilder(String.format( + "Found missing property methods on [%s]:", iface.getName())); + for (MissingBeanMethod method : missingBeanMethods) { + builder.append( + String.format("%n - Expected %s for property [%s] of type [%s]", method.methodType, + method.property.getName(), method.property.getPropertyType().getName())); + } + throw new IllegalArgumentException(builder.toString()); + } + } + /** A {@link Comparator} that uses the classes name to compare them. */ private static class ClassNameComparator implements Comparator> { static final ClassNameComparator INSTANCE = new ClassNameComparator(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java index b47ac3337d972..e687f2798946e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactoryTest.java @@ -37,6 +37,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import org.hamcrest.Matchers; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -130,6 +131,23 @@ public void testMissingGetterThrows() throws Exception { PipelineOptionsFactory.as(MissingGetter.class); } + /** A test interface missing multiple getters. */ + public static interface MissingMultipleGetters extends MissingGetter { + void setOtherObject(Object value); + } + + @Test + public void testMultipleMissingGettersThrows() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "missing property methods on [com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MissingMultipleGetters]"); + expectedException.expectMessage("getter for property [object] of type [java.lang.Object]"); + expectedException.expectMessage("getter for property [otherObject] of type [java.lang.Object]"); + + PipelineOptionsFactory.as(MissingMultipleGetters.class); + } + /** A test interface missing a setter. */ public static interface MissingSetter extends PipelineOptions { Object getObject(); @@ -145,6 +163,40 @@ public void testMissingSetterThrows() throws Exception { PipelineOptionsFactory.as(MissingSetter.class); } + /** A test interface missing multiple setters. */ + public static interface MissingMultipleSetters extends MissingSetter { + Object getOtherObject(); + } + + @Test + public void testMissingMultipleSettersThrows() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "missing property methods on [com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MissingMultipleSetters]"); + expectedException.expectMessage("setter for property [object] of type [java.lang.Object]"); + expectedException.expectMessage("setter for property [otherObject] of type [java.lang.Object]"); + + PipelineOptionsFactory.as(MissingMultipleSetters.class); + } + + /** A test interface missing a setter and a getter. 
*/ + public static interface MissingGettersAndSetters extends MissingGetter { + Object getOtherObject(); + } + + @Test + public void testMissingGettersAndSettersThrows() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "missing property methods on [com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MissingGettersAndSetters]"); + expectedException.expectMessage("getter for property [object] of type [java.lang.Object]"); + expectedException.expectMessage("setter for property [otherObject] of type [java.lang.Object]"); + + PipelineOptionsFactory.as(MissingGettersAndSetters.class); + } + /** A test interface with a type mismatch between the getter and setter. */ public static interface GetterSetterTypeMismatch extends PipelineOptions { boolean getValue(); @@ -161,6 +213,24 @@ public void testGetterSetterTypeMismatchThrows() throws Exception { PipelineOptionsFactory.as(GetterSetterTypeMismatch.class); } + /** A test interface with multiple type mismatches between getters and setters. */ + public static interface MultiGetterSetterTypeMismatch extends GetterSetterTypeMismatch { + long getOther(); + void setOther(String other); + } + + @Test + public void testMultiGetterSetterTypeMismatchThrows() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Type mismatches between getters and setters detected:"); + expectedException.expectMessage("Property [value]: Getter is of type " + + "[boolean] whereas setter is of type [int]."); + expectedException.expectMessage("Property [other]: Getter is of type [long] " + + "whereas setter is of type [java.lang.String]."); + + PipelineOptionsFactory.as(MultiGetterSetterTypeMismatch.class); + } + /** A test interface representing a composite interface. */ public static interface CombinedObject extends MissingGetter, MissingSetter { } @@ -206,6 +276,48 @@ public void testReturnTypeConflictThrows() throws Exception { PipelineOptionsFactory.as(ReturnTypeConflict.class); } + /** An interface to provide multiple methods with return type conflicts. */ + public static interface MultiReturnTypeConflictBase extends CombinedObject { + Object getOther(); + void setOther(Object object); + } + + /** A test interface that has multiple conflicting return types with its parent. */ + public static interface MultiReturnTypeConflict extends MultiReturnTypeConflictBase { + @Override + String getObject(); + void setObject(String value); + + @Override + Long getOther(); + void setOther(Long other); + } + + @Test + public void testMultipleReturnTypeConflictsThrows() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("[com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultiReturnTypeConflict]"); + expectedException.expectMessage( + "Methods with multiple definitions with different return types"); + expectedException.expectMessage("Method [getObject] has multiple definitions"); + expectedException.expectMessage("public abstract java.lang.Object " + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$" + + "MissingSetter.getObject()"); + expectedException.expectMessage( + "public abstract java.lang.String com.google.cloud.dataflow.sdk.options." 
+ + "PipelineOptionsFactoryTest$MultiReturnTypeConflict.getObject()"); + expectedException.expectMessage("Method [getOther] has multiple definitions"); + expectedException.expectMessage("public abstract java.lang.Object " + + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$" + + "MultiReturnTypeConflictBase.getOther()"); + expectedException.expectMessage( + "public abstract java.lang.Long com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultiReturnTypeConflict.getOther()"); + + PipelineOptionsFactory.as(MultiReturnTypeConflict.class); + } + /** Test interface that has {@link JsonIgnore @JsonIgnore} on a setter for a property. */ public static interface SetterWithJsonIgnore extends PipelineOptions { String getValue(); @@ -222,6 +334,27 @@ public void testSetterAnnotatedWithJsonIgnore() throws Exception { PipelineOptionsFactory.as(SetterWithJsonIgnore.class); } + /** Test interface that has {@link JsonIgnore @JsonIgnore} on multiple setters. */ + public static interface MultiSetterWithJsonIgnore extends SetterWithJsonIgnore { + Integer getOther(); + @JsonIgnore + void setOther(Integer other); + } + + @Test + public void testMultipleSettersAnnotatedWithJsonIgnore() throws Exception { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Found setters marked with @JsonIgnore:"); + expectedException.expectMessage( + "property [other] should not be marked with @JsonIgnore on [com" + + ".google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultiSetterWithJsonIgnore]"); + expectedException.expectMessage( + "property [value] should not be marked with @JsonIgnore on [com." + + "google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$SetterWithJsonIgnore]"); + PipelineOptionsFactory.as(MultiSetterWithJsonIgnore.class); + } + /** * This class is has a conflicting field with {@link CombinedObject} that doesn't have * {@link JsonIgnore @JsonIgnore}. @@ -249,6 +382,61 @@ public void testNotAllGettersAnnotatedWithJsonIgnore() throws Exception { options.as(CombinedObject.class); } + private static interface MultiGetters extends PipelineOptions { + Object getObject(); + void setObject(Object value); + + @JsonIgnore + Integer getOther(); + void setOther(Integer value); + + Void getConsistent(); + void setConsistent(Void consistent); + } + + private static interface MultipleGettersWithInconsistentJsonIgnore extends PipelineOptions { + @JsonIgnore + Object getObject(); + void setObject(Object value); + + Integer getOther(); + void setOther(Integer value); + + Void getConsistent(); + void setConsistent(Void consistent); + } + + @Test + public void testMultipleGettersWithInconsistentJsonIgnore() { + // Initial construction is valid. + MultiGetters options = PipelineOptionsFactory.as(MultiGetters.class); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("Property getters are inconsistently marked with @JsonIgnore:"); + expectedException.expectMessage( + "property [object] to be marked on all"); + expectedException.expectMessage("found only on [com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultiGetters]"); + expectedException.expectMessage( + "property [other] to be marked on all"); + expectedException.expectMessage("found only on [com.google.cloud.dataflow.sdk.options." 
+ + "PipelineOptionsFactoryTest$MultipleGettersWithInconsistentJsonIgnore]"); + + expectedException.expectMessage(Matchers.anyOf( + containsString(java.util.Arrays.toString(new String[] + {"com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultipleGettersWithInconsistentJsonIgnore", + "com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MultiGetters"})), + containsString(java.util.Arrays.toString(new String[] + {"com.google.cloud.dataflow.sdk.options.PipelineOptionsFactoryTest$MultiGetters", + "com.google.cloud.dataflow.sdk.options." + + "PipelineOptionsFactoryTest$MultipleGettersWithInconsistentJsonIgnore"})))); + expectedException.expectMessage(not(containsString("property [consistent]"))); + + // When we attempt to convert, we should error immediately + options.as(MultipleGettersWithInconsistentJsonIgnore.class); + } + @Test public void testAppNameIsNotOverriddenWhenPassedInViaCommandLine() { ApplicationNameOptions options = PipelineOptionsFactory From 70918753f6bad8a2a64b0231a427abb3411dece2 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Fri, 20 Nov 2015 01:31:27 -0800 Subject: [PATCH 1185/1541] TextIO: improve javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108325447 --- .../google/cloud/dataflow/sdk/io/TextIO.java | 264 ++++++++++-------- 1 file changed, 144 insertions(+), 120 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java index 11047e4a1ed2f..875f84b664451 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java @@ -19,6 +19,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.runners.worker.TextReader; import com.google.cloud.dataflow.sdk.runners.worker.TextSink; @@ -48,22 +49,27 @@ /** * {@link PTransform}s for reading and writing text files. * - *

To read a {@link PCollection} from one or more text files, use - * {@link TextIO.Read}, specifying {@link TextIO.Read#from} to specify + *

To read a {@link PCollection} from one or more text files, use {@link TextIO.Read}. + * You can instantiate a transform using {@link TextIO.Read#from(String)} to specify * the path of the file(s) to read from (e.g., a local filename or * filename pattern if running locally, or a Google Cloud Storage * filename or filename pattern of the form - * {@code "gs:///"}), and optionally - * {@link TextIO.Read#named} to specify the name of the pipeline step - * and/or {@link TextIO.Read#withCoder} to specify the Coder to use to - * decode the text lines into Java values. For example: + * {@code "gs:///"}). You may optionally call + * {@link TextIO.Read#named(String)} to specify the name of the pipeline step. + * + *

By default, {@link TextIO.Read} returns a {@link PCollection} of {@link String Strings}, + * each corresponding to one line of an input UTF-8 text file. To convert directly from the raw + * bytes (split into lines delimited by '\n', '\r', or '\r\n') to another object of type {@code T}, + * supply a {@code Coder} using {@link TextIO.Read#withCoder(Coder)}. + * + *

See the following examples: * *

 {@code
  * Pipeline p = ...;
  *
  * // A simple Read of a local file (only runs locally):
  * PCollection<String> lines =
- *     p.apply(TextIO.Read.from("/path/to/file.txt"));
+ *     p.apply(TextIO.Read.from("/local/path/to/file.txt"));
  *
  * // A fully-specified Read from a GCS file (runs locally and via the
  * // Google Cloud Dataflow service):
@@ -74,14 +80,13 @@
  * } 
* *

To write a {@link PCollection} to one or more text files, use - * {@link TextIO.Write}, specifying {@link TextIO.Write#to} to specify + * {@link TextIO.Write}, specifying {@link TextIO.Write#to(String)} to specify * the path of the file to write to (e.g., a local filename or sharded * filename pattern if running locally, or a Google Cloud Storage * filename or sharded filename pattern of the form - * {@code "gs:///"}), and optionally - * {@link TextIO.Write#named} to specify the name of the pipeline step - * and/or {@link TextIO.Write#withCoder} to specify the Coder to use - * to encode the Java values into text lines. + * {@code "gs:///"}). You can optionally name the resulting transform using + * {@link TextIO.Write#named(String)}, and you can use {@link TextIO.Write#withCoder(Coder)} + * to specify the Coder to use to encode the Java values into text lines. * *

Any existing files with the same names as generated output files * will be overwritten. @@ -101,37 +106,40 @@ * .withCoder(TextualIntegerCoder.of())); * } * - *

Permissions

- * Permission requirements depend on the - * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner PipelineRunner} that is - * used to execute the Dataflow job. Please refer to the documentation of corresponding - * {@code PipelineRunner}s for more details. + *

Permissions

+ *

When run using the {@link DirectPipelineRunner}, your pipeline can read and write text files + * on your local drive and remote text files on Google Cloud Storage that you have access to using + * your {@code gcloud} credentials. When running in the Dataflow service using + * {@link DataflowPipelineRunner}, the pipeline can only read and write files from GCS. For more + * information about permissions, see the Cloud Dataflow documentation on + * Security and + * Permissions. */ public class TextIO { + /** The default coder, which returns each line of the input file as a string. */ public static final Coder DEFAULT_TEXT_CODER = StringUtf8Coder.of(); /** * A {@link PTransform} that reads from a text file (or multiple text * files matching a pattern) and returns a {@link PCollection} containing - * the decoding of each of the lines of the text file(s). The - * default decoding just returns the lines. + * the decoding of each of the lines of the text file(s). The + * default decoding just returns each line as a {@link String}, but you may call + * {@link #withCoder(Coder)} to change the return type. */ public static class Read { /** - * Returns a {@link TextIO.Read} {@link PTransform} with the given step name. + * Returns a transform for reading text files that uses the given step name. */ public static Bound named(String name) { return new Bound<>(DEFAULT_TEXT_CODER).named(name); } /** - * Returns a {@link TextIO.Read} {@link PTransform} that reads from the file(s) - * with the given name or pattern. This can be a local filename - * or filename pattern (if running locally), or a Google Cloud - * Storage filename or filename pattern of the form - * {@code "gs:///"}) (if running locally or via - * the Google Cloud Dataflow service). Standard - * /"} (if running locally or via the Google Cloud Dataflow + * service). Standard Java Filesystem glob patterns ("*", "?", "[..]") are supported. */ public static Bound from(String filepattern) { @@ -139,7 +147,7 @@ public static Bound from(String filepattern) { } /** - * Returns a TextIO.Read PTransform that uses the given + * Returns a transform for reading text files that uses the given * {@code Coder} to decode each of the lines of the file into a * value of type {@code T}. * @@ -154,7 +162,7 @@ public static Bound withCoder(Coder coder) { } /** - * Returns a TextIO.Read PTransform that has GCS path validation on + * Returns a transform for reading text files that has GCS path validation on * pipeline creation disabled. * *

This can be useful in the case where the GCS input does not @@ -166,12 +174,12 @@ public static Bound withoutValidation() { } /** - * Returns a TextIO.Read PTransform that reads from a file with the - * specified compression type. + * Returns a transform for reading text files that decompresses all input files + * using the specified compression type. * - *

If no compression type is specified, the default is AUTO. In this - * mode, the compression type of the file is determined by its extension - * (e.g., *.gz is gzipped, *.bz2 is bzipped, all other extensions are + *

If no compression type is specified, the default is {@link TextIO.CompressionType#AUTO}. + * In this mode, the compression type of the file is determined by its extension + * (e.g., {@code *.gz} is gzipped, {@code *.bz2} is bzipped, and all other extensions are * uncompressed). */ public static Bound withCompressionType(TextIO.CompressionType compressionType) { @@ -181,34 +189,32 @@ public static Bound withCompressionType(TextIO.CompressionType compressi // TODO: strippingNewlines, etc. /** - * A {@link PTransform} that reads from a text file (or multiple text files - * matching a pattern) and returns a bounded PCollection containing the - * decoding of each of the lines of the text file(s). The default - * decoding just returns the lines. + * A {@link PTransform} that reads from one or more text files and returns a bounded + * {@link PCollection} containing one element for each line of the input files. * * @param the type of each of the elements of the resulting - * PCollection, decoded from the lines of the text file + * {@link PCollection}. By default, each line is returned as a {@link String}, however you + * may use {@link #withCoder(Coder)} to supply a {@code Coder} to produce a + * {@code PCollection} instead. */ public static class Bound extends PTransform> { /** The filepattern to read from. */ - @Nullable - final String filepattern; + @Nullable private final String filepattern; /** The Coder to use to decode each line. */ - @Nullable - final Coder coder; + @Nullable private final Coder coder; /** An option to indicate if input validation is desired. Default is true. */ - final boolean validate; + private final boolean validate; /** Option to indicate the input source's compression type. Default is AUTO. */ - final TextIO.CompressionType compressionType; + private final TextIO.CompressionType compressionType; Bound(Coder coder) { this(null, null, coder, true, TextIO.CompressionType.AUTO); } - Bound(String name, String filepattern, Coder coder, boolean validate, + private Bound(String name, String filepattern, Coder coder, boolean validate, TextIO.CompressionType compressionType) { super(name); this.coder = coder; @@ -218,28 +224,33 @@ public static class Bound extends PTransform> { } /** - * Returns a new TextIO.Read PTransform that's like this one but - * with the given step name. Does not modify this object. + * Returns a new transform for reading from text files that's like this one but + * with the given step name. + * + *

Does not modify this object. */ public Bound named(String name) { return new Bound<>(name, filepattern, coder, validate, compressionType); } /** - * Returns a new TextIO.Read PTransform that's like this one but - * that reads from the file(s) with the given name or pattern. - * (See {@link TextIO.Read#from} for a description of - * filepatterns.) Does not modify this object. + * Returns a new transform for reading from text files that's like this one but + * that reads from the file(s) with the given name or pattern. See {@link TextIO.Read#from} + * for a description of filepatterns. + * + *

Does not modify this object. + */ public Bound from(String filepattern) { return new Bound<>(name, filepattern, coder, validate, compressionType); } /** - * Returns a new TextIO.Read PTransform that's like this one but - * that uses the given {@code Coder} to decode each of the - * lines of the file into a value of type {@code X}. Does not - * modify this object. + * Returns a new transform for reading from text files that's like this one but + * that uses the given {@link Coder Coder} to decode each of the + * lines of the file into a value of type {@code X}. + * + *

Does not modify this object. * * @param the type of the decoded elements, and the * elements of the resulting PCollection @@ -249,30 +260,27 @@ public Bound withCoder(Coder coder) { } /** - * Returns a new TextIO.Read PTransform that's like this one but + * Returns a new transform for reading from text files that's like this one but * that has GCS path validation on pipeline creation disabled. - * Does not modify this object. * *

This can be useful in the case where the GCS input does not * exist at the pipeline creation time, but is expected to be * available at execution time. + * + *

Does not modify this object. */ public Bound withoutValidation() { return new Bound<>(name, filepattern, coder, false, compressionType); } /** - * Returns a new TextIO.Read PTransform that's like this one but + * Returns a new transform for reading from text files that's like this one but * reads from input sources using the specified compression type. - * Does not modify this object. * - *

If AUTO compression type is specified, a compression type is - * selected on a per-file basis, based on the file's extension (e.g., - * .gz will be processed as a gzipped file, .bz will be processed - * as a bzipped file, other extensions with be treated as uncompressed - * input). + *

If no compression type is specified, the default is {@link TextIO.CompressionType#AUTO}. + * See {@link TextIO.Read#withCompressionType(CompressionType)} for more details. * - *

If no compression type is specified, the default is AUTO. + *

Does not modify this object. */ public Bound withCompressionType(TextIO.CompressionType compressionType) { return new Bound<>(name, filepattern, coder, validate, compressionType); @@ -321,52 +329,55 @@ public void evaluate( }); } } + + /** Disallow construction of utility classes. */ + private Read() {} } ///////////////////////////////////////////////////////////////////////////// /** - * A {@link PTransform} that writes a {@link PCollection} to a text file (or + * A {@link PTransform} that writes a {@link PCollection} to text file (or * multiple text files matching a sharding pattern), with each - * PCollection element being encoded into its own line. + * element of the input collection encoded into its own line. */ public static class Write { /** - * Returns a TextIO.Write PTransform with the given step name. + * Returns a transform for writing to text files with the given step name. */ public static Bound named(String name) { return new Bound<>(DEFAULT_TEXT_CODER).named(name); } /** - * Returns a TextIO.Write PTransform that writes to the file(s) - * with the given prefix. This can be a local filename + * Returns a transform for writing to text files that writes to the file(s) + * with the given prefix. This can be a local filename * (if running locally), or a Google Cloud Storage filename of - * the form {@code "gs:///"}) + * the form {@code "gs:///"} * (if running locally or via the Google Cloud Dataflow service). * *

The files written will begin with this prefix, followed by - * a shard identifier (see {@link Bound#withNumShards}, and end - * in a common extension, if given by {@link Bound#withSuffix}. + * a shard identifier (see {@link Bound#withNumShards(int)}, and end + * in a common extension, if given by {@link Bound#withSuffix(String)}. */ public static Bound to(String prefix) { return new Bound<>(DEFAULT_TEXT_CODER).to(prefix); } /** - * Returns a TextIO.Write PTransform that writes to the file(s) with the - * given filename suffix. + * Returns a transform for writing to text files that appends the specified suffix + * to the created files. */ public static Bound withSuffix(String nameExtension) { return new Bound<>(DEFAULT_TEXT_CODER).withSuffix(nameExtension); } /** - * Returns a TextIO.Write PTransform that uses the provided shard count. + * Returns a transform for writing to text files that uses the provided shard count. * *

Constraining the number of shards is likely to reduce - * the performance of a pipeline. Setting this value is not recommended + * the performance of a pipeline. Setting this value is not recommended * unless you require a specific number of output files. * * @param numShards the number of shards to use, or 0 to let the system @@ -377,7 +388,7 @@ public static Bound withNumShards(int numShards) { } /** - * Returns a TextIO.Write PTransform that uses the given shard name + * Returns a transform for writing to text files that uses the given shard name * template. * *

See {@link ShardNameTemplate} for a description of shard templates. @@ -387,7 +398,7 @@ public static Bound withShardNameTemplate(String shardTemplate) { } /** - * Returns a TextIO.Write PTransform that forces a single file as + * Returns a transform for writing to text files that forces a single file as * output. */ public static Bound withoutSharding() { @@ -395,21 +406,21 @@ public static Bound withoutSharding() { } /** - * Returns a TextIO.Write PTransform that uses the given - * {@code Coder} to encode each of the elements of the input - * {@code PCollection} into an output text line. + * Returns a transform for writing to text files that uses the given + * {@link Coder} to encode each of the elements of the input + * {@link PCollection} into an output text line. * *

By default, uses {@link StringUtf8Coder}, which writes input * Java strings directly as output lines. * - * @param the type of the elements of the input PCollection + * @param the type of the elements of the input {@link PCollection} */ public static Bound withCoder(Coder coder) { return new Bound<>(coder); } /** - * Returns a TextIO.Write PTransform that has GCS path validation on + * Returns a transform for writing to text files that has GCS path validation on * pipeline creation disabled. * *

This can be useful in the case where the GCS output location does @@ -430,32 +441,31 @@ public static Bound withoutValidation() { * @param the type of the elements of the input PCollection */ public static class Bound extends PTransform, PDone> { - /** The filename to write to. */ - @Nullable - final String filenamePrefix; - /** Suffix to use for each filename. */ - final String filenameSuffix; + /** The prefix of each file written, combined with suffix and shardTemplate. */ + @Nullable private final String filenamePrefix; + /** The suffix of each file written, combined with prefix and shardTemplate. */ + private final String filenameSuffix; /** The Coder to use to decode each line. */ - final Coder coder; + private final Coder coder; - /** Requested number of shards. 0 for automatic. */ - final int numShards; + /** Requested number of shards. 0 for automatic. */ + private final int numShards; /** Insert a shuffle before writing to decouple parallelism when numShards != 0. */ - final boolean forceReshard; + private final boolean forceReshard; - /** Shard template string. */ - final String shardTemplate; + /** The shard template of each file written, combined with prefix and suffix. */ + private final String shardTemplate; /** An option to indicate if output validation is desired. Default is true. */ - final boolean validate; + private final boolean validate; Bound(Coder coder) { this(null, null, "", coder, 0, true, ShardNameTemplate.INDEX_OF_MAX, true); } - Bound(String name, String filenamePrefix, String filenameSuffix, Coder coder, + private Bound(String name, String filenamePrefix, String filenameSuffix, Coder coder, int numShards, boolean forceReshard, String shardTemplate, boolean validate) { super(name); this.coder = coder; @@ -468,8 +478,10 @@ public static class Bound extends PTransform, PDone> { } /** - * Returns a new TextIO.Write PTransform that's like this one but - * with the given step name. Does not modify this object. + * Returns a transform for writing to text files that's like this one but + * with the given step name. + * + *

Does not modify this object. */ public Bound named(String name) { return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, @@ -477,7 +489,7 @@ public Bound named(String name) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that writes to the file(s) with the given filename prefix. * *

See {@link TextIO.Write#to(String) Write.to(String)} for more information. @@ -491,7 +503,7 @@ public Bound to(String filenamePrefix) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that that's like this one but * that writes to the file(s) with the given filename suffix. * *

Does not modify this object. @@ -505,11 +517,11 @@ public Bound withSuffix(String nameExtension) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that uses the provided shard count. * *

Constraining the number of shards is likely to reduce - * the performance of a pipeline. Setting this value is not recommended + * the performance of a pipeline. Setting this value is not recommended * unless you require a specific number of output files. * *

Does not modify this object. @@ -523,14 +535,14 @@ public Bound withNumShards(int numShards) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that uses the provided shard count. * *

Constraining the number of shards is likely to reduce - * the performance of a pipeline. If forceReshard is true, the output - * will be shuffled to obtain the desired sharding. If it is false, + * the performance of a pipeline. If forceReshard is true, the output + * will be shuffled to obtain the desired sharding. If it is false, * data will not be reshuffled, but parallelism of preceeding stages - * may be constrained. Setting this value is not recommended + * may be constrained. Setting this value is not recommended * unless you require a specific number of output files. * *

Does not modify this object. @@ -547,7 +559,7 @@ private Bound withNumShards(int numShards, boolean forceReshard) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that uses the given shard name template. * *

Does not modify this object. @@ -560,9 +572,13 @@ public Bound withShardNameTemplate(String shardTemplate) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that forces a single file as output. * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Using this setting is not recommended + * unless you truly require a single output file. + * *

This is a shortcut for * {@code .withNumShards(1).withShardNameTemplate("")} * @@ -573,9 +589,13 @@ public Bound withoutSharding() { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that forces a single file as output. * + *

Constraining the number of shards is likely to reduce + * the performance of a pipeline. Using this setting is not recommended + * unless you truly require a single output file. + * *

This is a shortcut for * {@code .withNumShards(1, forceReshard).withShardNameTemplate("")} * @@ -587,12 +607,12 @@ private Bound withoutSharding(boolean forceReshard) { } /** - * Returns a new TextIO.Write PTransform that's like this one - * but that uses the given {@code Coder} to encode each of - * the elements of the input {@code PCollection} into an - * output text line. Does not modify this object. + * Returns a transform for writing to text files that's like this one + * but that uses the given {@link Coder Coder} to encode each of + * the elements of the input {@link PCollection PCollection} into an + * output text line. Does not modify this object. * - * @param the type of the elements of the input PCollection + * @param the type of the elements of the input {@link PCollection} */ public Bound withCoder(Coder coder) { return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, forceReshard, @@ -600,13 +620,14 @@ public Bound withCoder(Coder coder) { } /** - * Returns a new TextIO.Write PTransform that's like this one but + * Returns a transform for writing to text files that's like this one but * that has GCS output path validation on pipeline creation disabled. - * Does not modify this object. * *

This can be useful in the case where the GCS output location does * not exist at the pipeline creation time, but is expected to be * available at execution time. + * + *

Does not modify this object. */ public Bound withoutValidation() { return new Bound<>(name, filenamePrefix, filenameSuffix, coder, numShards, forceReshard, @@ -696,7 +717,7 @@ public static enum CompressionType implements TextReader.DecompressingStreamFact GZIP(".gz") { @Override public InputStream createInputStream(InputStream inputStream) throws IOException { - // Determine if the input stream is gzipped. The input stream returned from the + // Determine if the input stream is gzipped. The input stream returned from the // GCS connector may already be decompressed, and no action is required. PushbackInputStream stream = new PushbackInputStream(inputStream, 2); byte[] headerBytes = new byte[2]; @@ -753,6 +774,9 @@ private static void validateOutputComponent(String partialFilePattern) { ////////////////////////////////////////////////////////////////////////////// + /** Disable construction of utility class. */ + private TextIO() {} + private static void evaluateReadHelper( Read.Bound transform, DirectPipelineRunner.EvaluationContext context) { TextReader reader = @@ -767,7 +791,7 @@ private static void evaluateWriteHelper( List elems = context.getPCollection(context.getInput(transform)); int numShards = transform.numShards; if (numShards < 1) { - // System gets to choose. For direct mode, choose 1. + // System gets to choose. For direct mode, choose 1. numShards = 1; } TextSink> writer = TextSink.createForDirectPipelineRunner( From c7f0421c8a345de72137ff2006a7e5fbf128bf85 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 20 Nov 2015 09:49:18 -0800 Subject: [PATCH 1186/1541] Make the IterableLikeCoder efficient for many small values Use a small buffer to count the number of elements that are buffered before prefixing the count. For small values like ints, this reduces the overhead from 1 byte per value, to a few bytes per 64k of data written. ----Release Notes---- Modified IterableLikeCoder in a backwards compatible manner but not in a forwards compatible manner preventing users from "updating" a pipeline to a previous minor release. 
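As a rough illustration of the encoding this change introduces (the same read loop appears in the BufferedElementCountingOutputStream javadoc added below), a reader consumes blocks of elements, each prefixed by a varint element count, until it reads a count of 0. The fragment below is a minimal sketch, not SDK code; "decodeElement" is a hypothetical stand-in for elementCoder.decode(stream, nestedContext):

    // Minimal sketch of decoding the count-prefixed format.
    List<T> elements = new ArrayList<>();
    long count;
    do {
      count = VarInt.decodeLong(dataInStream);   // varint block size; 0 terminates the stream
      for (long i = 0; i < count; ++i) {
        elements.add(decodeElement(dataInStream));
      }
    } while (count > 0);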
[] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108352267 --- .../sdk/coders/IterableLikeCoder.java | 46 ++-- .../BufferedElementCountingOutputStream.java | 184 ++++++++++++++++ ...fferedElementCountingOutputStreamTest.java | 205 ++++++++++++++++++ 3 files changed, 421 insertions(+), 14 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStreamTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index 3adcb6d9a9008..522e9b1ee74b2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -16,6 +16,8 @@ package com.google.cloud.dataflow.sdk.coders; +import com.google.cloud.dataflow.sdk.util.BufferedElementCountingOutputStream; +import com.google.cloud.dataflow.sdk.util.VarInt; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterable; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.google.common.base.Preconditions; @@ -111,15 +113,17 @@ public void encode( elementCoder.encode(elem, dataOutStream, nestedContext); } } else { - // We don't know the size without traversing it. So use a - // "hasNext" sentinel before each element. - // TODO: Don't use the sentinel if context.isWholeStream. + // We don't know the size without traversing it so use a fixed size buffer + // and encode as many elements as possible into it before outputting the size followed + // by the elements. dataOutStream.writeInt(-1); + BufferedElementCountingOutputStream countingOutputStream = + new BufferedElementCountingOutputStream(dataOutStream); for (T elem : iterable) { - dataOutStream.writeBoolean(true); - elementCoder.encode(elem, dataOutStream, nestedContext); + countingOutputStream.markElementStart(); + elementCoder.encode(elem, countingOutputStream, nestedContext); } - dataOutStream.writeBoolean(false); + countingOutputStream.finish(); } // Make sure all our output gets pushed to the underlying outStream. dataOutStream.flush(); @@ -138,11 +142,15 @@ public IterableT decode(InputStream inStream, Context context) } return decodeToIterable(elements); } else { - // We don't know the size a priori. Check if we're done with - // each element. List elements = new ArrayList<>(); - while (dataInStream.readBoolean()) { - elements.add(elementCoder.decode(dataInStream, nestedContext)); + long count; + // We don't know the size a priori. Check if we're done with + // each block of elements. + while ((count = VarInt.decodeLong(dataInStream)) > 0) { + while (count > 0) { + elements.add(elementCoder.decode(dataInStream, nestedContext)); + count -= 1; + } } return decodeToIterable(elements); } @@ -205,14 +213,24 @@ public void registerByteSizeObserver( elementCoder.registerByteSizeObserver(elem, observer, nestedContext); } } else { - // We don't know the size without traversing it. So use a - // "hasNext" sentinel before each element. - // TODO: Don't use the sentinel if context.isWholeStream. 
+ // TODO: Update to use an accurate count depending on size and count, currently we + // are under estimating the size by up to 10 bytes per block of data since we are + // not encoding the count prefix which occurs at most once per 64k of data and is upto + // 10 bytes long. Since we include the total count we can upper bound the underestimate + // to be 10 / 65536 ~= 0.0153% of the actual size. observer.update(4L); + long count = 0; for (T elem : iterable) { - observer.update(1L); + count += 1; elementCoder.registerByteSizeObserver(elem, observer, nestedContext); } + if (count > 0) { + // Update the length based upon the number of counted elements, this helps + // eliminate the case where all the elements are encoded in the first block and + // it is quite short (e.g. Long.MAX_VALUE nulls encoded with VoidCoder). + observer.update(VarInt.getLength(count)); + } + // Update with the terminator byte. observer.update(1L); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java new file mode 100644 index 0000000000000..e8e693a996c19 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.cloud.dataflow.sdk.coders.Coder.Context; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Provides an efficient encoding for {@link Iterable}s containing small values by + * buffering up to {@code bufferSize} bytes of data before prefixing the count. + * Note that each element needs to be encoded in a nested context. See + * {@link Context Coder.Context} for more details. + * + *

To use this stream: + *


+ * BufferedElementCountingOutputStream os = ...
+ * for (Element E : elements) {
+ *   os.markElementStart();
+ *   // write an element to os
+ * }
+ * os.finish();
+ * 
+ * + *

The resulting output stream is: + *

+ * countA element(0) element(1) ... element(countA - 1)
+ * countB element(0) element(1) ... element(countB - 1)
+ * ...
+ * countX element(0) element(1) ... element(countX - 1)
+ * countY
+ * 
+ * + *

To read this stream: + *


+ * InputStream is = ...
+ * long count;
+ * do {
+ *   count = VarInt.decodeLong(is);
+ *   for (int i = 0; i < count; ++i) {
+ *     // read an element from is
+ *   }
+ * } while(count > 0);
+ * 
+ * + *

The counts are encoded as variable length longs. See {@link VarInt#encode(long, OutputStream)} + * for more details. The end of the iterable is detected by reading a count of 0. + */ +@NotThreadSafe +public class BufferedElementCountingOutputStream extends OutputStream { + public static final int DEFAULT_BUFFER_SIZE = 64 * 1024; + private final ByteBuffer buffer; + private final OutputStream os; + private boolean finished; + private long count; + + /** + * Creates an output stream which encodes the number of elements output to it in a streaming + * manner. + */ + public BufferedElementCountingOutputStream(OutputStream os) { + this(os, DEFAULT_BUFFER_SIZE); + } + + /** + * Creates an output stream which encodes the number of elements output to it in a streaming + * manner with the given {@code bufferSize}. + */ + BufferedElementCountingOutputStream(OutputStream os, int bufferSize) { + this.buffer = ByteBuffer.allocate(bufferSize); + this.os = os; + this.finished = false; + this.count = 0; + } + + /** + * Finishes the encoding by flushing any buffered data, + * and outputting a final count of 0. + */ + public void finish() throws IOException { + if (finished) { + return; + } + flush(); + // Finish the stream by stating that there are 0 elements that follow. + VarInt.encode(0, os); + finished = true; + } + + /** + * Marks that a new element is being output. This allows this output stream + * to use the buffer if it had previously overflowed marking the start of a new + * block of elements. + */ + public void markElementStart() throws IOException { + if (finished) { + throw new IOException("Stream has been finished. Can not add any more elements."); + } + count++; + } + + @Override + public void write(int b) throws IOException { + if (finished) { + throw new IOException("Stream has been finished. Can not write any more data."); + } + if (count == 0) { + os.write(b); + return; + } + + if (buffer.hasRemaining()) { + buffer.put((byte) b); + } else { + outputBuffer(); + os.write(b); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (finished) { + throw new IOException("Stream has been finished. Can not write any more data."); + } + if (count == 0) { + os.write(b, off, len); + return; + } + + if (buffer.remaining() >= len) { + buffer.put(b, off, len); + } else { + outputBuffer(); + os.write(b, off, len); + } + } + + @Override + public void flush() throws IOException { + if (finished) { + return; + } + outputBuffer(); + os.flush(); + } + + @Override + public void close() throws IOException { + finish(); + os.close(); + } + + // Output the buffer if it contains any data. + private void outputBuffer() throws IOException { + if (count > 0) { + VarInt.encode(count, os); + // We are using a heap based buffer and not a direct buffer so it is safe to access + // the underlying array. + os.write(buffer.array(), buffer.arrayOffset(), buffer.position()); + buffer.clear(); + // The buffer has been flushed so we must write to the underlying stream until + // we learn of the next element. We reset the count to zero marking that we should + // not use the buffer. 
+ count = 0; + } + } +} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStreamTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStreamTest.java new file mode 100644 index 0000000000000..af2f4425507f4 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStreamTest.java @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.coders.ByteArrayCoder; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; +import com.google.common.collect.ImmutableList; + +import org.hamcrest.collection.IsIterableContainingInOrder; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +/** + * Tests for {@link BufferedElementCountingOutputStream}. 
+ */ +@RunWith(JUnit4.class) +public class BufferedElementCountingOutputStreamTest { + @Rule public final ExpectedException expectedException = ExpectedException.none(); + private static final int BUFFER_SIZE = 8; + + @Test + public void testEmptyValues() throws Exception { + testValues(Collections.emptyList()); + } + + @Test + public void testSingleValue() throws Exception { + testValues(toBytes("abc")); + } + + @Test + public void testSingleValueGreaterThanBuffer() throws Exception { + testValues(toBytes("abcdefghijklmnopqrstuvwxyz")); + } + + @Test + public void testMultipleValuesLessThanBuffer() throws Exception { + testValues(toBytes("a", "b", "c")); + } + + @Test + public void testMultipleValuesThatBecomeGreaterThanBuffer() throws Exception { + testValues(toBytes("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", + "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z")); + } + + @Test + public void testMultipleRandomSizedValues() throws Exception { + Random r = new Random(234589023580234890L); + byte[] randomData = new byte[r.nextInt(18)]; + for (int i = 0; i < 1000; ++i) { + List bytes = new ArrayList<>(); + for (int j = 0; j < 100; ++j) { + r.nextBytes(randomData); + bytes.add(randomData); + } + testValues(bytes); + } + } + + @Test + public void testFlushInMiddleOfElement() throws Exception { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + BufferedElementCountingOutputStream os = new BufferedElementCountingOutputStream(bos); + os.markElementStart(); + os.write(1); + os.flush(); + os.write(2); + os.close(); + assertArrayEquals(new byte[]{ 1, 1, 2, 0 }, bos.toByteArray()); + } + + @Test + public void testFlushInMiddleOfElementUsingByteArrays() throws Exception { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + BufferedElementCountingOutputStream os = new BufferedElementCountingOutputStream(bos); + os.markElementStart(); + os.write(new byte[]{ 1 }); + os.flush(); + os.write(new byte[]{ 2 }); + os.close(); + assertArrayEquals(new byte[]{ 1, 1, 2, 0 }, bos.toByteArray()); + } + + @Test + public void testFlushingWhenFinishedIsNoOp() throws Exception { + BufferedElementCountingOutputStream os = testValues(toBytes("a")); + os.flush(); + os.flush(); + os.flush(); + } + + @Test + public void testFinishingWhenFinishedIsNoOp() throws Exception { + BufferedElementCountingOutputStream os = testValues(toBytes("a")); + os.finish(); + os.finish(); + os.finish(); + } + + @Test + public void testClosingFinishesTheStream() throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BufferedElementCountingOutputStream os = createAndWriteValues(toBytes("abcdefghij"), baos); + os.close(); + verifyValues(toBytes("abcdefghij"), new ByteArrayInputStream(baos.toByteArray())); + } + + @Test + public void testAddingElementWhenFinishedThrows() throws Exception { + expectedException.expect(IOException.class); + expectedException.expectMessage("Stream has been finished."); + testValues(toBytes("a")).markElementStart(); + } + + @Test + public void testWritingByteWhenFinishedThrows() throws Exception { + expectedException.expect(IOException.class); + expectedException.expectMessage("Stream has been finished."); + testValues(toBytes("a")).write(1); + } + + @Test + public void testWritingBytesWhenFinishedThrows() throws Exception { + expectedException.expect(IOException.class); + expectedException.expectMessage("Stream has been finished."); + testValues(toBytes("a")).write("b".getBytes()); + } + + private List toBytes(String ... 
values) { + ImmutableList.Builder builder = ImmutableList.builder(); + for (String value : values) { + builder.add(value.getBytes()); + } + return builder.build(); + } + + private BufferedElementCountingOutputStream + testValues(List expectedValues) throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BufferedElementCountingOutputStream os = createAndWriteValues(expectedValues, baos); + os.finish(); + verifyValues(expectedValues, new ByteArrayInputStream(baos.toByteArray())); + return os; + } + + private void verifyValues(List expectedValues, InputStream is) throws Exception { + List values = new ArrayList<>(); + long count; + do { + count = VarInt.decodeLong(is); + for (int i = 0; i < count; ++i) { + values.add(ByteArrayCoder.of().decode(is, Context.NESTED)); + } + } while(count > 0); + + if (expectedValues.isEmpty()) { + assertTrue(values.isEmpty()); + } else { + assertThat(values, IsIterableContainingInOrder.contains(expectedValues.toArray())); + } + } + + private BufferedElementCountingOutputStream + createAndWriteValues(List values, OutputStream output) throws Exception { + BufferedElementCountingOutputStream os = + new BufferedElementCountingOutputStream(output, BUFFER_SIZE); + + for (byte[] value : values) { + os.markElementStart(); + ByteArrayCoder.of().encode(value, os, Context.NESTED); + } + return os; + } +} + From e975c13029a222b37efb1cce0a63a6d9315392f1 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 23 Nov 2015 08:36:18 -0800 Subject: [PATCH 1187/1541] Fire a pane on AfterAll merge if it was not finished in all windows Previously, when windows merged, an AfterAll trigger would think it was ALREADY_FINISHED if all of its subtriggers were ALREADY_FINISHED. But some of the subtriggers might have been finished in different windows, and the combined AfterAll never fired (hence finished) in any window. With this change, that firing occurs. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108508082 --- .../sdk/transforms/windowing/AfterAll.java | 10 ++++++- .../transforms/windowing/AfterAllTest.java | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java index ed4f7ff77eb2d..a542bbf679a92 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java @@ -84,7 +84,15 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { fired |= result.isFire(); } - return fired ? MergeResult.FIRE_AND_FINISH : MergeResult.ALREADY_FINISHED; + // When we reach this point, we know all subtriggers are finished, possibly already, + // possibly because of firing right now. So this trigger is finished and the decision + // is whether to fire. If no subtrigger wants to fire and the root trigger was already + // finished in some window, then there is no need to fire. 
+ if (!fired && c.trigger().finishedInAnyMergingWindow()) { + return MergeResult.ALREADY_FINISHED; + } else { + return MergeResult.FIRE_AND_FINISH; + } } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java index 9edef5ea2b9d3..87a00db9b55e9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java @@ -178,6 +178,32 @@ public void testOnMergeFires() throws Exception { new IntervalWindow(new Instant(1), new Instant(22))); } + @Test + public void testOnMergeFiresNotAlreadyFinished() throws Exception { + setUp(Sessions.withGapDuration(Duration.millis(10))); + + when(mockTrigger1.onElement(Mockito..OnElementContext>any())) + .thenReturn(TriggerResult.CONTINUE); + when(mockTrigger2.onElement(Mockito..OnElementContext>any())) + .thenReturn(TriggerResult.CONTINUE); + tester.injectElements( + TimestampedValue.of(1, new Instant(1)), + TimestampedValue.of(5, new Instant(12))); + + when(mockTrigger1.onMerge(Mockito..OnMergeContext>any())) + .thenReturn(MergeResult.ALREADY_FINISHED); + when(mockTrigger2.onMerge(Mockito..OnMergeContext>any())) + .thenReturn(MergeResult.ALREADY_FINISHED); + tester.injectElements( + TimestampedValue.of(12, new Instant(5))); + + assertThat(tester.extractOutput(), Matchers.contains( + isSingleWindowedValue(Matchers.containsInAnyOrder(1, 5, 12), 1, 1, 22))); + assertTrue(tester.isMarkedFinished(new IntervalWindow(new Instant(1), new Instant(22)))); + tester.assertHasOnlyGlobalAndFinishedSetsFor( + new IntervalWindow(new Instant(1), new Instant(22))); + } + @Test public void testFireDeadline() throws Exception { BoundedWindow window = new IntervalWindow(new Instant(0), new Instant(10)); From 34f4dcc608a6f11b0f85615a407792df0ab075f1 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Mon, 23 Nov 2015 12:15:18 -0800 Subject: [PATCH 1188/1541] Fix a worker crash when writing to intermediate files The check `coder instanceof ValueOnlyWindowedValueCoder` is not enough to guarantee that this is a user-requested AvroSink at the pipeline end, because ValueOnlyWindowedValueCoder is also used at the beginning of a pipeline to read from user sources, which may also be materialized. We were crashing when materializing immediately after a non-Avro source, e.g., TextIO used as a side input. This failed because we tried to pass a StringUtf8Coder to AvroSink. The updated check will catch the crashing case, but will still use AvroSink when materializing a source's immediate output and the Source uses an AvroCoder. However, this is safe since it will be treated by the reader as if it was coming directly from an AvroSource, and the window and timestamp will be re-applied correctly. Also add comments. 
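In code terms, the rule described above can be summarized roughly as follows. This is a simplified sketch with invented variable names; the authoritative version is the AvroSinkFactory change below. AvroSink is chosen only when the coder is a ValueOnlyWindowedValueCoder wrapping an AvroCoder; otherwise AvroByteSink is used so the window and timestamp are preserved:

    // Simplified sketch of the sink-selection rule; not the exact factory code.
    boolean userAvroOutput =
        coder instanceof ValueOnlyWindowedValueCoder
            && ((ValueOnlyWindowedValueCoder<?>) coder).getValueCoder() instanceof AvroCoder;
    Sink<?> sink = userAvroOutput
        ? new AvroSink<>(filename, (ValueOnlyWindowedValueCoder<?>) coder)   // values only
        : new AvroByteSink<>(filename, coder);                               // keeps window/timestamp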
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108535669 --- .../sdk/runners/worker/AvroReaderFactory.java | 11 ++---- .../dataflow/sdk/runners/worker/AvroSink.java | 6 +-- .../sdk/runners/worker/AvroSinkFactory.java | 39 ++++++++++++++++++- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java index cc8e87efa957c..82fd6fe027d32 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderFactory.java @@ -55,15 +55,10 @@ Reader create(CloudObject spec, Coder coder, PipelineOptions options) thro Long startOffset = getLong(spec, PropertyNames.START_OFFSET, null); Long endOffset = getLong(spec, PropertyNames.END_OFFSET, null); - // If the coder is a ValueOnlyWindowedValueCoder, the source is a user source. Otherwise, - // the coder is a FullWindowedValueCoder and the source is a materialized PCollection. - if (coder instanceof ValueOnlyWindowedValueCoder) { + // See AvroSinkFactory#create for an explanation of this logic. + if (coder instanceof ValueOnlyWindowedValueCoder + && ((ValueOnlyWindowedValueCoder) coder).getValueCoder() instanceof AvroCoder) { ValueOnlyWindowedValueCoder valueCoder = (ValueOnlyWindowedValueCoder) coder; - if (!(valueCoder.getValueCoder() instanceof AvroCoder)) { - throw new IllegalArgumentException( - "AvroReader requires an AvroCoder, but the instance given was " - + valueCoder.getValueCoder()); - } return new AvroReader<>( filename, startOffset, endOffset, (AvroCoder) valueCoder.getValueCoder(), options); } else { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java index 9ecf1c3cdebe2..b101a2b0fcded 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSink.java @@ -16,14 +16,13 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import static com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; - import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.MimeTypes; import com.google.cloud.dataflow.sdk.util.ShardingWritableByteChannel; import com.google.cloud.dataflow.sdk.util.WindowedValue; +import com.google.cloud.dataflow.sdk.util.WindowedValue.ValueOnlyWindowedValueCoder; import com.google.cloud.dataflow.sdk.util.common.worker.Sink; import org.apache.avro.Schema; @@ -57,7 +56,8 @@ public AvroSink(String filename, ValueOnlyWindowedValueCoder coder) { public AvroSink(String filenamePrefix, String shardFormat, String filenameSuffix, int shardCount, ValueOnlyWindowedValueCoder coder) { if (!(coder.getValueCoder() instanceof AvroCoder)) { - throw new IllegalArgumentException("AvroSink requires an AvroCoder"); + throw new IllegalArgumentException(String.format( + "AvroSink requires an AvroCoder, not a %s", coder.getValueCoder().getClass())); } this.filenamePrefix = filenamePrefix; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java index fcc7f2d48e0e5..fadd31a38fbff 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroSinkFactory.java @@ -18,6 +18,7 @@ import static com.google.cloud.dataflow.sdk.util.Structs.getString; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.util.CloudObject; @@ -49,8 +50,42 @@ static Sink create(CloudObject spec, Coder coder) throws Exception { String filename = getString(spec, PropertyNames.FILENAME); - if (coder instanceof ValueOnlyWindowedValueCoder) { - return (Sink) new AvroSink(filename, (ValueOnlyWindowedValueCoder) coder); + // Avro sinks are used both for outputting user data at the end of a pipeline and for + // materializing PCollections as intermediate results. It is important to distinguish these + // two cases because one requires only the values (outputting with AvroSink) and one requires + // the values along with their window and timestamp (materializing intermediate results with + // AvroByteSink). + // + // The logic we would like is "use AvroSink when writing at the end of a pipeline; use + // AvroByteSink for materialized results". + // + // ValueOnlyWindowedValueCoder is used to decode/encode the values read from a Source, and used + // to encode the values written to a Sink. FullWindowedValueCoder is used as the coder between + // other edges in a Dataflow pipeline graph. + // + // Checking that the provided coder is an instance of ValueOnlyWindowedValueCoder is almost + // enough to identify a user's AvroSink at the end of a pipeline, but it does not eliminate the + // case when we are materializing immediately after reading from a Source. If this was the + // entire check to decide to use AvroSink, there could be a crash when we materialized the + // output of a Source that does not use AvroCoder, such as TextIO with StringUtf8Coder. + // + // Adding the additional test that the inner value coder is an AvroCoder will eliminate the + // TextIO case but will leave sources that, like AvroSource, use AvroCoder to represent their + // values. This fixes the potential crash, but still would use AvroSink for intermediate + // results immediately after such a Source. + // + // Luckily, using AvroSink in these cases is safe. Though AvroSink will only encode the value, + // and will drop the associated timestamp and window, the dropped values were applied by + // ValueOnlyWindowedValueCoder and will be reapplied by the same when the file is re-read by + // later in the pipeline. + // + // Otherwise, this is definitely a materialized result and we should use the AvroByteSink to + // include the window and timestamp. + // + // See AvroReaderFactory#create for the accompanying reader logic. + if (coder instanceof ValueOnlyWindowedValueCoder + && ((ValueOnlyWindowedValueCoder) coder).getValueCoder() instanceof AvroCoder) { + return new AvroSink(filename, (ValueOnlyWindowedValueCoder) coder); } else { return new AvroByteSink<>(filename, coder); } From dd85e088c7680f75d64bc18b50dbe550611a212a Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 23 Nov 2015 14:20:47 -0800 Subject: [PATCH 1189/1541] Enable application default credentials for Cloud Shell ----Release Notes---- Updated core Google dependencies from 1.20.0 from 1.21.0. 
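The dependency bump below presumably works because newer google-api-client releases extend Application Default Credentials support to environments such as Cloud Shell. As a hedged illustration only (not the SDK's own credential plumbing), obtaining such a credential directly looks like this:

    // Hedged sketch only; the SDK resolves credentials through its options classes.
    import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
    import java.io.IOException;

    class AdcExample {
      static GoogleCredential credential() throws IOException {
        // Resolves credentials from the environment (gcloud, Cloud Shell, GCE metadata, ...).
        return GoogleCredential.getApplicationDefault();
      }
    }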
[] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108548408 --- examples/pom.xml | 12 +++++----- .../resources/archetype-resources/pom.xml | 16 +++++++------- pom.xml | 10 ++++----- sdk/pom.xml | 22 +++++++------------ 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index 6b16defd203ee..bd02ea8f9bc83 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -371,7 +371,7 @@ ${google-clients.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -385,7 +385,7 @@ ${dataflow.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -399,7 +399,7 @@ ${bigquery.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -413,7 +413,7 @@ ${google-clients.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -433,7 +433,7 @@ ${datastore.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -447,7 +447,7 @@ ${pubsub.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml index bed413eaa04ce..85310878d7994 100644 --- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml +++ b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml @@ -100,10 +100,10 @@ com.google.api-client google-api-client - 1.20.0 + 1.21.0 + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -115,10 +115,10 @@ com.google.apis google-api-services-bigquery - v2-rev238-1.20.0 + v2-rev248-1.21.0 + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -129,10 +129,10 @@ com.google.http-client google-http-client - 1.20.0 + 1.21.0 + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -143,10 +143,10 @@ com.google.apis google-api-services-pubsub - v1-rev3-1.20.0 + v1-rev7-1.21.0 + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 diff --git a/pom.xml b/pom.xml index ea75ee107c6c7..f352b8435fa91 100644 --- a/pom.xml +++ b/pom.xml @@ -68,19 +68,19 @@ 1.7.7 - v2-rev238-1.20.0 - v1b3-rev10-1.20.0 + v2-rev248-1.21.0 + v1b3-rev12-1.21.0 v1beta2-rev1-3.0.2 - 1.20.0 + 1.21.0 18.0 1.3 2.4.2 2.4 4.11 2.5.0 - v1-rev3-1.20.0 + v1-rev7-1.21.0 1.7.7 - v1-rev25-1.19.1 + v1-rev53-1.21.0 pom diff --git a/sdk/pom.xml b/sdk/pom.xml index e04a64fd1861d..4514e2121ff4b 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -419,7 +419,7 @@ ${dataflow.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -451,7 +451,7 @@ ${bigquery.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -465,7 +465,7 @@ ${pubsub.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -479,7 +479,7 @@ ${storage.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -493,7 +493,7 @@ ${google-clients.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -501,19 +501,13 @@ - - com.google.http-client - google-http-client - ${google-clients.version} - - com.google.oauth-client google-oauth-client-java6 ${google-clients.version} + in by a 
transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -527,7 +521,7 @@ ${google-clients.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 @@ -541,7 +535,7 @@ ${datastore.version} + in by a transitive dependency of google-api-client --> com.google.guava guava-jdk5 From 470b7e430dbb4210a960942945360256091e7ab5 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 23 Nov 2015 15:04:25 -0800 Subject: [PATCH 1190/1541] Merge all subtriggers in AfterAll. Previously, not all subtriggers were given an opportunity to merge their state. Now they are each give such an opportunity. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108553113 --- .../sdk/transforms/windowing/AfterAll.java | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java index a542bbf679a92..e76c14b77a86b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java @@ -72,23 +72,21 @@ public TriggerResult onElement(OnElementContext c) throws Exception { @Override public MergeResult onMerge(OnMergeContext c) throws Exception { // CONTINUE if merging returns CONTINUE for at least one sub-trigger - // FIRE_AND_FINISH if merging returns FIRE or FIRE_AND_FINISH for at least one sub-trigger - // *and* FIRE, FIRE_AND_FINISH, or FINISH for all other sub-triggers. - // FINISH if merging returns FINISH for all sub-triggers. - boolean fired = false; + // ALREADY_FINISHED if merging returns ALREADY_FINISHED for all sub-triggers and this + // trigger itself was already finished in some window. + // FIRE_AND_FINISH otherwise: It means this trigger is ready to fire (because all subtriggers + // are satisfied) but has never fired as a whole. + boolean anyContinue = true; + boolean alreadyFinished = true; for (ExecutableTrigger subTrigger : c.trigger().subTriggers()) { MergeResult result = subTrigger.invokeMerge(c); - if (MergeResult.CONTINUE.equals(result)) { - return MergeResult.CONTINUE; - } - fired |= result.isFire(); + anyContinue |= !(result.isFire() && result.isFinish()); + alreadyFinished &= !result.isFire() && result.isFinish(); } - // When we reach this point, we know all subtriggers are finished, possibly already, - // possibly because of firing right now. So this trigger is finished and the decision - // is whether to fire. If no subtrigger wants to fire and the root trigger was already - // finished in some window, then there is no need to fire. - if (!fired && c.trigger().finishedInAnyMergingWindow()) { + if (anyContinue) { + return MergeResult.CONTINUE; + } else if (alreadyFinished && c.trigger().finishedInAnyMergingWindow()) { return MergeResult.ALREADY_FINISHED; } else { return MergeResult.FIRE_AND_FINISH; From a3e38ef748cd555a0de7b2c96b732307c315d5f6 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 24 Nov 2015 09:52:37 -0800 Subject: [PATCH 1191/1541] PubSubIO: add, test, document support for RFC3339 timestamps Fixes GoogleCloudPlatform/DataflowJavaSDK#65 ----Release Notes---- PubSubIO now accepts RFC 3339 timestamps in addition to milliseconds since Unix epoch. 
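As a rough usage sketch (not part of this patch; the topic name, the attribute name "ts", and the pre-built pipeline options are placeholders), a pipeline can request event-time timestamps from a message attribute, and with this change the attribute value may be either milliseconds since the Unix epoch or an RFC 3339 string:

    // Hypothetical example: both attribute values below are now accepted for the
    // timestamp attribute named "ts" (topic and attribute names are placeholders).
    //   ts = "1446162101123"              (milliseconds since the Unix epoch)
    //   ts = "2015-10-29T23:41:41.123Z"   (RFC 3339; digits beyond milliseconds are dropped)
    Pipeline p = Pipeline.create(options);
    PCollection<String> messages = p.apply(
        PubsubIO.Read.named("ReadWithEventTimestamps")
            .topic("projects/my-project/topics/my-topic")
            .timestampLabel("ts"));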
[] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108621625 --- .../cloud/dataflow/sdk/io/PubsubIO.java | 82 +++++++-- .../cloud/dataflow/sdk/io/PubsubIOTest.java | 157 ++++++++++++++++++ 2 files changed, 222 insertions(+), 17 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java index 99c802964baf7..653b31f059e4d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java @@ -16,6 +16,11 @@ package com.google.cloud.dataflow.sdk.io; +import static com.google.common.base.MoreObjects.firstNonNull; +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.api.client.util.Clock; +import com.google.api.client.util.DateTime; import com.google.api.services.pubsub.Pubsub; import com.google.api.services.pubsub.model.AcknowledgeRequest; import com.google.api.services.pubsub.model.PublishRequest; @@ -43,7 +48,9 @@ import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded; import com.google.cloud.dataflow.sdk.values.PDone; import com.google.cloud.dataflow.sdk.values.PInput; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableMap; import org.joda.time.Duration; import org.joda.time.Instant; @@ -129,6 +136,48 @@ private static void validatePubsubName(String name) { } } + /** + * Returns the {@link Instant} that corresponds to the timestamp in the supplied + * {@link PubsubMessage} under the specified {@code ink label}. See + * {@link PubsubIO.Read#timestampLabel(String)} for details about how these messages are + * parsed. + * + *

The {@link Clock} parameter is used to virtualize time for testing. + * + * @throws IllegalArgumentException if the timestamp label is provided, but there is no + * corresponding attribute in the message or the value provided is not a valid timestamp + * string. + * @see PubsubIO.Read#timestampLabel(String) + */ + @VisibleForTesting + protected static Instant assignMessageTimestamp( + PubsubMessage message, @Nullable String label, Clock clock) { + if (label == null) { + return new Instant(clock.currentTimeMillis()); + } + + // Extract message attributes, defaulting to empty map if null. + Map attributes = firstNonNull( + message.getAttributes(), ImmutableMap.of()); + + String timestampStr = attributes.get(label); + checkArgument(timestampStr != null && !timestampStr.isEmpty(), + "PubSub message is missing a timestamp in label: %s", label); + + long millisSinceEpoch; + try { + // Try parsing as milliseconds since epoch. Note there is no way to parse a string in + // RFC 3339 format here. + // Expected IllegalArgumentException if parsing fails; we use that to fall back to RFC 3339. + millisSinceEpoch = Long.parseLong(timestampStr); + } catch (IllegalArgumentException e) { + // Try parsing as RFC3339 string. DateTime.parseRfc3339 will throw an IllegalArgumentException + // if parsing fails, and the caller should handle. + millisSinceEpoch = DateTime.parseRfc3339(timestampStr).getValue(); + } + return new Instant(millisSinceEpoch); + } + /** * Class representing a Cloud Pub/Sub Subscription. */ @@ -387,10 +436,18 @@ public static Bound subscription(String subscription) { /** * Creates and returns a transform reading from Cloud Pub/Sub where record timestamps are * expected to be provided as Pub/Sub message attributes. The {@code timestampLabel} - * parameter specifies the name of the attribute that contains the timestamp. The value of the - * attribute should be a numerical value representing the number of milliseconds since the Unix - * epoch. For example, if using the Joda time classes, - * {@link Instant#getMillis()} returns the correct value for this label. + * parameter specifies the name of the attribute that contains the timestamp. + * + *

The timestamp value is expected to be represented in the attribute as either: + * + *

    + *
  • a numerical value representing the number of milliseconds since the Unix epoch. For + * example, if using the Joda time classes, {@link Instant#getMillis()} returns the correct + * value for this attribute. + *
  • a String in RFC 3339 format. For example, {@code 2015-10-29T23:41:41.123Z}. The + * sub-second component of the timestamp is optional, and digits beyond the first three + * (i.e., time units smaller than milliseconds) will be ignored. + *
* *

If {@code timestampLabel} is not provided, the system will generate record timestamps * the first time it sees each record. All windowing will be done relative to these timestamps. @@ -402,6 +459,8 @@ public static Bound subscription(String subscription) { * *

Note that the system can guarantee that no late data will ever be seen when it assigns * timestamps by arrival time (i.e. {@code timestampLabel} is not provided). + * + * @see RFC 3339 */ public static Bound timestampLabel(String timestampLabel) { return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel); @@ -734,20 +793,9 @@ public void processElement(ProcessContext c) throws IOException { } for (PubsubMessage message : messages) { - Instant timestamp; - if (getTimestampLabel() == null) { - timestamp = Instant.now(); - } else { - if (message.getAttributes() == null - || !message.getAttributes().containsKey(getTimestampLabel())) { - throw new RuntimeException( - "Message from pubsub missing timestamp label: " + getTimestampLabel()); - } - timestamp = - new Instant(Long.parseLong(message.getAttributes().get(getTimestampLabel()))); - } c.outputWithTimestamp( - CoderUtils.decodeFromByteArray(getCoder(), message.decodeData()), timestamp); + CoderUtils.decodeFromByteArray(getCoder(), message.decodeData()), + assignMessageTimestamp(message, getTimestampLabel(), Clock.SYSTEM)); } } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/PubsubIOTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/PubsubIOTest.java index ad0fcafdcd2d9..8e7ad29bbe244 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/PubsubIOTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/PubsubIOTest.java @@ -18,12 +18,21 @@ import static org.junit.Assert.assertEquals; +import com.google.api.client.testing.http.FixedClock; +import com.google.api.client.util.Clock; +import com.google.api.services.pubsub.model.PubsubMessage; + +import org.joda.time.Instant; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.util.HashMap; + +import javax.annotation.Nullable; + /** * Tests for PubsubIO Read and Write transforms. */ @@ -73,4 +82,152 @@ public void testTopicValidationTooLong() throws Exception { .append("1111111111111111111111111111111111111111111111111111111111111111111111111111") .toString()); } + + /** + * Helper function that creates a {@link PubsubMessage} with the given timestamp registered as + * an attribute with the specified label. + * + *

If {@code label} is {@code null}, then the attributes are {@code null}. + * + *

Else, if {@code timestamp} is {@code null}, then attributes are present but have no key for + * the label. + */ + private static PubsubMessage messageWithTimestamp( + @Nullable String label, @Nullable String timestamp) { + PubsubMessage message = new PubsubMessage(); + if (label == null) { + message.setAttributes(null); + return message; + } + + message.setAttributes(new HashMap()); + + if (timestamp == null) { + return message; + } + + message.getAttributes().put(label, timestamp); + return message; + } + + /** + * Helper function that parses the given string to a timestamp through the PubSubIO plumbing. + */ + private static Instant parseTimestamp(@Nullable String timestamp) { + PubsubMessage message = messageWithTimestamp("mylabel", timestamp); + return PubsubIO.assignMessageTimestamp(message, "mylabel", Clock.SYSTEM); + } + + @Test + public void noTimestampLabelReturnsNow() { + final long time = 987654321L; + Instant timestamp = PubsubIO.assignMessageTimestamp( + messageWithTimestamp(null, null), null, new FixedClock(time)); + + assertEquals(new Instant(time), timestamp); + } + + @Test + public void timestampLabelWithNullAttributesThrowsError() { + PubsubMessage message = messageWithTimestamp(null, null); + thrown.expect(RuntimeException.class); + thrown.expectMessage("PubSub message is missing a timestamp in label: myLabel"); + + PubsubIO.assignMessageTimestamp(message, "myLabel", Clock.SYSTEM); + } + + @Test + public void timestampLabelSetWithMissingAttributeThrowsError() { + PubsubMessage message = messageWithTimestamp("notMyLabel", "ignored"); + thrown.expect(RuntimeException.class); + thrown.expectMessage("PubSub message is missing a timestamp in label: myLabel"); + + PubsubIO.assignMessageTimestamp(message, "myLabel", Clock.SYSTEM); + } + + @Test + public void timestampLabelParsesMillisecondsSinceEpoch() { + Long millis = 1446162101123L; + assertEquals(new Instant(millis), parseTimestamp(millis.toString())); + } + + @Test + public void timestampLabelParsesRfc3339Seconds() { + String rfc3339 = "2015-10-29T23:41:41Z"; + assertEquals(Instant.parse(rfc3339), parseTimestamp(rfc3339)); + } + + @Test + public void timestampLabelParsesRfc3339Tenths() { + String rfc3339tenths = "2015-10-29T23:41:41.1Z"; + assertEquals(Instant.parse(rfc3339tenths), parseTimestamp(rfc3339tenths)); + } + + @Test + public void timestampLabelParsesRfc3339Hundredths() { + String rfc3339hundredths = "2015-10-29T23:41:41.12Z"; + assertEquals(Instant.parse(rfc3339hundredths), parseTimestamp(rfc3339hundredths)); + } + + @Test + public void timestampLabelParsesRfc3339Millis() { + String rfc3339millis = "2015-10-29T23:41:41.123Z"; + assertEquals(Instant.parse(rfc3339millis), parseTimestamp(rfc3339millis)); + } + + @Test + public void timestampLabelParsesRfc3339Micros() { + String rfc3339micros = "2015-10-29T23:41:41.123456Z"; + assertEquals(Instant.parse(rfc3339micros), parseTimestamp(rfc3339micros)); + // Note: micros part 456/1000 is dropped. + assertEquals(Instant.parse("2015-10-29T23:41:41.123Z"), parseTimestamp(rfc3339micros)); + } + + @Test + public void timestampLabelParsesRfc3339MicrosRounding() { + String rfc3339micros = "2015-10-29T23:41:41.123999Z"; + assertEquals(Instant.parse(rfc3339micros), parseTimestamp(rfc3339micros)); + // Note: micros part 999/1000 is dropped, not rounded up. 
+ assertEquals(Instant.parse("2015-10-29T23:41:41.123Z"), parseTimestamp(rfc3339micros)); + } + + @Test + public void timestampLabelWithInvalidFormatThrowsError() { + thrown.expect(NumberFormatException.class); + parseTimestamp("not-a-timestamp"); + } + + @Test + public void timestampLabelWithInvalidFormat2ThrowsError() { + thrown.expect(NumberFormatException.class); + parseTimestamp("null"); + } + + @Test + public void timestampLabelWithInvalidFormat3ThrowsError() { + thrown.expect(NumberFormatException.class); + parseTimestamp("2015-10"); + } + + @Test + public void timestampLabelParsesRfc3339WithSmallYear() { + // Google and JodaTime agree on dates after 1582-10-15, when the Gregorian Calendar was adopted + // This is therefore a "small year" until this difference is reconciled. + String rfc3339SmallYear = "1582-10-15T01:23:45.123Z"; + assertEquals(Instant.parse(rfc3339SmallYear), parseTimestamp(rfc3339SmallYear)); + } + + @Test + public void timestampLabelParsesRfc3339WithLargeYear() { + // Year 9999 in range. + String rfc3339LargeYear = "9999-10-29T23:41:41.123999Z"; + assertEquals(Instant.parse(rfc3339LargeYear), parseTimestamp(rfc3339LargeYear)); + } + + @Test + public void timestampLabelRfc3339WithTooLargeYearThrowsError() { + thrown.expect(NumberFormatException.class); + // Year 10000 out of range. + parseTimestamp("10000-10-29T23:41:41.123999Z"); + } } From 5b6ed0544a4b81a6f28c6b33384c45f357fdc909 Mon Sep 17 00:00:00 2001 From: klk Date: Wed, 8 Jul 2015 10:51:40 -0700 Subject: [PATCH 1192/1541] Add customized output timestamp for GroupByKey result This addresses a problem whereby the output watermark may be held longer than neeeded/desired, depending on how combined data is going to be used. By default, in a GroupByKey (and window) the collection of grouped values for a window receives a timestamp of the minimum non-late data, and the output watermark never advances past this time until it is output. This ensures that timestamps extracted from the collection data are never late relative to the watermark. There are situations where this is not desirable. For example, with session windows the minimum non-late timestamp for a window may be indefinitely far in the past, hence the output watermark may be held up indefinitely, preventing later shorter-lived sessions from being closed and processed. A related issue affects sliding windows but a one-off solution is already in place. After this experimental addition, the behavior may be customized by providing an OutputTimeFn to Window.into(...).withOutputTimeFn(...). There are three policies provided in OutputTimeFns: - output at the earliest non-late input timestamp - output at the latest non-late input timestamp - output at the end of the window Either of the latest non-late input timestamp or the end of window allow session windows to close in a timely manner. Start at the javadoc for OutputTimeFn for a full description. ----Release Notes---- - Added customized output timestamps for GroupByKey results. This allows, in particular, setting output timestamps to a later time in order to allow the output watermark to progress, rather than being held at the earliest timestamp of any buffered input. 
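As a rough usage sketch (not part of this patch; the input collection, its key/value types, and the gap duration are hypothetical), a session-windowed aggregation could opt into the end-of-window policy so that a long-lived session does not hold the output watermark for shorter, earlier-ending sessions:

    // Emit each session's sum with the end-of-window timestamp instead of the earliest
    // buffered element, letting the watermark progress past earlier-ending sessions.
    PCollection<KV<String, Long>> sessionSums = events
        .apply(Window.<KV<String, Long>>into(
                Sessions.withGapDuration(Duration.standardMinutes(10)))
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
        .apply(Sum.<String>longsPerKey());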
[] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108627978 --- .../sdk/annotations/Experimental.java | 5 +- .../worker/StreamingSideInputDoFnRunner.java | 3 +- .../worker/WindmillStateInternals.java | 106 +++- .../sdk/testing/WindowFnTestUtils.java | 90 ++- .../transforms/windowing/CalendarWindows.java | 6 +- .../transforms/windowing/OutputTimeFn.java | 327 +++++++++++ .../transforms/windowing/OutputTimeFns.java | 168 ++++++ .../windowing/PartitioningWindowFn.java | 4 +- .../sdk/transforms/windowing/Sessions.java | 14 +- .../transforms/windowing/SlidingWindows.java | 25 +- .../sdk/transforms/windowing/Window.java | 62 +- .../sdk/transforms/windowing/WindowFn.java | 88 ++- .../GroupAlsoByWindowsAndCombineDoFn.java | 86 +-- .../sdk/util/GroupAlsoByWindowsDoFn.java | 18 +- .../GroupAlsoByWindowsViaIteratorsDoFn.java | 14 +- .../sdk/util/ReduceFnContextFactory.java | 5 +- .../dataflow/sdk/util/ReduceFnRunner.java | 3 + .../dataflow/sdk/util/WatermarkHold.java | 58 +- .../dataflow/sdk/util/WindowingStrategy.java | 54 +- .../util/state/InMemoryStateInternals.java | 45 +- .../state/MergedWatermarkStateInternal.java | 57 +- .../sdk/util/state/MergingStateInternals.java | 22 +- .../sdk/util/state/StateInternals.java | 16 +- .../dataflow/sdk/util/state/StateTag.java | 12 +- .../dataflow/sdk/util/state/StateTags.java | 26 +- .../util/state/WatermarkStateInternal.java | 23 +- .../worker/WindmillStateInternalsTest.java | 254 +++++++-- .../sdk/transforms/GroupByKeyTest.java | 59 ++ .../transforms/windowing/SessionsTest.java | 48 +- .../sdk/transforms/windowing/WindowTest.java | 64 ++- .../GroupAlsoByWindowsAndCombineDoFnTest.java | 14 +- .../util/GroupAlsoByWindowsProperties.java | 528 +++++++++++++----- ...roupAlsoByWindowsViaIteratorsDoFnTest.java | 22 + ...pAlsoByWindowsViaOutputBufferDoFnTest.java | 44 ++ .../dataflow/sdk/util/TriggerTester.java | 8 +- .../state/InMemoryStateInternalsTest.java | 118 +++- .../dataflow/sdk/util/state/StateTagTest.java | 15 +- 37 files changed, 2112 insertions(+), 399 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java index a75a5c9980a28..f094442ec3c08 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java @@ -69,6 +69,9 @@ public enum Kind { CODER_ENCODING_ID, /** State-related experimental APIs. */ - STATE + STATE, + + /** Experimental APIs related to customizing the output time for computed values. 
*/ + OUTPUT_TIME } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java index 4e4874bf8b878..85e79b6438073 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java @@ -107,7 +107,8 @@ public StreamingSideInputDoFnRunner( this.elementsAddr = StateTags.makeSystemTagInternal(StateTags.bag("elem", WindowedValue.getFullCoder(doFnInfo.getInputCoder(), windowFn.windowCoder()))); this.watermarkHoldingAddr = - StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal("hold")); + StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal("hold", + doFnInfo.getWindowingStrategy().getOutputTimeFn())); this.blockedMap = stepContext.stateInternals().state(StateNamespaces.global(), blockedMapAddr) .get().read(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java index c91644d77aa72..6f0b44c4c4559 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java @@ -18,6 +18,8 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.util.state.BagState; import com.google.cloud.dataflow.sdk.util.state.CombiningValueStateInternal; @@ -64,10 +66,15 @@ public BagState bindBag(StateTag> address, Coder elemCoder } @Override - public WatermarkStateInternal bindWatermark( - StateTag address) { + public WatermarkStateInternal bindWatermark( + StateTag address, + OutputTimeFn outputTimeFn) { return new WindmillWatermarkState( - encodeKey(namespace, address), stateFamily, reader, scopedReadStateSupplier); + encodeKey(namespace, address), + stateFamily, + reader, + scopedReadStateSupplier, + outputTimeFn); } @Override @@ -350,6 +357,7 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws private static class WindmillWatermarkState implements WatermarkStateInternal, WindmillState { + private final OutputTimeFn outputTimeFn; private final ByteString stateKey; private final String stateFamily; private final WindmillStateReader reader; @@ -358,12 +366,17 @@ private static class WindmillWatermarkState implements WatermarkStateInternal, W private boolean cleared = false; private Instant localAdditions = null; - private WindmillWatermarkState(ByteString stateKey, String stateFamily, - WindmillStateReader reader, Supplier readStateSupplier) { + private WindmillWatermarkState( + ByteString stateKey, + String stateFamily, + WindmillStateReader reader, + Supplier readStateSupplier, + OutputTimeFn outputTimeFn) { this.stateKey = stateKey; this.stateFamily = stateFamily; this.reader = reader; this.readStateSupplier = readStateSupplier; + this.outputTimeFn = outputTimeFn; } @Override @@ -372,6 +385,15 @@ public void clear() 
{ localAdditions = null; } + /** + * {@inheritDoc} + * + *

Does nothing. There is only one hold and it is not extraneous. + * See {@link MergedWatermarkStateInternal} for a nontrivial implementation. + */ + @Override + public void releaseExtraneousHolds() { } + @Override public StateContents get() { // If we clear after calling get() but before calling read(), technically we didn't need the @@ -386,11 +408,9 @@ public StateContents get() { public Instant read() { Instant value = localAdditions; if (!cleared) { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { + try (StateSampler.ScopedState scope = readStateSupplier.get()) { Instant persisted = persistedData.get(); - if (value == null || (persisted != null && persisted.isBefore(value))) { - value = persisted; - } + value = (value == null) ? persisted : outputTimeFn.combine(value, persisted); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException("Unable to read state", e); } @@ -422,28 +442,80 @@ public Boolean read() { } @Override - public void add(Instant watermarkHold) { - if (localAdditions == null || watermarkHold.isBefore(localAdditions)) { - localAdditions = watermarkHold; - } + public void add(Instant outputTime) { + localAdditions = (localAdditions == null) ? outputTime + : outputTimeFn.combine(outputTime, localAdditions); } @Override public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) { - // If we do a delete, we need to have done a read - if (cleared) { - reader.watermarkFuture(stateKey, stateFamily); + if (!cleared && localAdditions == null) { + // Nothing to do + return; + } else if (cleared && localAdditions == null) { + // Just clearing the persisted state; blind delete commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true); + + } else if (cleared && localAdditions != null) { + // Since we cleared before adding, we can do a blind overwrite of persisted state + commitBuilder.addWatermarkHoldsBuilder() + .setTag(stateKey) + .setStateFamily(stateFamily) + .setReset(true) + .addTimestamps(TimeUnit.MILLISECONDS.toMicros(localAdditions.getMillis())); + } else if (!cleared && localAdditions != null){ + // Otherwise, we need to combine the local additions with the already persisted data + combineWithPersisted(commitBuilder); + } else { + throw new IllegalStateException("Unreachable condition"); } + } - if (localAdditions != null) { + /** + * Combines local additions with persisted data and mutates the {@code commitBuilder} + * to write the result. + */ + private void combineWithPersisted(Windmill.WorkItemCommitRequest.Builder commitBuilder) { + boolean windmillCanCombine = false; + + // If the combined output time depends only on the window, then we are just blindly adding + // the same value that may or may not already be present. This depends on the state only being + // used for one window. + windmillCanCombine |= outputTimeFn.dependsOnlyOnWindow(); + + // If the combined output time depends only on the earliest input timestamp, then because + // assignOutputTime is monotonic, the hold only depends on the earliest output timestamp + // (which is the value submitted as a watermark hold). The only way holds for later inputs + // can be redundant is if the are later (or equal) to the earliest. So taking the MIN + // implicitly, as Windmill does, has the desired behavior. 
+ windmillCanCombine |= outputTimeFn.dependsOnlyOnEarliestInputTimestamp(); + + if (windmillCanCombine) { + // We do a blind write and let Windmill take the MIN commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .addTimestamps(TimeUnit.MILLISECONDS.toMicros(localAdditions.getMillis())); + } else { + // The non-fast path does a read-modify-write + Instant priorHold; + try { + priorHold = reader.watermarkFuture(stateKey, stateFamily).get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Unable to read state", e); + } + + Instant combinedHold = (priorHold == null) ? localAdditions + : outputTimeFn.combine(priorHold, localAdditions); + + commitBuilder.addWatermarkHoldsBuilder() + .setTag(stateKey) + .setStateFamily(stateFamily) + .setReset(true) + .addTimestamps(TimeUnit.MILLISECONDS.toMicros(combinedHold.getMillis())); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java index 300e1c5ff11e4..dc0baf52b81de 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java @@ -16,13 +16,19 @@ package com.google.cloud.dataflow.sdk.testing; +import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import org.joda.time.Instant; +import org.joda.time.ReadableInstant; import java.util.ArrayList; import java.util.Collection; @@ -34,6 +40,8 @@ import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; + /** * A utility class for testing {@link WindowFn}s. */ @@ -51,7 +59,6 @@ public static Set set(long... timestamps) { return result; } - /** * Runs the {@link WindowFn} over the provided input, returning a map * of windows to the timestamps in those windows. @@ -193,7 +200,7 @@ public static void validateNonInterferingOutputTime Instant instant = new Instant(timestamp); for (W window : windows) { - Instant outputTimestamp = windowFn.getOutputTime(instant, window); + Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window); assertFalse("getOutputTime must be greater than or equal to input timestamp", outputTimestamp.isBefore(instant)); assertFalse("getOutputTime must be less than or equal to the max timestamp", @@ -203,8 +210,9 @@ public static void validateNonInterferingOutputTime /** * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and - * verifies that result of {@code windowFn.getOutputTimestamp} for later windows (as defined by - * {@code maxTimestamp} won't prevent the watermark from passing the end of earlier windows. + * verifies that result of {@link WindowFn#getOutputTime windowFn.getOutputTime} for later windows + * (as defined by {@code maxTimestamp} won't prevent the watermark from passing the end of earlier + * windows. * *

This verifies that overlapping windows don't interfere at all. Depending on the * {@code windowFn} this may be stricter than desired. @@ -223,7 +231,7 @@ public int compare(BoundedWindow o1, BoundedWindow o2) { Instant instant = new Instant(timestamp); Instant endOfPrevious = null; for (W window : sortedWindows) { - Instant outputTimestamp = windowFn.getOutputTime(instant, window); + Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window); if (endOfPrevious == null) { // If this is the first window, the output timestamp can be anything, as long as it is in // the valid range. @@ -242,4 +250,76 @@ public int compare(BoundedWindow o1, BoundedWindow o2) { endOfPrevious = window.maxTimestamp(); } } + + /** + * Verifies that later-ending merged windows from any of the timestamps hold up output of + * earlier-ending windows, using the provided {@link WindowFn} and {@link OutputTimeFn}. + * + *

Given a list of lists of timestamps, where each list is expected to merge into a single + * window with end times in ascending order, assigns and merges windows for each list (as though + * each were a separate key/user session). Then maps each timestamp in the list according to + * {@link OutputTimeFn#assignOutputTime outputTimeFn.assignOutputTime()} and + * {@link OutputTimeFn#combine outputTimeFn.combine()}. + * + *

Verifies that a overlapping windows do not hold each other up via the watermark. + */ + public static + void validateGetOutputTimestamps( + WindowFn windowFn, + OutputTimeFn outputTimeFn, + List> timestampsPerWindow) throws Exception { + + // Assign windows to each timestamp, then merge them, storing the merged windows in + // a list in corresponding order to timestampsPerWindow + final List windows = new ArrayList<>(); + for (List timestampsForWindow : timestampsPerWindow) { + final Set windowsToMerge = new HashSet<>(); + + for (long timestamp : timestampsForWindow) { + windowsToMerge.addAll( + WindowFnTestUtils.assignedWindows(windowFn, timestamp)); + } + + windowFn.mergeWindows(windowFn.new MergeContext() { + @Override + public Collection windows() { + return windowsToMerge; + } + + @Override + public void merge(Collection toBeMerged, W mergeResult) throws Exception { + windows.add(mergeResult); + } + }); + } + + // Map every list of input timestamps to an output timestamp + final List combinedOutputTimestamps = new ArrayList<>(); + for (int i = 0; i < timestampsPerWindow.size(); ++i) { + List timestampsForWindow = timestampsPerWindow.get(i); + W window = windows.get(i); + + List outputInstants = new ArrayList<>(); + for (long inputTimestamp : timestampsForWindow) { + outputInstants.add(outputTimeFn.assignOutputTime(new Instant(inputTimestamp), window)); + } + + combinedOutputTimestamps.add(OutputTimeFns.combineOutputTimes(outputTimeFn, outputInstants)); + } + + // Consider windows in increasing order of max timestamp; ensure the output timestamp is after + // the max timestamp of the previous + @Nullable W earlierEndingWindow = null; + for (int i = 0; i < windows.size(); ++i) { + W window = windows.get(i); + ReadableInstant outputTimestamp = combinedOutputTimestamps.get(i); + + if (earlierEndingWindow != null) { + assertThat(outputTimestamp, + greaterThan((ReadableInstant) earlierEndingWindow.maxTimestamp())); + } + + earlierEndingWindow = window; + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java index 689ebf247e480..de5140f2a5d60 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java @@ -131,7 +131,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof DaysWindows)) { return false; } @@ -217,7 +217,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof MonthsWindows)) { return false; } @@ -312,7 +312,7 @@ public Coder windowCoder() { } @Override - public boolean isCompatible(WindowFn other) { + public boolean isCompatible(WindowFn other) { if (!(other instanceof YearsWindows)) { return false; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java new file mode 100644 index 0000000000000..f97cd8589f0e1 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2015 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.common.collect.Ordering; + +import org.joda.time.Instant; + +import java.io.Serializable; +import java.util.Objects; + +/** + * (Experimental) A function from timestamps of input values to the timestamp for a + * computed value. + * + *

The function is represented via three components: + *

    + *
  1. {@link #assignOutputTime} calculates an output timestamp for any input + * value in a particular window.
  2. The output timestamps for all non-late input values within a window are combined + * according to {@link #combine combine()}, a commutative and associative operation on + * the output timestamps.
  3. The output timestamp when windows merge is provided by {@link #merge merge()}.
+ * + *

To implement this interface, extend {@link OutputTimeFn.Defaults} or + * {@link OutputTimeFn.DependsOnlyOnWindow}; otherwise your implementation may be impacted when the + * interface is enlarged. This interface will only be enlarged in ways that are + * backwards-compatible for consumers. The base classes will only be changed in ways that + * are backwards-compatible for implementors as well. + * + *

Note that as long as the interface remains experimental, we may choose to change it in + * arbitrary backwards incompatible ways if it is indicated by the experiment. + * + * @param the type of window. Contravariant: methods accepting any subtype of + * {@code OutputTimeFn} should use the parameter type {@code OutputTimeFn}. + */ +@Experimental(Experimental.Kind.OUTPUT_TIME) +public interface OutputTimeFn extends Serializable { + + /** + * Returns the output timestamp to use for data depending on the given + * {@code inputTimestamp} in the specified {@code window}. + * + * + *

The result of this method must be between {@code inputTimestamp} and + * {@code window.maxTimestamp()} (inclusive on both sides). + * + *

This function must be monotonic across input timestamps. Specifically, if {@code A < B}, + * then {@code assignOutputTime(A, window) <= assignOutputTime(B, window)}. + * + *

For a {@link WindowFn} that doesn't produce overlapping windows, this can (and typically + * should) just return {@code inputTimestamp}. In the presence of overlapping windows, it is + * suggested that the result in later overlapping windows is past the end of earlier windows + * so that the later windows don't prevent the watermark from + * progressing past the end of the earlier window. + * + *

See the overview of {@link OutputTimeFn} for the consistency properties required + * between {@link #assignOutputTime}, {@link #combine}, and {@link #merge}. + */ + Instant assignOutputTime(Instant inputTimestamp, W window); + + /** + * Combines the given output times, which must be from the same window, into an output time + * for a computed value. + * + *

    + *
  • {@code combine} must be commutative: {@code combine(a, b).equals(combine(b, a))}.
  • {@code combine} must be associative: + * {@code combine(a, combine(b, c)).equals(combine(combine(a, b), c))}.
+ */ + Instant combine(Instant outputTime, Instant otherOutputTime); + + /** + * Merges the given output times, presumed to be combined output times for windows that + * are merging, into an output time for the {@code resultWindow}. + * + *

When windows {@code w1} and {@code w2} merge to become a new window {@code w1plus2}, + * then {@link #merge} must be implemented such that the output time is the same as + * if all timestamps were assigned in {@code w1plus2}. Formally: + * + *

{@code fn.merge(w, fn.assignOutputTime(t1, w1), fn.assignOutputTime(t2, w2))} + * + *

must be equal to + * + *

{@code fn.combine(fn.assignOutputTime(t1, w1plus2), fn.assignOutputTime(t2, w1plus2))} + * + *

If the assigned time depends only on the window, the correct implementation of + * {@link #merge merge()} necessarily returns the result of + * {@link #assignOutputTime assignOutputTime(t1, w1plus2)} + * (which equals {@link #assignOutputTime assignOutputTime(t2, w1plus2)}). + * Defaults for this case are provided by {@link DependsOnlyOnWindow}. + * + *

For many other {@link OutputTimeFn} implementations, such as taking the earliest or latest + * timestamp, this will be the same as {@link #combine combine()}. Defaults for this + * case are provided by {@link Defaults}. + */ + Instant merge(W intoWindow, Iterable mergingTimestamps); + + /** + * Returns {@code true} if the result of combining many output timestamps actually depends + * only on the earliest. + * + *

This may allow optimizations when it is very efficient to retrieve the earliest timestamp + * to be combined. + */ + boolean dependsOnlyOnEarliestInputTimestamp(); + + /** + * Returns {@code true} if the result does not depend on what outputs were combined but only + * the window they are in. The canonical example is if all timestamps are sure to + * be the end of the window. + * + *

This may allow optimizations, since it is typically very efficient to retrieve the window + * and combining output timestamps is not necessary. + * + *

If the assigned output time for an implementation depends only on the window, consider + * extending {@link DependsOnlyOnWindow}, which returns {@code true} here and also provides + * a framework for easily implementing a correct {@link #merge}, {@link #combine} and + * {@link #assignOutputTime}. + */ + boolean dependsOnlyOnWindow(); + + /** + * Please extend {@link Defaults} or {@link DependsOnlyOnWindow} if you want guaranteed + * compilation compatibility; this interface may be enlarged in consumer-compatible ways. + */ + void pleaseExtendBaseClassesForCompilationCompatibility(); + + /** + * (Experimental) Default method implementations for {@link OutputTimeFn} where the + * output time depends on the input element timestamps and possibly the window. + * + *

To complete an implementation, override {@link #assignOutputTime}, at a minimum. + * + *

By default, {@link #combine} and {@link #merge} return the earliest timestamp of their + * inputs. + */ + public abstract static class Defaults implements OutputTimeFn { + + /** + * {@inheritDoc} + * + * @return the earlier of the two timestamps. + */ + @Override + public Instant combine(Instant outputTimestamp, Instant otherOutputTimestamp) { + return Ordering.natural().min(outputTimestamp, otherOutputTimestamp); + } + + /** + * {@inheritDoc} + * + * @return the result of {@link #combine combine(outputTimstamp, otherOutputTimestamp)}, + * by default. + */ + @Override + public Instant merge(W resultWindow, Iterable mergingTimestamps) { + return OutputTimeFns.combineOutputTimes(this, mergingTimestamps); + } + + /** + * {@inheritDoc} + * + * @return {@code false}. An {@link OutputTimeFn} that depends only on the window should extend + * {@link DependsOnlyOnWindow}. + */ + @Override + public final boolean dependsOnlyOnWindow() { + return false; + } + + /** + * {@inheritDoc} + * + * @return {@code true} by default. + */ + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return false; + } + + /** + * {@inheritDoc} + * + * @return {@code true} if the two {@link OutputTimeFn} instances have the same class, by + * default. + */ + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + + return this.getClass().equals(other.getClass()); + } + + @Override + public int hashCode() { + return Objects.hash(getClass()); + } + + /** + * This base class provides compilation compatibility when {@link OutputTimeFn} is enlarged. + */ + @Override + public void pleaseExtendBaseClassesForCompilationCompatibility() { } + } + + /** + * (Experimental) Default method implementations for {@link OutputTimeFn} when the + * output time depends only on the window. + * + *

To complete an implementation, override {@link #assignOutputTime(BoundedWindow)}. + */ + public abstract static class DependsOnlyOnWindow + implements OutputTimeFn { + + /** + * Returns the output timestamp to use for data in the specified {@code window}. + * + *

Note that the result of this method must be between the maximum possible input timestamp + * in {@code window} and {@code window.maxTimestamp()} (inclusive on both sides). + * + *

For example, using {@code Sessions.withGapDuration(gapDuration)}, we know that all input + * timestamps must lie at least {@code gapDuration} from the end of the session, so + * {@code window.maxTimestamp() - gapDuration} is an acceptable assigned timestamp. + * + * @see #assignOutputTime(Instant, BoundedWindow) + */ + protected abstract Instant assignOutputTime(W window); + + /** + * {@inheritDoc} + * + * @return the result of {#link assignOutputTime(BoundedWindow) assignOutputTime(window)}. + */ + @Override + public final Instant assignOutputTime(Instant timestamp, W window) { + return assignOutputTime(window); + } + + /** + * {@inheritDoc} + * + * @return the same timestamp as both argument timestamps, which are necessarily equal. + */ + @Override + public final Instant combine(Instant outputTimestamp, Instant otherOutputTimestamp) { + return outputTimestamp; + } + + /** + * {@inheritDoc} + * + * @return the result of + * {@link #assignOutputTime(BoundedWindow) assignOutputTime(resultWindow)}. + */ + @Override + public final Instant merge(W resultWindow, Iterable mergingTimestamps) { + return assignOutputTime(resultWindow); + } + + /** + * {@inheritDoc} + * + * @return {@code true}. + */ + @Override + public final boolean dependsOnlyOnWindow() { + return true; + } + + /** + * {@inheritDoc} + * + * @return {@code true}. Since the output time depends only on the window, it can + * certainly be ascertained given a single input timestamp. + */ + @Override + public final boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + + /** + * {@inheritDoc} + * + * @return {@code true} if the two {@link OutputTimeFn} instances have the same class, by + * default. + */ + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + + return this.getClass().equals(other.getClass()); + } + + @Override + public int hashCode() { + return Objects.hash(getClass()); + } + + /** + * This base class provides compilation compatibility when {@link OutputTimeFn} is enlarged. + */ + @Override + public void pleaseExtendBaseClassesForCompilationCompatibility() { } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java new file mode 100644 index 0000000000000..dcc0f5b7b9c2e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.transforms.windowing; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.common.collect.Iterables; +import com.google.common.collect.Ordering; + +import org.joda.time.Instant; + +import javax.annotation.Nullable; + +/** + * (Experimental) Static utility methods and provided implementations for + * {@link OutputTimeFn}. + */ +@Experimental(Experimental.Kind.OUTPUT_TIME) +public class OutputTimeFns { + /** + * The policy of outputting at the earliest of the input timestamps for non-late input data + * that led to a computed value. + * + *

For example, suppose v1 through vn are all on-time + * elements being aggregated via some function {@code f} into + * {@code f}(v1, ..., vn). When emitted, the output + * timestamp of the result will be the earliest of the event time timestamps. + * + *

If data arrives late, it has no effect on the output timestamp. + */ + public static OutputTimeFn outputAtEarliestInputTimestamp() { + return new OutputAtEarliestInputTimestamp(); + } + + /** + * The policy of holding the watermark to the latest of the input timestamps + * for non-late input data that led to a computed value. + * + *

For example, suppose v1 through vn are all on-time + * elements being aggregated via some function {@code f} into + * {@code f}(v1, ..., vn). When emitted, the output + * timestamp of the result will be the latest of the event time timestamps. + * + *

If data arrives late, it has no effect on the output timestamp. + */ + public static OutputTimeFn outputAtLatestInputTimestamp() { + return new OutputAtLatestInputTimestamp(); + } + + /** + * The policy of outputting with timestamps at the end of the window. + * + *

Note that this output timestamp depends only on the window. See + * {@link OutputTimeFn#dependsOnlyOnWindow()}. + * + *

When windows merge, instead of using {@link OutputTimeFn#combine} to obtain an output + * timestamp for the results in the new window, it is mandatory to obtain a new output + * timestamp from {@link OutputTimeFn#assignOutputTime} with the new window and an arbitrary + * timestamp (because it is guaranteed that the timestamp is irrelevant). + * + *

For non-merging window functions, this {@link OutputTimeFn} works transparently. + */ + public static OutputTimeFn outputAtEndOfWindow() { + return new OutputAtEndOfWindow(); + } + + /** + * Applies the given {@link OutputTimeFn} to the given output times, obtaining + * the output time for a value computed. See {@link OutputTimeFn#combine} for + * a full specification. + * + * @throws IllegalArgumentException if {@code outputTimes} is empty. + */ + public static Instant combineOutputTimes( + OutputTimeFn outputTimeFn, Iterable outputTimes) { + checkArgument( + !Iterables.isEmpty(outputTimes), + "Collection of output times must not be empty in %s.combineOutputTimes", + OutputTimeFns.class.getName()); + + @Nullable + Instant combinedOutputTime = null; + for (Instant outputTime : outputTimes) { + combinedOutputTime = + combinedOutputTime == null + ? outputTime : outputTimeFn.combine(combinedOutputTime, outputTime); + } + return combinedOutputTime; + } + + /** + * See {@link #outputAtEarliestInputTimestamp}. + */ + private static class OutputAtEarliestInputTimestamp extends OutputTimeFn.Defaults { + @Override + public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) { + return inputTimestamp; + } + + @Override + public Instant combine(Instant outputTime, Instant otherOutputTime) { + return Ordering.natural().min(outputTime, otherOutputTime); + } + + /** + * {@inheritDoc} + * + * @return {@code true}. The result of any combine will be the earliest input timestamp. + */ + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + } + + /** + * See {@link #outputAtLatestInputTimestamp}. + */ + private static class OutputAtLatestInputTimestamp extends OutputTimeFn.Defaults { + @Override + public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) { + return inputTimestamp; + } + + @Override + public Instant combine(Instant outputTime, Instant otherOutputTime) { + return Ordering.natural().max(outputTime, otherOutputTime); + } + + /** + * {@inheritDoc} + * + * @return {@code false}. + */ + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return false; + } + } + + private static class OutputAtEndOfWindow extends OutputTimeFn.DependsOnlyOnWindow { + + /** + *{@inheritDoc} + * + *@return {@code window.maxTimestamp()}. + */ + @Override + protected Instant assignOutputTime(BoundedWindow window) { + return window.maxTimestamp(); + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java index 7be1ebffd3a23..bea0285b61a82 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java @@ -22,8 +22,8 @@ import java.util.Collection; /** - * A {@link WindowFn} that places each value into exactly one window - * based on its timestamp and never merges windows. + * A {@link WindowFn} that places each value into exactly one window based on its timestamp and + * never merges windows. 
* * @param type of elements being windowed * @param window type diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java index c419f7e792eca..da137c1f47f1c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java @@ -16,10 +16,11 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; import com.google.cloud.dataflow.sdk.coders.Coder; import org.joda.time.Duration; -import org.joda.time.Instant; import java.util.Arrays; import java.util.Collection; @@ -85,13 +86,14 @@ public IntervalWindow getSideInputWindow(BoundedWindow window) { throw new UnsupportedOperationException("Sessions is not allowed in side inputs"); } - public Duration getGapDuration() { - return gapDuration; + @Experimental(Kind.OUTPUT_TIME) + @Override + public OutputTimeFn getOutputTimeFn() { + return OutputTimeFns.outputAtEarliestInputTimestamp(); } - @Override - public Instant getOutputTime(Instant inputTimestamp, IntervalWindow window) { - return inputTimestamp; + public Duration getGapDuration() { + return gapDuration; } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java index 3b4c2d71d7051..b0066d6124eba 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java @@ -16,6 +16,8 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; import com.google.cloud.dataflow.sdk.coders.Coder; import org.joda.time.Duration; @@ -170,17 +172,28 @@ public Duration getOffset() { } /** - * Ensure that later sliding windows have an output time that is past the end of earlier windows. + * Ensures that later sliding windows have an output time that is past the end of earlier windows. * *

If this is the earliest sliding window containing {@code inputTimestamp}, that's fine. * Otherwise, we pick the earliest time that doesn't overlap with earlier windows. */ + @Experimental(Kind.OUTPUT_TIME) @Override - public Instant getOutputTime(Instant inputTimestamp, IntervalWindow window) { - Instant startOfLastSegment = window.maxTimestamp().minus(period); - return startOfLastSegment.isBefore(inputTimestamp) - ? inputTimestamp - : startOfLastSegment.plus(1); + public OutputTimeFn getOutputTimeFn() { + return new OutputTimeFn.Defaults() { + @Override + public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) { + Instant startOfLastSegment = window.maxTimestamp().minus(period); + return startOfLastSegment.isBefore(inputTimestamp) + ? inputTimestamp + : startOfLastSegment.plus(1); + } + + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + }; } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java index a88fab1ce3832..531392b551b92 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java @@ -371,21 +371,23 @@ public static class Bound extends PTransform, PCollection> @Nullable private final AccumulationMode mode; @Nullable private final Duration allowedLateness; @Nullable private final ClosingBehavior closingBehavior; + @Nullable private final OutputTimeFn outputTimeFn; private Bound(String name, @Nullable WindowFn windowFn, @Nullable Trigger trigger, @Nullable AccumulationMode mode, @Nullable Duration allowedLateness, - ClosingBehavior behavior) { + ClosingBehavior behavior, @Nullable OutputTimeFn outputTimeFn) { super(name); this.windowFn = windowFn; this.trigger = trigger; this.mode = mode; this.allowedLateness = allowedLateness; this.closingBehavior = behavior; + this.outputTimeFn = outputTimeFn; } private Bound(String name) { - this(name, null, null, null, null, null); + this(name, null, null, null, null, null, null); } /** @@ -402,7 +404,8 @@ private Bound into(WindowFn windowFn) { throw new IllegalArgumentException("Window coders must be deterministic.", e); } - return new Bound<>(name, windowFn, trigger, mode, allowedLateness, closingBehavior); + return new Bound<>( + name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn); } /** @@ -415,7 +418,8 @@ private Bound into(WindowFn windowFn) { * explanation. 
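The SlidingWindows override above means only the earliest window containing an element keeps the element's own timestamp; in later windows the output time is shifted just past the end of the preceding window, so a pane in a later window does not prevent the watermark from progressing past the end of an earlier one. A worked sketch of the same arithmetic, using plain joda-time values instead of the SDK's IntervalWindow (the window is represented here only by its maxTimestamp):

import org.joda.time.Duration;
import org.joda.time.Instant;

public class SlidingOutputTimeSketch {
  // Reproduces the arithmetic of the anonymous OutputTimeFn above for one element and one window.
  static Instant assignOutputTime(
      Instant inputTimestamp, Instant windowMaxTimestamp, Duration period) {
    Instant startOfLastSegment = windowMaxTimestamp.minus(period);
    return startOfLastSegment.isBefore(inputTimestamp)
        ? inputTimestamp
        : startOfLastSegment.plus(1);
  }

  public static void main(String[] args) {
    Duration period = Duration.standardSeconds(10);
    Instant element = new Instant(5000); // t = 5s, with 30s windows sliding every 10s

    // Earliest window containing the element, [-20s, 10s): keeps its own timestamp (5000 ms).
    System.out.println(assignOutputTime(element, new Instant(9999), period));
    // Window [-10s, 20s): shifted to 10000 ms, just past the end of [-20s, 10s).
    System.out.println(assignOutputTime(element, new Instant(19999), period));
    // Window [0s, 30s): shifted to 20000 ms, just past the end of [-10s, 20s).
    System.out.println(assignOutputTime(element, new Instant(29999), period));
  }
}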
*/ public Bound named(String name) { - return new Bound<>(name, windowFn, trigger, mode, allowedLateness, closingBehavior); + return new Bound<>( + name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn); } /** @@ -432,7 +436,13 @@ public Bound named(String name) { @Experimental(Kind.TRIGGER) public Bound triggering(TriggerBuilder trigger) { return new Bound( - name, windowFn, trigger.buildTrigger(), mode, allowedLateness, closingBehavior); + name, + windowFn, + trigger.buildTrigger(), + mode, + allowedLateness, + closingBehavior, + outputTimeFn); } /** @@ -444,9 +454,14 @@ public Bound triggering(TriggerBuilder trigger) { */ @Experimental(Kind.TRIGGER) public Bound discardingFiredPanes() { - return new Bound(name, - windowFn, trigger, AccumulationMode.DISCARDING_FIRED_PANES, - allowedLateness, closingBehavior); + return new Bound( + name, + windowFn, + trigger, + AccumulationMode.DISCARDING_FIRED_PANES, + allowedLateness, + closingBehavior, + outputTimeFn); } /** @@ -458,9 +473,14 @@ public Bound discardingFiredPanes() { */ @Experimental(Kind.TRIGGER) public Bound accumulatingFiredPanes() { - return new Bound(name, - windowFn, trigger, AccumulationMode.ACCUMULATING_FIRED_PANES, - allowedLateness, closingBehavior); + return new Bound( + name, + windowFn, + trigger, + AccumulationMode.ACCUMULATING_FIRED_PANES, + allowedLateness, + closingBehavior, + outputTimeFn); } /** @@ -478,7 +498,18 @@ public Bound accumulatingFiredPanes() { */ @Experimental(Kind.TRIGGER) public Bound withAllowedLateness(Duration allowedLateness) { - return new Bound(name, windowFn, trigger, mode, allowedLateness, closingBehavior); + return new Bound( + name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn); + } + + /** + * (Experimental) Override the default {@link OutputTimeFn}, to control + * the output timestamp of values output from a {@link GroupByKey} operation. 
+ */ + @Experimental(Kind.OUTPUT_TIME) + public Bound withOutputTimeFn(OutputTimeFn outputTimeFn) { + return new Bound( + name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn); } /** @@ -493,9 +524,11 @@ public Bound withAllowedLateness(Duration allowedLateness) { */ @Experimental(Kind.TRIGGER) public Bound withAllowedLateness(Duration allowedLateness, ClosingBehavior behavior) { - return new Bound(name, windowFn, trigger, mode, allowedLateness, behavior); + return new Bound(name, windowFn, trigger, mode, allowedLateness, behavior, outputTimeFn); } + // Rawtype cast of OutputTimeFn cannot be eliminated with intermediate variable, as it is + // casting between wildcards private WindowingStrategy getOutputStrategy(WindowingStrategy inputStrategy) { WindowingStrategy result = inputStrategy; if (windowFn != null) { @@ -513,6 +546,9 @@ public Bound withAllowedLateness(Duration allowedLateness, ClosingBehavior be if (closingBehavior != null) { result = result.withClosingBehavior(closingBehavior); } + if (outputTimeFn != null) { + result = result.withOutputTimeFn(outputTimeFn); + } return result; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java index a685345e563ee..0423cfb5eadda 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java @@ -16,7 +16,11 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.util.WindowingStrategy; +import com.google.common.collect.Ordering; import org.joda.time.Instant; @@ -128,26 +132,35 @@ public abstract void merge(Collection toBeMerged, W mergeResult) public abstract W getSideInputWindow(final BoundedWindow window); /** - * Returns the output timestamp to use for data depending on the given {@code inputTimestamp} - * in the specified {@code window}. + * @deprecated Implement {@link #getOutputTimeFn} to return either the appropriate + * {@link ElementaryOutputTimeFn} or a custom {@link OutputTimeFn} extending + * {@link OutputTimeFn.Defaults}. + */ + @Deprecated + @Experimental(Kind.OUTPUT_TIME) + public Instant getOutputTime(Instant inputTimestamp, W window) { + return getOutputTimeFn().assignOutputTime(inputTimestamp, window); + } + + /** + * Provides a default implementation for {@link WindowingStrategy#getOutputTimeFn()}. + * See the full specification there. * - *
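For pipeline authors, the withOutputTimeFn setter above composes with the existing Window builder methods. A usage sketch, assuming an existing keyed PCollection named input and the SDK's FixedWindows WindowFn; apart from withOutputTimeFn, every method used here is pre-existing SDK API:

import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;

import org.joda.time.Duration;

class WithOutputTimeFnExample {
  // A downstream GroupByKey will emit each pane at the end of its window rather than at the
  // earliest element timestamp, so the grouped output no longer pins the watermark to the
  // earliest element in the pane.
  static PCollection<KV<String, Long>> windowForEndOfWindowOutput(
      PCollection<KV<String, Long>> input) {
    return input.apply(
        Window.<KV<String, Long>>into(FixedWindows.of(Duration.standardMinutes(1)))
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()));
  }
}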

The result must be between {@code inputTimestamp} and {@code window.maxTimestamp()} - * (inclusive on both sides). If this {@link WindowFn} doesn't produce overlapping windows, - * this can (and typically should) just return {@code inputTimestamp}. If this does produce - * overlapping windows, it is suggested that the result in later overlapping windows is - * past the end of earlier windows so that the later windows don't prevent the watermark from - * progressing past the end of the earlier window. + *

If this {@link WindowFn} doesn't produce overlapping windows, this need not (and probably + * should not) override any of the default implementations in {@link OutputTimeFn.Defaults}. * - *

Each {@code KV>} produced from a {@code GroupByKey} will be output at a - * timestamp that is the minimum of {@code getOutputTime} applied to the timestamp of all of - * the non-late {@code KV} that were used as input to the {@code GroupByKey}. The watermark - * is also prevented from advancing past this minimum timestamp until after the - * {@code KV>} has been output. + *

If this {@link WindowFn} does produce overlapping windows that can be predicted here, it is + * suggested that the result in later overlapping windows is past the end of earlier windows so + * that the later windows don't prevent the watermark from progressing past the end of the earlier + * window. * - *

This function should be monotonic across input timestamps. Specifically, if {@code A < B}, - * then {@code getOutputTime(A, window) <= getOutputTime(B, window)}. + *

For example, a timestamp in a sliding window should be moved past the beginning of the next + * sliding window. See {@link SlidingWindows#getOutputTimeFn}. */ - public abstract Instant getOutputTime(Instant inputTimestamp, W window); + @Experimental(Kind.OUTPUT_TIME) + public OutputTimeFn getOutputTimeFn() { + return new OutputAtEarliestAssignedTimestamp<>(this); + } /** * Returns true if this {@code WindowFn} never needs to merge any windows. @@ -162,4 +175,47 @@ public boolean isNonMerging() { public boolean assignsToSingleWindow() { return false; } + + /** + * A compatibility adapter that will return the assigned timestamps according to the + * {@link WindowFn}, which was the prior policy. Specifying the assigned output timestamps + * on the {@link WindowFn} is now deprecated. + */ + private static class OutputAtEarliestAssignedTimestamp + extends OutputTimeFn.Defaults { + + private final WindowFn windowFn; + + public OutputAtEarliestAssignedTimestamp(WindowFn windowFn) { + this.windowFn = windowFn; + } + + /** + * {@inheritDoc} + * + * @return the result of {@link WindowFn#getOutputTime windowFn.getOutputTime()}. + */ + @Override + @SuppressWarnings("deprecation") // this is an adapter for the deprecated behavior + public Instant assignOutputTime(Instant timestamp, W window) { + return windowFn.getOutputTime(timestamp, window); + } + + @Override + public Instant combine(Instant outputTime, Instant otherOutputTime) { + return Ordering.natural().min(outputTime, otherOutputTime); + } + + /** + * {@inheritDoc} + * + * @return {@code true}. When the {@link OutputTimeFn} is not overridden by {@link WindowFn} + * or {@link WindowingStrategy}, the minimum output timestamp is taken, which depends + * only on the minimum input timestamp by monotonicity of {@link #assignOutputTime}. + */ + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFn.java index 2f1b49c2a6fa9..2fc6c650e2e19 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFn.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.util; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; @@ -25,11 +26,11 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.joda.time.Instant; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; @@ -61,8 +62,8 @@ public static boolean isSupported(WindowingStrategy strategy) { // Right now, we support ACCUMULATING_FIRED_PANES because it is the same as // DISCARDING_FIRED_PANES. In Batch mode there is no late data so the default // trigger (after watermark) will only fire once. 
- if (!strategy.getMode().equals(AccumulationMode.DISCARDING_FIRED_PANES) - && !strategy.getMode().equals(AccumulationMode.ACCUMULATING_FIRED_PANES)) { + if (!(strategy.getMode().equals(AccumulationMode.DISCARDING_FIRED_PANES) + || strategy.getMode().equals(AccumulationMode.ACCUMULATING_FIRED_PANES))) { return false; } @@ -70,17 +71,24 @@ public static boolean isSupported(WindowingStrategy strategy) { } private final KeyedCombineFn combineFn; - private WindowFn windowFn; + private WindowingStrategy windowingStrategy; public GroupAlsoByWindowsAndCombineDoFn( - WindowFn windowFn, KeyedCombineFn combineFn) { + WindowingStrategy strategy, + KeyedCombineFn combineFn) { + + checkArgument(GroupAlsoByWindowsAndCombineDoFn.isSupported(strategy), + "%s does not support non-default triggering, " + + "found in windowing strategy: %s", + getClass(), + strategy); this.combineFn = combineFn; // To make a MergeContext that is compatible with the type of windowFn, we need to remove // the wildcard from the element type. @SuppressWarnings("unchecked") - WindowFn objectWindowFn = (WindowFn) windowFn; - this.windowFn = objectWindowFn; + WindowingStrategy objectWindowingStrategy = (WindowingStrategy) strategy; + this.windowingStrategy = objectWindowingStrategy; } @@ -98,9 +106,10 @@ public int compare(BoundedWindow w1, BoundedWindow w2) { }); final Map accumulators = Maps.newHashMap(); - final Map minTimestamps = Maps.newHashMap(); + final Map accumulatorOutputTimestamps = Maps.newHashMap(); - WindowFn.MergeContext mergeContext = windowFn.new MergeContext() { + WindowFn.MergeContext mergeContext = + windowingStrategy.getWindowFn().new MergeContext() { @Override public Collection windows() { return liveWindows; @@ -108,20 +117,18 @@ public Collection windows() { @Override public void merge(Collection toBeMerged, W mergeResult) throws Exception { - List accumsToBeMerged = new ArrayList<>(toBeMerged.size()); - Instant minTimestamp = null; + List accumsToBeMerged = Lists.newArrayListWithCapacity(toBeMerged.size()); + List timestampsToBeMerged = Lists.newArrayListWithCapacity(toBeMerged.size()); for (W window : toBeMerged) { accumsToBeMerged.add(accumulators.remove(window)); - - Instant timestampToBeMerged = minTimestamps.remove(window); - if (minTimestamp == null - || (timestampToBeMerged != null && timestampToBeMerged.isBefore(minTimestamp))) { - minTimestamp = timestampToBeMerged; - } + timestampsToBeMerged.add(accumulatorOutputTimestamps.remove(window)); } liveWindows.removeAll(toBeMerged); - minTimestamps.put(mergeResult, minTimestamp); + + Instant mergedOutputTimestamp = + windowingStrategy.getOutputTimeFn().merge(mergeResult, timestampsToBeMerged); + accumulatorOutputTimestamps.put(mergeResult, mergedOutputTimestamp); liveWindows.add(mergeResult); accumulators.put(mergeResult, combineFn.mergeAccumulators(key, accumsToBeMerged)); } @@ -132,48 +139,55 @@ public void merge(Collection toBeMerged, W mergeResult) throws Exception { @SuppressWarnings("unchecked") Collection windows = (Collection) e.getWindows(); - for (W w : windows) { - Instant timestamp = minTimestamps.get(w); - if (timestamp == null || timestamp.compareTo(e.getTimestamp()) > 0) { - minTimestamps.put(w, e.getTimestamp()); + for (W window : windows) { + Instant outputTime = + windowingStrategy.getOutputTimeFn().assignOutputTime(e.getTimestamp(), window); + Instant accumulatorOutputTime = accumulatorOutputTimestamps.get(window); + if (accumulatorOutputTime == null) { + accumulatorOutputTimestamps.put(window, outputTime); } else { - 
minTimestamps.put(w, timestamp); + accumulatorOutputTimestamps.put(window, + windowingStrategy.getOutputTimeFn().combine(outputTime, accumulatorOutputTime)); } - AccumT accum = accumulators.get(w); - checkState((timestamp == null && accum == null) || (timestamp != null && accum != null)); + AccumT accum = accumulators.get(window); + checkState((accumulatorOutputTime == null && accum == null) + || (accumulatorOutputTime != null && accum != null), + "accumulator and accumulatorOutputTime should both be null or both be non-null"); if (accum == null) { accum = combineFn.createAccumulator(key); - liveWindows.add(w); + liveWindows.add(window); } accum = combineFn.addInput(key, accum, e.getValue()); - accumulators.put(w, accum); + accumulators.put(window, accum); } - windowFn.mergeWindows(mergeContext); + windowingStrategy.getWindowFn().mergeWindows(mergeContext); while (!liveWindows.isEmpty() && liveWindows.peek().maxTimestamp().isBefore(e.getTimestamp())) { - closeWindow(key, liveWindows.poll(), accumulators, minTimestamps, c); + closeWindow(key, liveWindows.poll(), accumulators, accumulatorOutputTimestamps, c); } } // To have gotten here, we've either not had any elements added, or we've only run merge // and then closed windows. We don't need to retry merging. while (!liveWindows.isEmpty()) { - closeWindow(key, liveWindows.poll(), accumulators, minTimestamps, c); + closeWindow(key, liveWindows.poll(), accumulators, accumulatorOutputTimestamps, c); } } private void closeWindow( - K key, W w, Map accumulators, Map minTimestamps, ProcessContext c) { - AccumT accum = accumulators.remove(w); - Instant timestamp = minTimestamps.remove(w); + K key, W window, Map accumulators, + Map accumulatorOutputTimes, + ProcessContext context) { + AccumT accum = accumulators.remove(window); + Instant timestamp = accumulatorOutputTimes.remove(window); checkState(accum != null && timestamp != null); - c.windowingInternals().outputWindowedValue( + context.windowingInternals().outputWindowedValue( KV.of(key, combineFn.extractOutput(key, accum)), - windowFn.getOutputTime(timestamp, w), - Arrays.asList(w), + timestamp, + Arrays.asList(window), PaneInfo.ON_TIME_AND_ONLY_FIRING); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java index 678bd80a43b5a..4ed180b67df30 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java @@ -16,11 +16,12 @@ package com.google.cloud.dataflow.sdk.util; +import static com.google.common.base.Preconditions.checkNotNull; + import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.values.KV; -import com.google.common.base.Preconditions; /** * DoFn that merges windows and groups elements in those windows, optionally @@ -44,13 +45,11 @@ public abstract class GroupAlsoByWindowsDoFn GroupAlsoByWindowsDoFn, W> createForIterable(WindowingStrategy windowingStrategy, Coder inputCoder) { - @SuppressWarnings("unchecked") - WindowingStrategy noWildcard = (WindowingStrategy) windowingStrategy; return GroupAlsoByWindowsViaIteratorsDoFn.isSupported(windowingStrategy) ? 
new GroupAlsoByWindowsViaIteratorsDoFn(windowingStrategy) : new GroupAlsoByWindowsViaOutputBufferDoFn<>( - noWildcard, + windowingStrategy, SystemReduceFn.buffering(inputCoder)); } @@ -63,14 +62,15 @@ public abstract class GroupAlsoByWindowsDoFn windowingStrategy, final AppliedCombineFn combineFn, final Coder keyCoder) { - Preconditions.checkNotNull(combineFn); - @SuppressWarnings("unchecked") - WindowingStrategy noWildcard = (WindowingStrategy) windowingStrategy; + checkNotNull(combineFn); + return GroupAlsoByWindowsAndCombineDoFn.isSupported(windowingStrategy) - ? new GroupAlsoByWindowsAndCombineDoFn<>(noWildcard.getWindowFn(), combineFn.getFn()) + ? new GroupAlsoByWindowsAndCombineDoFn<>( + windowingStrategy, + combineFn.getFn()) : new GroupAlsoByWindowsViaOutputBufferDoFn<>( - noWildcard, + windowingStrategy, SystemReduceFn.combining(keyCoder, combineFn)); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFn.java index c5800afe6bb9d..400824017cbcb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFn.java @@ -63,11 +63,16 @@ public static boolean isSupported(WindowingStrategy strategy) { return false; } + // It must be possible to compute the output timestamp of a pane from the input timestamp + // of the element with the earliest input timestamp. + if (!strategy.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) { + return false; + } // Right now, we support ACCUMULATING_FIRED_PANES because it is the same as // DISCARDING_FIRED_PANES. In Batch mode there is no late data so the default // trigger (after watermark) will only fire once. 
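The dependsOnlyOnEarliestInputTimestamp() check introduced above exists because the iterators-based batch path never buffers a pane: it must be able to stamp the pane's output using only the first, earliest element it sees. A condensed sketch of the two structural guards this relies on (the real isSupported also checks triggering and accumulation mode, omitted here):

import com.google.cloud.dataflow.sdk.util.WindowingStrategy;

class IteratorsPathGuardSketch {
  // True only when windows never merge and the pane timestamp is a function of the earliest
  // input timestamp alone, the two properties the iterator-reuse batch path depends on.
  static boolean structurallySupported(WindowingStrategy<?, ?> strategy) {
    return strategy.getWindowFn().isNonMerging()
        && strategy.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp();
  }
}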
- if (!strategy.getMode().equals(AccumulationMode.DISCARDING_FIRED_PANES) - && !strategy.getMode().equals(AccumulationMode.ACCUMULATING_FIRED_PANES)) { + if (!(strategy.getMode().equals(AccumulationMode.DISCARDING_FIRED_PANES) + || strategy.getMode().equals(AccumulationMode.ACCUMULATING_FIRED_PANES))) { return false; } @@ -76,7 +81,8 @@ public static boolean isSupported(WindowingStrategy strategy) { public GroupAlsoByWindowsViaIteratorsDoFn(WindowingStrategy strategy) { checkArgument(GroupAlsoByWindowsViaIteratorsDoFn.isSupported(strategy), - "%s does not support merging or non-default triggering, " + "%s does not support merging, non-default triggering, " + + "or any OutputTimeFn where dependsOnlyOnEarliest() is false, " + "found in windowing strategy: %s", getClass(), strategy); @@ -118,7 +124,7 @@ public void processElement(ProcessContext c) throws Exception { windows.put(window.maxTimestamp(), window); c.windowingInternals().outputWindowedValue( KV.of(key, (Iterable) new WindowReiterable(iterator, window)), - strategy.getWindowFn().getOutputTime(e.getTimestamp(), typedWindow), + strategy.getOutputTimeFn().assignOutputTime(e.getTimestamp(), typedWindow), Arrays.asList(window), PaneInfo.ON_TIME_AND_ONLY_FIRING); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java index 63d8ecb144733..df4a853599334 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java @@ -165,7 +165,7 @@ public StorageT access(StateTag address) { sourceNamespaces.add(namespaceFor(sourceWindow)); } - return stateInternals.mergedState(sourceNamespaces, namespace, address); + return stateInternals.mergedState(sourceNamespaces, namespace, address, window); } } @@ -211,7 +211,8 @@ public StorageT access(StateTag address) { mergingNamespaces.add(delegate.namespaceFor(mergingWindow)); } - return delegate.stateInternals.mergedState(mergingNamespaces, delegate.namespace, address); + return delegate.stateInternals.mergedState( + mergingNamespaces, delegate.namespace, address, delegate.window()); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java index 3052420f8a0ab..d539c84ddc066 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java @@ -381,6 +381,9 @@ public TriggerResult onMerge( throw wrapMaybeUserException(e); } + // Merge the watermark hold + watermarkHold.mergeHolds(resultContext); + // Have the trigger merge state as needed, and handle the result. 
TriggerResult triggerResult; try { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java index 669ba4fba4201..d480369b52e65 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.util; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.state.StateContents; @@ -38,17 +39,24 @@ public class WatermarkHold implements Serializable { /** Watermark hold used for the actual data-based hold. */ - @VisibleForTesting static final StateTag DATA_HOLD_TAG = - StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal("hold")); + @VisibleForTesting static final String DATA_HOLD_ID = "hold"; + @VisibleForTesting static StateTag watermarkHoldTagForOutputTimeFn( + OutputTimeFn outputTimeFn) { + return StateTags.makeSystemTagInternal( + StateTags.watermarkStateInternal(DATA_HOLD_ID, outputTimeFn)); + } private final WindowingStrategy windowingStrategy; + private final StateTag watermarkHoldTag; public WatermarkHold(WindowingStrategy windowingStrategy) { this.windowingStrategy = windowingStrategy; + this.watermarkHoldTag = watermarkHoldTagForOutputTimeFn(windowingStrategy.getOutputTimeFn()); } /** - * Update the watermark hold to include the timestamp of the value in {@code c}. + * Update the watermark hold to include the appropriate output timestamp for the value in + * {@code c}. * *

If the value was not late, then the input watermark must be less than the timestamp, and we * can use {@link WindowFn#getOutputTime} to determine the appropriate output time. @@ -59,20 +67,40 @@ public WatermarkHold(WindowingStrategy windowingStrategy) { * dropped. */ public void addHold(ReduceFn.ProcessValueContext c, boolean isLate) { - Instant holdTo = isLate + Instant outputTime = isLate ? c.window().maxTimestamp().plus(windowingStrategy.getAllowedLateness()) - : windowingStrategy.getWindowFn().getOutputTime(c.timestamp(), c.window()); - c.state().access(DATA_HOLD_TAG).add(holdTo); + : windowingStrategy.getOutputTimeFn().assignOutputTime(c.timestamp(), c.window()); + c.state().access(watermarkHoldTag).add(outputTime); + } + + /** + * Updates the watermark hold when windows merge. For example, if the new window implies + * a later watermark hold, then earlier holds may be released. + */ + public void mergeHolds(final ReduceFn.OnMergeContext c) { + // If the output hold depends only on the window, then there may not be a hold in place + // for the new merged window, so add one. + if (windowingStrategy.getOutputTimeFn().dependsOnlyOnWindow()) { + Instant arbitraryTimestamp = new Instant(0); + c.state().access(watermarkHoldTag).add( + windowingStrategy.getOutputTimeFn().assignOutputTime( + arbitraryTimestamp, + c.window())); + } + + c.state().accessAcrossMergedWindows(watermarkHoldTag).releaseExtraneousHolds(); } /** - * Get information from the watermark hold for outputting. + * Returns the combined timestamp at which the output watermark was being held and releases + * the hold. * - *

The output timestamp is the minimum of getOutputTimestamp applied to the non-late elements - * that arrived in the current pane. + *

The returned timestamp is the output timestamp according to the {@link OutputTimeFn} + * from the windowing strategy of this {@link WatermarkHold}, combined across all the non-late + * elements in the current pane. */ public StateContents extractAndRelease(final ReduceFn.Context c) { - final WatermarkStateInternal dataHold = c.state().accessAcrossMergedWindows(DATA_HOLD_TAG); + final WatermarkStateInternal dataHold = c.state().accessAcrossMergedWindows(watermarkHoldTag); final StateContents holdFuture = dataHold.get(); return new StateContents() { @Override @@ -82,7 +110,7 @@ public Instant read() { hold = c.window().maxTimestamp(); } - // Clear the bag (to release the watermark) + // Clear the underlying state to allow the output watermark to progress. dataHold.clear(); return hold; @@ -91,18 +119,18 @@ public Instant read() { } public void holdForOnTime(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(DATA_HOLD_TAG).add(c.window().maxTimestamp()); + c.state().accessAcrossMergedWindows(watermarkHoldTag).add(c.window().maxTimestamp()); } public void holdForFinal(final ReduceFn.Context c) { if (c.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) { - c.state().accessAcrossMergedWindows(DATA_HOLD_TAG) + c.state().accessAcrossMergedWindows(watermarkHoldTag) .add(c.window().maxTimestamp().plus(c.windowingStrategy().getAllowedLateness())); } } public void releaseOnTime(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(DATA_HOLD_TAG).clear(); + c.state().accessAcrossMergedWindows(watermarkHoldTag).clear(); if (c.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS && c.windowingStrategy().getAllowedLateness().isLongerThan(Duration.ZERO)) { @@ -111,6 +139,6 @@ public void releaseOnTime(final ReduceFn.Context c) { } public void releaseFinal(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(DATA_HOLD_TAG).clear(); + c.state().accessAcrossMergedWindows(watermarkHoldTag).clear(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java index a452dc58a840f..c167b8c0cdffe 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java @@ -16,13 +16,16 @@ package com.google.cloud.dataflow.sdk.util; +import com.google.cloud.dataflow.sdk.annotations.Experimental; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; +import com.google.common.base.MoreObjects; import org.joda.time.Duration; @@ -52,6 +55,7 @@ public enum AccumulationMode { private static final WindowingStrategy DEFAULT = of(new GlobalWindows()); private final WindowFn windowFn; + private final OutputTimeFn outputTimeFn; private final ExecutableTrigger trigger; private final AccumulationMode mode; private final Duration allowedLateness; @@ -59,12 +63,14 @@ public enum AccumulationMode { private final boolean 
triggerSpecified; private final boolean modeSpecified; private final boolean allowedLatenessSpecified; + private final boolean outputTimeFnSpecified; private WindowingStrategy( WindowFn windowFn, ExecutableTrigger trigger, boolean triggerSpecified, AccumulationMode mode, boolean modeSpecified, Duration allowedLateness, boolean allowedLatenessSpecified, + OutputTimeFn outputTimeFn, boolean outputTimeFnSpecified, ClosingBehavior closingBehavior) { this.windowFn = windowFn; this.trigger = trigger; @@ -74,6 +80,8 @@ private WindowingStrategy( this.allowedLateness = allowedLateness; this.allowedLatenessSpecified = allowedLatenessSpecified; this.closingBehavior = closingBehavior; + this.outputTimeFn = outputTimeFn; + this.outputTimeFnSpecified = outputTimeFnSpecified; } /** @@ -89,6 +97,7 @@ public static WindowingStrategy of( ExecutableTrigger.create(DefaultTrigger.of()), false, AccumulationMode.DISCARDING_FIRED_PANES, false, DEFAULT_ALLOWED_LATENESS, false, + windowFn.getOutputTimeFn(), false, ClosingBehavior.FIRE_IF_NON_EMPTY); } @@ -124,6 +133,14 @@ public ClosingBehavior getClosingBehavior() { return closingBehavior; } + public OutputTimeFn getOutputTimeFn() { + return outputTimeFn; + } + + public boolean isOutputTimeFnSpecified() { + return outputTimeFnSpecified; + } + /** * Returns a {@link WindowingStrategy} identical to {@code this} but with the trigger set to * {@code wildcardTrigger}. @@ -136,6 +153,7 @@ public WindowingStrategy withTrigger(Trigger wildcardTrigger) { ExecutableTrigger.create(typedTrigger), true, mode, modeSpecified, allowedLateness, allowedLatenessSpecified, + outputTimeFn, outputTimeFnSpecified, closingBehavior); } @@ -149,6 +167,7 @@ public WindowingStrategy withMode(AccumulationMode mode) { trigger, triggerSpecified, mode, true, allowedLateness, allowedLatenessSpecified, + outputTimeFn, outputTimeFnSpecified, closingBehavior); } @@ -159,11 +178,18 @@ public WindowingStrategy withMode(AccumulationMode mode) { public WindowingStrategy withWindowFn(WindowFn wildcardWindowFn) { @SuppressWarnings("unchecked") WindowFn typedWindowFn = (WindowFn) wildcardWindowFn; + + // The onus of type correctness falls on the callee. + @SuppressWarnings("unchecked") + OutputTimeFn newOutputTimeFn = (OutputTimeFn) + (outputTimeFnSpecified ? 
outputTimeFn : typedWindowFn.getOutputTimeFn()); + return new WindowingStrategy( typedWindowFn, trigger, triggerSpecified, mode, modeSpecified, allowedLateness, allowedLatenessSpecified, + newOutputTimeFn, outputTimeFnSpecified, closingBehavior); } @@ -177,6 +203,7 @@ public WindowingStrategy withAllowedLateness(Duration allowedLateness) { trigger, triggerSpecified, mode, modeSpecified, allowedLateness, true, + outputTimeFn, outputTimeFnSpecified, closingBehavior); } @@ -186,15 +213,34 @@ public WindowingStrategy withClosingBehavior(ClosingBehavior closingBehavi trigger, triggerSpecified, mode, modeSpecified, allowedLateness, allowedLatenessSpecified, + outputTimeFn, outputTimeFnSpecified, + closingBehavior); + } + + @Experimental(Experimental.Kind.OUTPUT_TIME) + public WindowingStrategy withOutputTimeFn(OutputTimeFn outputTimeFn) { + + @SuppressWarnings("unchecked") + OutputTimeFn typedOutputTimeFn = (OutputTimeFn) outputTimeFn; + + return new WindowingStrategy( + windowFn, + trigger, triggerSpecified, + mode, modeSpecified, + allowedLateness, allowedLatenessSpecified, + typedOutputTimeFn, true, closingBehavior); } @Override public String toString() { - return String.format("%s, %s, %s", - StringUtils.approximateSimpleName(windowFn.getClass()), - trigger.toString(), - mode.toString()); + return MoreObjects.toStringHelper(this) + .add("windowFn", windowFn) + .add("allowedLateness", allowedLateness) + .add("trigger", trigger) + .add("accumulationMode", mode) + .add("outputTimeFn", outputTimeFn) + .toString(); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java index a710dd050cb5c..1af60ad209461 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java @@ -15,8 +15,12 @@ */ package com.google.cloud.dataflow.sdk.util.state; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder; import org.joda.time.Instant; @@ -30,6 +34,7 @@ * In-memory implementation of {@link StateInternals}. Used in {@code BatchModeExecutionContext} * and for running tests that need state. 
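Referring back to the WindowingStrategy changes just above: the strategy records whether an OutputTimeFn was explicitly specified, and withWindowFn falls back to the new WindowFn's default only when it was not. A small sketch of the two cases; WindowingStrategy is an SDK-internal class, so this is only to illustrate the precedence rule, and the variable names are illustrative:

import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy;

import org.joda.time.Duration;

class OutputTimeFnPrecedenceSketch {
  static void demonstrate() {
    // Not specified by the user: swapping in Sessions picks up Sessions' own default,
    // outputAtEarliestInputTimestamp().
    WindowingStrategy<?, ?> defaulted =
        WindowingStrategy.of(new GlobalWindows())
            .withWindowFn(Sessions.withGapDuration(Duration.standardMinutes(10)));

    // Explicitly specified: the end-of-window choice survives the later withWindowFn call.
    WindowingStrategy<?, ?> pinned =
        WindowingStrategy.of(new GlobalWindows())
            .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow())
            .withWindowFn(Sessions.withGapDuration(Duration.standardMinutes(10)));

    System.out.println(defaulted.getOutputTimeFn());
    System.out.println(pinned.getOutputTimeFn());
  }
}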
*/ +@Experimental(Kind.STATE) public class InMemoryStateInternals extends MergingStateInternals { private interface InMemoryState { boolean isEmptyForTesting(); @@ -58,8 +63,10 @@ public BagState bindBag(final StateTag> address, Coder ele } @Override - public WatermarkStateInternal bindWatermark(StateTag address) { - return new WatermarkStateInternalImplementation(); + public WatermarkStateInternal bindWatermark( + StateTag address, + OutputTimeFn outputTimeFn) { + return new WatermarkStateInternalImplementation(outputTimeFn); } }; } @@ -118,35 +125,49 @@ public boolean isEmptyForTesting() { private final class WatermarkStateInternalImplementation implements WatermarkStateInternal, InMemoryState { - private Instant minimumHold = null; + + private final OutputTimeFn outputTimeFn; + private Instant combinedHold = null; + + public WatermarkStateInternalImplementation(OutputTimeFn outputTimeFn) { + this.outputTimeFn = outputTimeFn; + } @Override public void clear() { // Even though we're clearing we can't remove this from the in-memory state map, since // other users may already have a handle on this WatermarkBagInternal. - minimumHold = null; + combinedHold = null; } + /** + * {@inheritDoc} + * + *

Does nothing. There is only one hold and it is not extraneous. + * See {@link MergedWatermarkStateInternal} for a nontrivial implementation. + */ + @Override + public void releaseExtraneousHolds() { } + @Override public StateContents get() { return new StateContents() { @Override public Instant read() { - return minimumHold; + return combinedHold; } }; } @Override - public void add(Instant watermarkHold) { - if (minimumHold == null || minimumHold.isAfter(watermarkHold)) { - minimumHold = watermarkHold; - } + public void add(Instant outputTime) { + combinedHold = combinedHold == null ? outputTime + : outputTimeFn.combine(combinedHold, outputTime); } @Override public boolean isEmptyForTesting() { - return minimumHold == null; + return combinedHold == null; } @Override @@ -154,14 +175,14 @@ public StateContents isEmpty() { return new StateContents() { @Override public Boolean read() { - return minimumHold == null; + return combinedHold == null; } }; } @Override public String toString() { - return Objects.toString(minimumHold); + return Objects.toString(combinedHold); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java index e2af4db164edf..ec9c492417681 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java @@ -15,6 +15,11 @@ */ package com.google.cloud.dataflow.sdk.util.state; + +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; +import com.google.common.collect.Lists; + import org.joda.time.Instant; import java.util.ArrayList; @@ -25,15 +30,22 @@ * Implementation of {@link WatermarkStateInternal} reading from multiple sources and writing to a * single result. */ -class MergedWatermarkStateInternal implements WatermarkStateInternal { +class MergedWatermarkStateInternal implements WatermarkStateInternal { private final Collection sources; private final WatermarkStateInternal result; + private final OutputTimeFn outputTimeFn; + private final W resultWindow; public MergedWatermarkStateInternal( - Collection sources, WatermarkStateInternal result) { + Collection sources, + WatermarkStateInternal result, + W resultWindow, + OutputTimeFn outputTimeFn) { this.sources = sources; this.result = result; + this.resultWindow = resultWindow; + this.outputTimeFn = outputTimeFn; } @Override @@ -45,12 +57,17 @@ public void clear() { } @Override - public void add(Instant watermarkHold) { - result.add(watermarkHold); + public void add(Instant outputTimestamp) { + result.add(outputTimestamp); } @Override public StateContents get() { + // Short circuit if output times depend only on the window, hence are all equal. + if (outputTimeFn.dependsOnlyOnWindow()) { + return result.get(); + } + // Get the underlying StateContents's right away. 
final List> reads = new ArrayList<>(sources.size()); for (WatermarkStateInternal source : sources) { @@ -61,21 +78,23 @@ public StateContents get() { return new StateContents() { @Override public Instant read() { - Instant minimum = null; + List outputTimesToMerge = Lists.newArrayListWithCapacity(sources.size()); for (StateContents read : reads) { - Instant input = read.read(); - if (minimum == null || (input != null && minimum.isAfter(input))) { - minimum = input; + Instant sourceOutputTime = read.read(); + if (sourceOutputTime != null) { + outputTimesToMerge.add(sourceOutputTime); } } - // Also, compact the state - if (minimum != null) { + if (outputTimesToMerge.isEmpty()) { + return null; + } else { + // Also, compact the state clear(); - add(minimum); + Instant mergedOutputTime = outputTimeFn.merge(resultWindow, outputTimesToMerge); + add(mergedOutputTime); + return mergedOutputTime; } - - return minimum; } }; } @@ -101,4 +120,16 @@ public Boolean read() { } }; } + + @Override + public void releaseExtraneousHolds() { + if (outputTimeFn.dependsOnlyOnEarliestInputTimestamp()) { + // No need to do anything; the merged watermark state will hold to the earliest + // due to semantics of watermark holds. + } else { + // In all other cases, get() implements the necessary combining logic, and actually + // performs compaction that releases the watermark. + get().read(); + } + } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java index a54b95dd8fc7c..c6a7ae17e3c56 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java @@ -15,8 +15,12 @@ */ package com.google.cloud.dataflow.sdk.util.state; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder; import java.util.ArrayList; @@ -26,12 +30,15 @@ * Abstract implementation of {@link StateInternals} that provides {@link #mergedState} in terms of * {@link #state}. */ +@Experimental(Kind.STATE) public abstract class MergingStateInternals implements StateInternals { @Override public > T mergedState( final Iterable sourceNamespaces, - final StateNamespace resultNamespace, StateTag address) { + final StateNamespace resultNamespace, + StateTag address, + final BoundedWindow resultWindow) { return address.bind(new StateBinder() { @Override public ValueState bindValue(StateTag> address, Coder coder) { @@ -73,8 +80,9 @@ CombiningValueStateInternal bindCombiningValue( } @Override - public WatermarkStateInternal bindWatermark( - StateTag address) { + public WatermarkStateInternal bindWatermark( + StateTag address, + OutputTimeFn outputTimeFn) { List sources = new ArrayList<>(); for (StateNamespace sourceNamespace : sourceNamespaces) { // Skip adding the result namespace for now. 
@@ -84,7 +92,13 @@ public WatermarkStateInternal bindWatermark( } WatermarkStateInternal result = state(resultNamespace, address); sources.add(result); - return new MergedWatermarkStateInternal(sources, result); + + // It is the responsibility of the SDK to only pass allowed result windows. + @SuppressWarnings("unchecked") + W typedResultWindow = (W) resultWindow; + + return new MergedWatermarkStateInternal( + sources, result, typedResultWindow, outputTimeFn); } }); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java index a65ef87617682..f8038823b762e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java @@ -15,13 +15,18 @@ */ package com.google.cloud.dataflow.sdk.util.state; +import com.google.cloud.dataflow.sdk.annotations.Experimental; +import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; + /** * {@code StateInternals} describes the functionality a runner needs to provide for the * State API to be supported. * *

The SDK will only use this after elements have been partitioned by key. For instance, after a - * {@code GroupByKey} operation. The runner implementation must ensure that any writes using - * {@code StaeIntetrnals} are implicitly scoped to the key being processed and the specific step + * {@link GroupByKey} operation. The runner implementation must ensure that any writes using + * {@link StateInternals} are implicitly scoped to the key being processed and the specific step * accessing state. * *

The runner implementation must also ensure that any writes to the associated state objects @@ -31,6 +36,7 @@ *

This is a low-level API intended for use by the Dataflow SDK. It should not be * used directly, and is highly likely to change. */ +@Experimental(Kind.STATE) public interface StateInternals { /** @@ -48,6 +54,8 @@ public interface StateInternals { * {@code sourceNamespaces} and {@code resultNamespace}. */ > T mergedState( - Iterable sourceNamespaces, StateNamespace resultNamespace, - StateTag address); + Iterable sourceNamespaces, + StateNamespace resultNamespace, + StateTag address, + BoundedWindow resultWindow); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java index 6cf6e3b13011a..2419bb8ed6007 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java @@ -20,6 +20,8 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import java.io.Serializable; @@ -50,7 +52,15 @@ public interface StateBinder { StateTag> address, Coder accumCoder, CombineFn combineFn); - WatermarkStateInternal bindWatermark(StateTag address); + /** + * Bind to a watermark {@link StateTag}. + * + *

This accepts the {@link OutputTimeFn} that dictates how watermark hold timestamps + * added to the returned {@link WatermarkStateInternal} are to be combined. + */ + WatermarkStateInternal bindWatermark( + StateTag address, + OutputTimeFn outputTimeFn); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java index 87f3ec0b62a21..8fe1681c3c970 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java @@ -21,6 +21,8 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.CoderRegistry; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.common.base.MoreObjects; import java.io.Serializable; @@ -113,8 +115,9 @@ public static StateTag> bag(String id, Coder elemCoder) { /** * Create a state tag for holding the watermark. */ - public static StateTag watermarkStateInternal(String id) { - return new WatermarkStateTagInternal(new StructuredId(id)); + public static StateTag watermarkStateInternal( + String id, OutputTimeFn outputTimeFn) { + return new WatermarkStateTagInternal(new StructuredId(id), outputTimeFn); } /** @@ -349,15 +352,23 @@ protected StateTag> asKind(StateKind kind) { } } - private static class WatermarkStateTagInternal extends StateTagBase { + private static class WatermarkStateTagInternal + extends StateTagBase { - private WatermarkStateTagInternal(StructuredId id) { + /** + * When multiple output times are added to hold the watermark, this determines how they are + * combined, and also the behavior when merging windows. + */ + private final OutputTimeFn outputTimeFn; + + private WatermarkStateTagInternal(StructuredId id, OutputTimeFn outputTimeFn) { super(id); + this.outputTimeFn = outputTimeFn; } @Override public WatermarkStateInternal bind(StateBinder visitor) { - return visitor.bindWatermark(this); + return visitor.bindWatermark(this, outputTimeFn); } @Override @@ -371,7 +382,8 @@ public boolean equals(Object obj) { } WatermarkStateTagInternal that = (WatermarkStateTagInternal) obj; - return Objects.equals(this.id, that.id); + return Objects.equals(this.id, that.id) + && Objects.equals(this.outputTimeFn, that.outputTimeFn); } @Override @@ -381,7 +393,7 @@ public int hashCode() { @Override protected StateTag asKind(StateKind kind) { - return new WatermarkStateTagInternal(id.asKind(kind)); + return new WatermarkStateTagInternal(id.asKind(kind), outputTimeFn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java index 6d26746831441..29ed90b0b8141 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java @@ -17,14 +17,29 @@ import com.google.cloud.dataflow.sdk.annotations.Experimental; import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import org.joda.time.Instant; /** - * State for holding up the watermark to the minimum of input {@code Instant}s. 
+ * A {@link State} accepting and aggregating output timestamps, which determines + * the time to which the output watermark must be held. * - *

This is intended for internal use only. The watermark will be held up based on the - * values that are added and only released when items are cleared. + *

For internal use only. This API may change at any time. */ @Experimental(Kind.STATE) -public interface WatermarkStateInternal extends MergeableState {} +public interface WatermarkStateInternal extends MergeableState { + + /** + * Release any holds that have become extraneous so they do not prevent progress of the + * output watermark. + * + *

For example, when using {@link OutputTimeFns#outputAtEndOfWindow()}, there will be holds + * in place at the end of every initial window that merges into the result window. These holds + * need to be released. It is implementation-dependent how (or whether) this happens. + * + *

This method is permitted to be "best effort" but should always try to release holds + * as far as possible to allow the output watermark to make progress. + */ + void releaseExtraneousHolds(); +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java index 6b00db9ee9f6b..fe80f90a175fd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java @@ -28,6 +28,7 @@ import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill.TagList; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill.TagValue; import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.util.state.BagState; @@ -40,6 +41,7 @@ import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; import com.google.common.base.Supplier; +import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.SettableFuture; import com.google.protobuf.ByteString; @@ -61,7 +63,6 @@ */ @RunWith(JUnit4.class) public class WindmillStateInternalsTest { - private static final StateNamespace NAMESPACE = new StateNamespaceForTest("ns"); private static final String STATE_FAMILY = "family"; @@ -93,8 +94,7 @@ public void setUp() { underTest = new WindmillStateInternals(STATE_FAMILY, true, mockReader, readStateSupplier); } - private void waitAndSet( - final SettableFuture future, final T value, final long millis) { + private void waitAndSet(final SettableFuture future, final T value, final long millis) { new Thread(new Runnable() { @Override public void run() { @@ -292,11 +292,11 @@ public void testCombiningAddBeforeRead() throws Exception { value.add(5); value.add(6); - waitAndSet(future, Arrays.asList(new int[]{8}, new int[]{10}), 200); + waitAndSet(future, Arrays.asList(new int[] {8}, new int[] {10}), 200); assertThat(result.read(), Matchers.equalTo(29)); // That get "compressed" the combiner. So, the underlying future should change: - future.set(Arrays.asList(new int[]{29})); + future.set(Arrays.asList(new int[] {29})); value.add(2); assertThat(result.read(), Matchers.equalTo(31)); @@ -330,7 +330,7 @@ public void testCombiningIsEmpty() throws Exception { StateContents result = value.isEmpty(); Mockito.verify(mockReader).listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder); - waitAndSet(future, Arrays.asList(new int[]{29}), 200); + waitAndSet(future, Arrays.asList(new int[] {29}), 200); assertThat(result.read(), Matchers.is(false)); } @@ -364,9 +364,10 @@ public void testCombiningAddPersist() throws Exception { TagList listUpdates = commitBuilder.getListUpdates(0); assertEquals(key(NAMESPACE, "combining"), listUpdates.getTag()); assertEquals(1, listUpdates.getValuesCount()); - assertEquals(11, - CoderUtils.decodeFromByteArray(accumCoder, - listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); + assertEquals( + 11, + CoderUtils.decodeFromByteArray( + accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); // Blind adds should not need to read the future. 
Mockito.verify(mockReader).startBatchAndBlock(); @@ -395,9 +396,10 @@ public void testCombiningClearPersist() throws Exception { TagList listUpdates = commitBuilder.getListUpdates(1); assertEquals(key(NAMESPACE, "combining"), listUpdates.getTag()); assertEquals(1, listUpdates.getValuesCount()); - assertEquals(11, - CoderUtils.decodeFromByteArray(accumCoder, - listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); + assertEquals( + 11, + CoderUtils.decodeFromByteArray( + accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); // Blind adds should not need to read the future. Mockito.verify(mockReader).listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder); @@ -406,8 +408,9 @@ public void testCombiningClearPersist() throws Exception { } @Test - public void testWatermarkAddBeforeRead() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + public void testWatermarkAddBeforeReadEarliest() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); SettableFuture future = SettableFuture.create(); @@ -428,9 +431,59 @@ public void testWatermarkAddBeforeRead() throws Exception { Mockito.verifyNoMoreInteractions(mockReader); } + @Test + public void testWatermarkAddBeforeReadLatest() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + SettableFuture future = SettableFuture.create(); + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)).thenReturn(future); + + StateContents result = bag.get(); + + bag.add(new Instant(3000)); + waitAndSet(future, new Instant(2000), 200); + assertThat(result.read(), Matchers.equalTo(new Instant(3000))); + + Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verifyNoMoreInteractions(mockReader); + + // Adding another value doesn't create another future, but does update the result. + bag.add(new Instant(3000)); + assertThat(result.read(), Matchers.equalTo(new Instant(3000))); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testWatermarkAddBeforeReadEndOfWindow() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEndOfWindow()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + SettableFuture future = SettableFuture.create(); + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)).thenReturn(future); + + StateContents result = bag.get(); + + bag.add(new Instant(3000)); + waitAndSet(future, new Instant(3000), 200); + assertThat(result.read(), Matchers.equalTo(new Instant(3000))); + + Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verifyNoMoreInteractions(mockReader); + + // Adding another value doesn't create another future, but does update the result. 
+ bag.add(new Instant(3000)); + assertThat(result.read(), Matchers.equalTo(new Instant(3000))); + Mockito.verifyNoMoreInteractions(mockReader); + } + @Test public void testWatermarkClearBeforeRead() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.clear(); @@ -445,7 +498,8 @@ public void testWatermarkClearBeforeRead() throws Exception { @Test public void testWatermarkIsEmptyWindmillHasData() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); SettableFuture future = SettableFuture.create(); @@ -459,7 +513,8 @@ public void testWatermarkIsEmptyWindmillHasData() throws Exception { @Test public void testWatermarkIsEmpty() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); SettableFuture future = SettableFuture.create(); @@ -473,7 +528,8 @@ public void testWatermarkIsEmpty() throws Exception { @Test public void testWatermarkIsEmptyAfterClear() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.clear(); @@ -486,8 +542,9 @@ public void testWatermarkIsEmptyAfterClear() throws Exception { } @Test - public void testWatermarkPersist() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + public void testWatermarkPersistEarliest() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.add(new Instant(1000)); @@ -508,9 +565,118 @@ public void testWatermarkPersist() throws Exception { Mockito.verifyNoMoreInteractions(mockReader); } + @Test + public void testWatermarkPersistLatestEmpty() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + bag.add(new Instant(1000)); + bag.add(new Instant(2000)); + + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)) + .thenReturn(Futures.immediateFuture(null)); + + Windmill.WorkItemCommitRequest.Builder commitBuilder = + Windmill.WorkItemCommitRequest.newBuilder(); + underTest.persist(commitBuilder); + + assertEquals(1, commitBuilder.getWatermarkHoldsCount()); + + Windmill.WatermarkHold watermarkHold = commitBuilder.getWatermarkHolds(0); + assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); + assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); + + // Blind adds should not need to read the future. 
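+    // For outputAtLatestInputTimestamp() the persisted hold still has to be consulted so the
+    // maximum can be taken; Windmill returned no value here, so the local maximum of 2000 wins.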
+ Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verify(mockReader).startBatchAndBlock(); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testWatermarkPersistLatestWindmillWins() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + bag.add(new Instant(1000)); + bag.add(new Instant(2000)); + + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)) + .thenReturn(Futures.immediateFuture(new Instant(4000))); + + Windmill.WorkItemCommitRequest.Builder commitBuilder = + Windmill.WorkItemCommitRequest.newBuilder(); + underTest.persist(commitBuilder); + + assertEquals(1, commitBuilder.getWatermarkHoldsCount()); + + Windmill.WatermarkHold watermarkHold = commitBuilder.getWatermarkHolds(0); + assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); + assertEquals(TimeUnit.MILLISECONDS.toMicros(4000), watermarkHold.getTimestamps(0)); + + // Blind adds should not need to read the future. + Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verify(mockReader).startBatchAndBlock(); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testWatermarkPersistLatestLocalAdditionsWin() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + bag.add(new Instant(1000)); + bag.add(new Instant(2000)); + + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)) + .thenReturn(Futures.immediateFuture(new Instant(500))); + + Windmill.WorkItemCommitRequest.Builder commitBuilder = + Windmill.WorkItemCommitRequest.newBuilder(); + underTest.persist(commitBuilder); + + assertEquals(1, commitBuilder.getWatermarkHoldsCount()); + + Windmill.WatermarkHold watermarkHold = commitBuilder.getWatermarkHolds(0); + assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); + assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); + + // Blind adds should not need to read the future. + Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verify(mockReader).startBatchAndBlock(); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testWatermarkPersistEndOfWindow() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEndOfWindow()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + bag.add(new Instant(2000)); + bag.add(new Instant(2000)); + + Windmill.WorkItemCommitRequest.Builder commitBuilder = + Windmill.WorkItemCommitRequest.newBuilder(); + underTest.persist(commitBuilder); + + assertEquals(1, commitBuilder.getWatermarkHoldsCount()); + + Windmill.WatermarkHold watermarkHold = commitBuilder.getWatermarkHolds(0); + assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); + assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); + + // Blind adds should not need to read the future. 
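+    // Because an outputAtEndOfWindow() hold depends only on the window itself, there is no
+    // watermarkFuture read here, unlike the earliest/latest cases above.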
+ Mockito.verify(mockReader).startBatchAndBlock(); + Mockito.verifyNoMoreInteractions(mockReader); + } + @Test public void testWatermarkClearPersist() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.add(new Instant(500)); @@ -522,26 +688,24 @@ public void testWatermarkClearPersist() throws Exception { Windmill.WorkItemCommitRequest.newBuilder(); underTest.persist(commitBuilder); - assertEquals(2, commitBuilder.getWatermarkHoldsCount()); - - Windmill.WatermarkHold clear = commitBuilder.getWatermarkHolds(0); - assertEquals(key(NAMESPACE, "watermark"), clear.getTag()); - assertEquals(0, clear.getTimestampsCount()); + assertEquals(1, commitBuilder.getWatermarkHoldsCount()); - Windmill.WatermarkHold update = commitBuilder.getWatermarkHolds(1); - assertEquals(key(NAMESPACE, "watermark"), update.getTag()); - assertEquals(1, update.getTimestampsCount()); - assertEquals(TimeUnit.MILLISECONDS.toMicros(1000), update.getTimestamps(0)); + Windmill.WatermarkHold clearAndUpdate = commitBuilder.getWatermarkHolds(0); + assertEquals(key(NAMESPACE, "watermark"), clearAndUpdate.getTag()); + assertEquals(1, clearAndUpdate.getTimestampsCount()); + assertEquals(key(NAMESPACE, "watermark"), clearAndUpdate.getTag()); + assertEquals(1, clearAndUpdate.getTimestampsCount()); + assertEquals(TimeUnit.MILLISECONDS.toMicros(1000), clearAndUpdate.getTimestamps(0)); // Clearing requires reading the future. - Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @Test public void testWatermarkPersistEmpty() throws Exception { - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.add(new Instant(500)); @@ -556,15 +720,37 @@ public void testWatermarkPersistEmpty() throws Exception { } @Test - public void testWatermarkNoStateFamilies() throws Exception { + public void testWatermarkNoStateFamiliesEarliest() throws Exception { underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - StateTag addr = StateTags.watermarkStateInternal("watermark"); + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); bag.get(); Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); } + @Test + public void testWatermarkNoStateFamiliesLatest() throws Exception { + underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); + + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + bag.get(); + Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); + } + + @Test + public void testWatermarkNoStateFamiliesEndOfWindow() throws Exception { + underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); + + StateTag addr = StateTags.watermarkStateInternal( + "watermark", 
OutputTimeFns.outputAtLatestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + bag.get(); + Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); + } @Test public void testValueSetBeforeRead() throws Exception { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java index b48051bd3d4b0..90aefa01de40a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/GroupByKeyTest.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.transforms; import static com.google.cloud.dataflow.sdk.TestUtils.KvMatcher.isKv; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertThat; @@ -36,6 +37,7 @@ import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.util.NoopPathValidator; @@ -43,8 +45,10 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PBegin; import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; import org.joda.time.Duration; +import org.joda.time.Instant; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -361,6 +365,61 @@ public PCollection> apply(PBegin input) { input.apply("GroupByKey", GroupByKey.create()); } + /** + * Tests that when two elements are combined via a GroupByKey their output timestamp agrees + * with the windowing function customized to actually be the same as the default, the earlier of + * the two values. + */ + @Test + public void testOutputTimeFnEarliest() { + Pipeline pipeline = TestPipeline.create(); + + pipeline.apply( + Create.timestamped( + TimestampedValue.of(KV.of(0, "hello"), new Instant(0)), + TimestampedValue.of(KV.of(0, "goodbye"), new Instant(10)))) + .apply(Window.>into(FixedWindows.of(Duration.standardMinutes(10))) + .withOutputTimeFn(OutputTimeFns.outputAtEarliestInputTimestamp())) + .apply(GroupByKey.create()) + .apply(ParDo.of(new AssertTimestamp(new Instant(0)))); + + pipeline.run(); + } + + + /** + * Tests that when two elements are combined via a GroupByKey their output timestamp agrees + * with the windowing function customized to use the latest value. 
+ */ + @Test + public void testOutputTimeFnLatest() { + Pipeline pipeline = TestPipeline.create(); + + pipeline.apply( + Create.timestamped( + TimestampedValue.of(KV.of(0, "hello"), new Instant(0)), + TimestampedValue.of(KV.of(0, "goodbye"), new Instant(10)))) + .apply(Window.>into(FixedWindows.of(Duration.standardMinutes(10))) + .withOutputTimeFn(OutputTimeFns.outputAtLatestInputTimestamp())) + .apply(GroupByKey.create()) + .apply(ParDo.of(new AssertTimestamp(new Instant(10)))); + + pipeline.run(); + } + + private static class AssertTimestamp extends DoFn, Void> { + private final Instant timestamp; + + public AssertTimestamp(Instant timestamp) { + this.timestamp = timestamp; + } + + @Override + public void processElement(ProcessContext c) throws Exception { + assertThat(c.timestamp(), equalTo(timestamp)); + } + } + @Test public void testGroupByKeyGetName() { Assert.assertEquals("GroupByKey", GroupByKey.create().getName()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java index 8da9b1cbcc3a3..91049cd9d9d8f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/SessionsTest.java @@ -18,10 +18,15 @@ import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.runWindowFn; import static com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.set; + +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils; +import com.google.common.collect.ImmutableList; import org.joda.time.Duration; import org.joda.time.Instant; @@ -31,6 +36,7 @@ import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -100,11 +106,51 @@ public void testEquality() { Sessions.withGapDuration(new Duration(20)))); } + /** + * Validates that the output timestamp for aggregate data falls within the acceptable range. + */ @Test public void testValidOutputTimes() throws Exception { for (long timestamp : Arrays.asList(200, 800, 700)) { WindowFnTestUtils.validateGetOutputTimestamp( - Sessions.withGapDuration(new Duration(500)), timestamp); + Sessions.withGapDuration(Duration.millis(500)), timestamp); + } + } + + /** + * Test to confirm that {@link Sessions} with the default {@link OutputTimeFn} holds up the + * watermark potentially indefinitely. + */ + @Test + public void testInvalidOutputAtEarliest() throws Exception { + try { + WindowFnTestUtils.validateGetOutputTimestamps( + Sessions.withGapDuration(Duration.millis(10)), + OutputTimeFns.outputAtEarliestInputTimestamp(), + ImmutableList.of( + (List) ImmutableList.of(1L, 3L), + (List) ImmutableList.of(0L, 5L, 10L, 15L, 20L))); + } catch (AssertionError exc) { + assertThat( + exc.getMessage(), + // These are the non-volatile pieces of the error message that a timestamp + // was not greater than what it should be. 
+ allOf(containsString("a value greater than"), containsString("was less than"))); } } + + /** + * When a user explicitly requests per-key aggregate values have their derived timestamp to be + * the end of the window (instead of the earliest possible), the session here should not hold + * each other up, even though they overlap. + */ + @Test + public void testValidOutputAtEndTimes() throws Exception { + WindowFnTestUtils.validateGetOutputTimestamps( + Sessions.withGapDuration(Duration.millis(10)), + OutputTimeFns.outputAtEndOfWindow(), + ImmutableList.of( + (List) ImmutableList.of(1L, 3L), + (List) ImmutableList.of(0L, 5L, 10L, 15L, 20L))); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowTest.java index d22e7b896ca68..ac0e15886374f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowTest.java @@ -16,20 +16,29 @@ package com.google.cloud.dataflow.sdk.transforms.windowing; +import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.when; +import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.GroupByKey; +import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.TimestampedValue; import org.hamcrest.Matchers; import org.joda.time.Duration; +import org.joda.time.Instant; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -37,14 +46,16 @@ import org.junit.runners.JUnit4; import org.mockito.Mockito; +import java.io.Serializable; + /** * Tests for {@link Window}. */ @RunWith(JUnit4.class) -public class WindowTest { +public class WindowTest implements Serializable { @Rule - public ExpectedException thrown = ExpectedException.none(); + public transient ExpectedException thrown = ExpectedException.none(); @Test public void testWindowIntoSetWindowfn() { @@ -159,4 +170,53 @@ public void testMissingLateness() { .apply("Window", Window.into(fixed10)) .apply("Trigger", Window.triggering(trigger)); } + + /** + * Tests that when two elements are combined via a GroupByKey their output timestamp agrees + * with the windowing function default, the earlier of the two values. 
+ */ + @Test + public void testOutputTimeFnDefault() { + Pipeline pipeline = TestPipeline.create(); + + pipeline.apply( + Create.timestamped( + TimestampedValue.of(KV.of(0, "hello"), new Instant(0)), + TimestampedValue.of(KV.of(0, "goodbye"), new Instant(10)))) + .apply(Window.>into(FixedWindows.of(Duration.standardMinutes(10)))) + .apply(GroupByKey.create()) + .apply(ParDo.of(new DoFn>, Void>() { + @Override + public void processElement(ProcessContext c) throws Exception { + assertThat(c.timestamp(), equalTo(new Instant(0))); + } + })); + + pipeline.run(); + } + + /** + * Tests that when two elements are combined via a GroupByKey their output timestamp agrees + * with the windowing function customized to use the end of the window. + */ + @Test + public void testOutputTimeFnEndOfWindow() { + Pipeline pipeline = TestPipeline.create(); + + pipeline.apply( + Create.timestamped( + TimestampedValue.of(KV.of(0, "hello"), new Instant(0)), + TimestampedValue.of(KV.of(0, "goodbye"), new Instant(10)))) + .apply(Window.>into(FixedWindows.of(Duration.standardMinutes(10))) + .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow())) + .apply(GroupByKey.create()) + .apply(ParDo.of(new DoFn>, Void>() { + @Override + public void processElement(ProcessContext c) throws Exception { + assertThat(c.timestamp(), equalTo(new Instant(10 * 60 * 1000 - 1))); + } + })); + + pipeline.run(); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFnTest.java index 6c9cfa61f0d1a..11df7e19d099e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsAndCombineDoFnTest.java @@ -20,7 +20,6 @@ import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn; import com.google.cloud.dataflow.sdk.transforms.Sum; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsProperties.GroupAlsoByWindowsDoFnFactory; import org.junit.Test; @@ -45,11 +44,8 @@ public GABWAndCombineDoFnFactory( public GroupAlsoByWindowsDoFn forStrategy(WindowingStrategy windowingStrategy) { - @SuppressWarnings("unchecked") - WindowFn windowFn = (WindowFn) windowingStrategy.getWindowFn(); - return new GroupAlsoByWindowsAndCombineDoFn( - windowFn, + windowingStrategy, keyedCombineFn); } } @@ -78,4 +74,12 @@ public void testCombinesIntoSessions() throws Exception { combineFn); } + @Test + public void testCombinesIntoSessionsWithEndOfWindowTimestamp() throws Exception { + CombineFn combineFn = new Sum.SumLongFn(); + + GroupAlsoByWindowsProperties.combinesElementsPerSessionWithEndOfWindowTimestamp( + new GABWAndCombineDoFnFactory<>(combineFn.asKeyedFn()), + combineFn); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java index d725e0caf3c74..10af67d68b9b7 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java @@ -19,7 +19,6 @@ import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static 
org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import com.google.cloud.dataflow.sdk.TestUtils.KvMatcher; @@ -29,6 +28,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo; import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows; @@ -36,12 +37,15 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import org.joda.time.Duration; import org.joda.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; import java.util.List; /** @@ -74,52 +78,29 @@ public static void emptyInputEmptyOutput( WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))); - TupleTag> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); - - DoFnRunner> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - runner.startBundle(); + List result = runGABW( + gabwFactory, + windowingStrategy, + (K) null, // key should never be used + Collections.>emptyList()); - runner.finishBundle(); - - List>> result = outputManager.getOutput(outputTag); - - assertEquals(0, result.size()); + assertThat(result.size(), equalTo(0)); } /** * Tests that for a simple sequence of elements on the same key, the given GABW implementation * correctly groups them according to fixed windows. - * - *

The notable specialized property of this input is that each element occurs in a single - * window. */ public static void groupsElementsIntoFixedWindows( GroupAlsoByWindowsDoFnFactory> gabwFactory) throws Exception { - TupleTag>> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))); - DoFnRunner>>, KV>> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", WindowedValue.of( "v1", new Instant(1), @@ -134,24 +115,18 @@ public static void groupsElementsIntoFixedWindows( "v3", new Instant(13), Arrays.asList(window(10, 20)), - PaneInfo.NO_FIRING))))); + PaneInfo.NO_FIRING)); - runner.finishBundle(); - - List>>> result = outputManager.getOutput(outputTag); - - assertEquals(2, result.size()); + assertThat(result.size(), equalTo(2)); WindowedValue>> item0 = result.get(0); - assertEquals("k", item0.getValue().getKey()); assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); - assertEquals(new Instant(1), item0.getTimestamp()); + assertThat(item0.getTimestamp(), equalTo(new Instant(1))); assertThat(item0.getWindows(), contains(window(0, 10))); WindowedValue>> item1 = result.get(1); - assertEquals("k", item1.getValue().getKey()); assertThat(item1.getValue().getValue(), contains("v3")); - assertEquals(new Instant(13), item1.getTimestamp()); + assertThat(item1.getTimestamp(), equalTo(new Instant(13))); assertThat(item1.getWindows(), contains(window(10, 20))); } @@ -166,22 +141,11 @@ public static void groupsElementsIntoSlidingWindows( GroupAlsoByWindowsDoFnFactory> gabwFactory) throws Exception { - TupleTag>> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy windowingStrategy = WindowingStrategy.of( SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); - DoFnRunner>>, KV>> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", WindowedValue.of( "v1", new Instant(5), @@ -191,32 +155,25 @@ public static void groupsElementsIntoSlidingWindows( "v2", new Instant(15), Arrays.asList(window(0, 20), window(10, 30)), - PaneInfo.NO_FIRING))))); - - runner.finishBundle(); + PaneInfo.NO_FIRING)); - List>>> result = outputManager.getOutput(outputTag); - - assertEquals(3, result.size()); + assertThat(result.size(), equalTo(3)); WindowedValue>> item0 = result.get(0); - assertEquals("k", item0.getValue().getKey()); assertThat(item0.getValue().getValue(), contains("v1")); - assertEquals(new Instant(5), item0.getTimestamp()); + assertThat(item0.getTimestamp(), equalTo(new Instant(5))); assertThat(item0.getWindows(), contains(window(-10, 10))); WindowedValue>> item1 = result.get(1); - assertEquals("k", item1.getValue().getKey()); assertThat(item1.getValue().getValue(), containsInAnyOrder("v1", "v2")); - assertEquals(new Instant(10), item1.getTimestamp()); + assertThat(item1.getTimestamp(), equalTo(new Instant(10))); 
assertThat(item1.getWindows(), contains(window(0, 20))); WindowedValue>> item2 = result.get(2); - assertEquals("k", item2.getValue().getKey()); assertThat(item2.getValue().getValue(), contains("v2")); - assertEquals(new Instant(20), item2.getTimestamp()); + assertThat(item2.getTimestamp(), equalTo(new Instant(20))); assertThat(item2.getWindows(), contains(window(10, 30))); } @@ -232,22 +189,11 @@ public static void combinesElementsInSlidingWindows( CombineFn combineFn) throws Exception { - TupleTag> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy windowingStrategy = WindowingStrategy.of( SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))); - DoFnRunner>>, KV> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>> result = + runGABW(gabwFactory, windowingStrategy, "k", WindowedValue.of( 1L, new Instant(5), @@ -262,13 +208,9 @@ public static void combinesElementsInSlidingWindows( 4L, new Instant(18), Arrays.asList(window(0, 20), window(10, 30)), - PaneInfo.NO_FIRING))))); + PaneInfo.NO_FIRING)); - runner.finishBundle(); - - List>> result = outputManager.getOutput(outputTag); - - assertEquals(3, result.size()); + assertThat(result.size(), equalTo(3)); assertThat(result, contains( WindowMatchers.isSingleWindowedValue( @@ -302,22 +244,11 @@ public static void groupsIntoOverlappingNonmergingWindows( GroupAlsoByWindowsDoFnFactory> gabwFactory) throws Exception { - TupleTag>> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))); - DoFnRunner>>, KV>> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", WindowedValue.of( "v1", new Instant(1), @@ -332,25 +263,19 @@ public static void groupsIntoOverlappingNonmergingWindows( "v3", new Instant(4), Arrays.asList(window(0, 5)), - PaneInfo.NO_FIRING))))); - - runner.finishBundle(); + PaneInfo.NO_FIRING)); - List>>> result = outputManager.getOutput(outputTag); - - assertEquals(2, result.size()); + assertThat(result.size(), equalTo(2)); WindowedValue>> item0 = result.get(0); - assertEquals("k", item0.getValue().getKey()); assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v3")); - assertEquals(new Instant(1), item0.getTimestamp()); + assertThat(item0.getTimestamp(), equalTo(new Instant(1))); assertThat(item0.getWindows(), contains(window(0, 5))); WindowedValue>> item1 = result.get(1); - assertEquals("k", item1.getValue().getKey()); assertThat(item1.getValue().getValue(), contains("v2")); - assertEquals(new Instant(4), item1.getTimestamp()); + assertThat(item1.getTimestamp(), equalTo(new Instant(4))); assertThat(item1.getWindows(), contains(window(1, 5))); } @@ -362,22 +287,11 @@ public static void groupsElementsInMergedSessions( GroupAlsoByWindowsDoFnFactory> gabwFactory) throws Exception { - TupleTag>> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy 
windowingStrategy = WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))); - DoFnRunner>>, KV>> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", WindowedValue.of( "v1", new Instant(0), @@ -392,25 +306,19 @@ public static void groupsElementsInMergedSessions( "v3", new Instant(15), Arrays.asList(window(15, 25)), - PaneInfo.NO_FIRING))))); - - runner.finishBundle(); - - List>>> result = outputManager.getOutput(outputTag); + PaneInfo.NO_FIRING)); - assertEquals(2, result.size()); + assertThat(result.size(), equalTo(2)); WindowedValue>> item0 = result.get(0); - assertEquals("k", item0.getValue().getKey()); assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); - assertEquals(new Instant(0), item0.getTimestamp()); + assertThat(item0.getTimestamp(), equalTo(new Instant(0))); assertThat(item0.getWindows(), contains(window(0, 15))); WindowedValue>> item1 = result.get(1); - assertEquals("k", item1.getValue().getKey()); assertThat(item1.getValue().getValue(), contains("v3")); - assertEquals(new Instant(15), item1.getTimestamp()); + assertThat(item1.getTimestamp(), equalTo(new Instant(15))); assertThat(item1.getWindows(), contains(window(15, 25))); } @@ -424,22 +332,11 @@ public static void combinesElementsPerSession( CombineFn combineFn) throws Exception { - TupleTag> outputTag = new TupleTag<>(); - DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); WindowingStrategy windowingStrategy = WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))); - DoFnRunner>>, KV> runner = - makeRunner( - gabwFactory.forStrategy(windowingStrategy), - windowingStrategy, - outputTag, - outputManager); - - runner.startBundle(); - - runner.processElement(WindowedValue.valueInEmptyWindows( - KV.of("k", (Iterable>) Arrays.asList( + List>> result = + runGABW(gabwFactory, windowingStrategy, "k", WindowedValue.of( 1L, new Instant(0), @@ -454,11 +351,7 @@ public static void combinesElementsPerSession( 4L, new Instant(15), Arrays.asList(window(15, 25)), - PaneInfo.NO_FIRING))))); - - runner.finishBundle(); - - List>> result = outputManager.getOutput(outputTag); + PaneInfo.NO_FIRING)); assertThat(result, contains( WindowMatchers.isSingleWindowedValue( @@ -477,6 +370,340 @@ public static void combinesElementsPerSession( 25))); // window end } + /** + * Tests that for a simple sequence of elements on the same key, the given GABW implementation + * correctly groups them according to fixed windows and also sets the output timestamp + * according to the policy {@link OutputTimeFns#outputAtEndOfWindow()}. 
+ */ + public static void groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp( + GroupAlsoByWindowsDoFnFactory> gabwFactory) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) + .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()); + + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", + WindowedValue.of( + "v1", + new Instant(1), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v2", + new Instant(2), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v3", + new Instant(13), + Arrays.asList(window(10, 20)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue>> item0 = result.get(0); + assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); + assertThat(item0.getTimestamp(), equalTo(window(0, 10).maxTimestamp())); + assertThat(item0.getTimestamp(), + equalTo(Iterables.getOnlyElement(item0.getWindows()).maxTimestamp())); + + WindowedValue>> item1 = result.get(1); + assertThat(item1.getValue().getValue(), contains("v3")); + assertThat(item1.getTimestamp(), equalTo(window(10, 20).maxTimestamp())); + assertThat(item1.getTimestamp(), + equalTo(Iterables.getOnlyElement(item1.getWindows()).maxTimestamp())); + } + + /** + * Tests that for a simple sequence of elements on the same key, the given GABW implementation + * correctly groups them according to fixed windows and also sets the output timestamp + * according to a custom {@link OutputTimeFn}. + */ + public static void groupsElementsIntoFixedWindowsWithCustomTimestamp( + GroupAlsoByWindowsDoFnFactory> gabwFactory) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) + .withOutputTimeFn(new OutputTimeFn.Defaults() { + + @Override + public Instant assignOutputTime(Instant inputTimestamp, IntervalWindow window) { + return window.start(); + } + + @Override + public Instant combine(Instant outputTime, Instant otherOutputTime) { + return outputTime; + } + + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + }); + + List>>> result = + runGABW(gabwFactory, windowingStrategy, "key", + WindowedValue.of( + "v1", + new Instant(1), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v2", + new Instant(2), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v3", + new Instant(13), + Arrays.asList(window(10, 20)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue>> item0 = result.get(0); + assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); + assertThat(item0.getWindows(), contains(window(0, 10))); + assertThat(item0.getTimestamp(), + equalTo(((IntervalWindow) Iterables.getOnlyElement(item0.getWindows())).start())); + + WindowedValue>> item1 = result.get(1); + assertThat(item1.getValue().getValue(), contains("v3")); + assertThat(item1.getWindows(), contains(window(10, 20))); + assertThat(item1.getTimestamp(), + equalTo(((IntervalWindow) Iterables.getOnlyElement(item1.getWindows())).start())); + } + + + /** + * Tests that for a simple sequence of elements on the same key, the given GABW implementation + * correctly groups them according to fixed windows and also sets the output timestamp + * according to the policy {@link OutputTimeFns#outputAtLatestInputTimestamp()}. 
+ */ + public static void groupsElementsIntoFixedWindowsWithLatestTimestamp( + GroupAlsoByWindowsDoFnFactory> gabwFactory) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) + .withOutputTimeFn(OutputTimeFns.outputAtLatestInputTimestamp()); + + List>>> result = + runGABW(gabwFactory, windowingStrategy, "k", + WindowedValue.of( + "v1", + new Instant(1), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v2", + new Instant(2), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v3", + new Instant(13), + Arrays.asList(window(10, 20)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue>> item0 = result.get(0); + assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); + assertThat(item0.getWindows(), contains(window(0, 10))); + assertThat(item0.getTimestamp(), equalTo(new Instant(2))); + + WindowedValue>> item1 = result.get(1); + assertThat(item1.getValue().getValue(), contains("v3")); + assertThat(item1.getWindows(), contains(window(10, 20))); + assertThat(item1.getTimestamp(), equalTo(new Instant(13))); + } + + /** + * Tests that the given GABW implementation correctly groups elements into merged sessions + * with output timestamps at the end of the merged window. + */ + public static void groupsElementsInMergedSessionsWithEndOfWindowTimestamp( + GroupAlsoByWindowsDoFnFactory> gabwFactory) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))) + .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()); + + List>>> result = + runGABW(gabwFactory, windowingStrategy, "k", + WindowedValue.of( + "v1", + new Instant(0), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v2", + new Instant(5), + Arrays.asList(window(5, 15)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v3", + new Instant(15), + Arrays.asList(window(15, 25)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue>> item0 = result.get(0); + assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); + assertThat(item0.getWindows(), contains(window(0, 15))); + assertThat(item0.getTimestamp(), + equalTo(Iterables.getOnlyElement(item0.getWindows()).maxTimestamp())); + + WindowedValue>> item1 = result.get(1); + assertThat(item1.getValue().getValue(), contains("v3")); + assertThat(item1.getWindows(), contains(window(15, 25))); + assertThat(item1.getTimestamp(), + equalTo(Iterables.getOnlyElement(item1.getWindows()).maxTimestamp())); + } + + /** + * Tests that the given GABW implementation correctly groups elements into merged sessions + * with output timestamps at the end of the merged window. 
+ */ + public static void groupsElementsInMergedSessionsWithLatestTimestamp( + GroupAlsoByWindowsDoFnFactory> gabwFactory) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))) + .withOutputTimeFn(OutputTimeFns.outputAtLatestInputTimestamp()); + + List>>> result = + runGABW(gabwFactory, windowingStrategy, "k", + WindowedValue.of( + "v1", + new Instant(0), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v2", + new Instant(5), + Arrays.asList(window(5, 15)), + PaneInfo.NO_FIRING), + WindowedValue.of( + "v3", + new Instant(15), + Arrays.asList(window(15, 25)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue>> item0 = result.get(0); + assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); + assertThat(item0.getWindows(), contains(window(0, 15))); + assertThat(item0.getTimestamp(), equalTo(new Instant(5))); + + WindowedValue>> item1 = result.get(1); + assertThat(item1.getValue().getValue(), contains("v3")); + assertThat(item1.getWindows(), contains(window(15, 25))); + assertThat(item1.getTimestamp(), equalTo(new Instant(15))); + } + + /** + * Tests that the given {@link GroupAlsoByWindowsDoFn} implementation combines elements per + * session window correctly according to the provided {@link CombineFn}. + */ + public static void combinesElementsPerSessionWithEndOfWindowTimestamp( + GroupAlsoByWindowsDoFnFactory gabwFactory, + CombineFn combineFn) + throws Exception { + + WindowingStrategy windowingStrategy = + WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))) + .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()); + + + List>> result = + runGABW(gabwFactory, windowingStrategy, "k", + WindowedValue.of( + 1L, + new Instant(0), + Arrays.asList(window(0, 10)), + PaneInfo.NO_FIRING), + WindowedValue.of( + 2L, + new Instant(5), + Arrays.asList(window(5, 15)), + PaneInfo.NO_FIRING), + WindowedValue.of( + 4L, + new Instant(15), + Arrays.asList(window(15, 25)), + PaneInfo.NO_FIRING)); + + assertThat(result.size(), equalTo(2)); + + WindowedValue> item0 = result.get(0); + assertThat(item0.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(1L, 2L)))); + assertThat(item0.getWindows(), contains(window(0, 15))); + assertThat(item0.getTimestamp(), + equalTo(Iterables.getOnlyElement(item0.getWindows()).maxTimestamp())); + + WindowedValue> item1 = result.get(1); + assertThat(item1.getValue().getValue(), equalTo(combineFn.apply(ImmutableList.of(4L)))); + assertThat(item1.getWindows(), contains(window(15, 25))); + assertThat(item1.getTimestamp(), + equalTo(Iterables.getOnlyElement(item1.getWindows()).maxTimestamp())); + } + + @SafeVarargs + private static + List>> runGABW( + GroupAlsoByWindowsDoFnFactory gabwFactory, + WindowingStrategy windowingStrategy, + K key, + WindowedValue... 
values) { + return runGABW(gabwFactory, windowingStrategy, key, Arrays.asList(values)); + } + + private static + List>> runGABW( + GroupAlsoByWindowsDoFnFactory gabwFactory, + WindowingStrategy windowingStrategy, + K key, + Collection> values) { + + TupleTag> outputTag = new TupleTag<>(); + DoFnRunner.ListOutputManager outputManager = new DoFnRunner.ListOutputManager(); + + DoFnRunner>>, KV> runner = + makeRunner( + gabwFactory.forStrategy(windowingStrategy), + windowingStrategy, + outputTag, + outputManager); + + runner.startBundle(); + + if (values.size() > 0) { + runner.processElement(WindowedValue.valueInEmptyWindows( + KV.of(key, (Iterable>) values))); + } + + runner.finishBundle(); + + List>> result = outputManager.getOutput(outputTag); + + // Sanity check for corruption + for (WindowedValue> elem : result) { + assertThat(elem.getValue().getKey(), equalTo(key)); + } + + return result; + } + private static DoFnRunner>>, KV> makeRunner( @@ -503,4 +730,5 @@ public static void combinesElementsPerSession( private static BoundedWindow window(long start, long end) { return new IntervalWindow(new Instant(start), new Instant(end)); } + } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFnTest.java index e53a4219124f3..41341443551cc 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaIteratorsDoFnTest.java @@ -69,6 +69,28 @@ public void testGroupsIntoOverlappingNonmergingWindows() throws Exception { new GABWViaIteratorsDoFnFactory()); } + @Test + public void testGroupsElementsIntoFixedWindowsWithEndOfWindowTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp( + new GABWViaIteratorsDoFnFactory()); + } + + @Test + public void testLatestTimestampNotSupported() throws Exception { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("OutputTimeFn"); + thrown.expectMessage("not support"); + + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithLatestTimestamp( + new GABWViaIteratorsDoFnFactory()); + } + + @Test + public void testGroupsElementsIntoFixedWindowsWithCustomTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithCustomTimestamp( + new GABWViaIteratorsDoFnFactory()); + } + @Test public void testMergingNotSupported() throws Exception { thrown.expect(IllegalArgumentException.class); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFnTest.java index 43ab7685963ee..6109bf744210c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFnTest.java @@ -132,4 +132,48 @@ public void testCombinesIntoSessions() throws Exception { appliedFn), combineFn); } + + @Test + public void testGroupsElementsIntoFixedWindowsWithEndOfWindowTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp( + new BufferingGABWViaOutputBufferDoFnFactory(StringUtf8Coder.of())); + } + + @Test + public void 
testGroupsElementsIntoFixedWindowsWithLatestTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithLatestTimestamp( + new BufferingGABWViaOutputBufferDoFnFactory(StringUtf8Coder.of())); + } + + @Test + public void testGroupsElementsIntoFixedWindowsWithCustomTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsIntoFixedWindowsWithCustomTimestamp( + new BufferingGABWViaOutputBufferDoFnFactory(StringUtf8Coder.of())); + } + + @Test + public void testGroupsElementsIntoSessionsWithEndOfWindowTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsInMergedSessionsWithEndOfWindowTimestamp( + new BufferingGABWViaOutputBufferDoFnFactory(StringUtf8Coder.of())); + } + + @Test + public void testGroupsElementsIntoSessionsWithLatestTimestamp() throws Exception { + GroupAlsoByWindowsProperties.groupsElementsInMergedSessionsWithLatestTimestamp( + new BufferingGABWViaOutputBufferDoFnFactory(StringUtf8Coder.of())); + } + + @Test + public void testCombinesIntoSessionsWithEndOfWindowTimestamp() throws Exception { + CombineFn combineFn = new Sum.SumLongFn(); + AppliedCombineFn appliedFn = AppliedCombineFn.withInputCoder( + combineFn.asKeyedFn(), new CoderRegistry(), + KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())); + + GroupAlsoByWindowsProperties.combinesElementsPerSessionWithEndOfWindowTimestamp( + new CombiningGABWViaOutputBufferDoFnFactory<>( + StringUtf8Coder.of(), + appliedFn), + combineFn); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java index 531488bf5a375..f5bef0f3da40f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java @@ -190,7 +190,8 @@ public final void assertHasOnlyGlobalAndFinishedSetsAndPaneInfoFor(W... expected ImmutableSet.>of( TriggerRunner.FINISHED_BITS_TAG, PaneInfoTracker.PANE_INFO_TAG, - WatermarkHold.DATA_HOLD_TAG)); + WatermarkHold.watermarkHoldTagForOutputTimeFn( + objectStrategy.getOutputTimeFn()))); } public final void assertHasOnlyGlobalState() { @@ -202,7 +203,10 @@ public final void assertHasOnlyGlobalState() { public final void assertHasOnlyGlobalAndPaneInfoFor(W... 
expectedWindows) { assertHasOnlyGlobalAndAllowedTags( ImmutableSet.copyOf(expectedWindows), - ImmutableSet.>of(PaneInfoTracker.PANE_INFO_TAG, WatermarkHold.DATA_HOLD_TAG)); + ImmutableSet.>of( + PaneInfoTracker.PANE_INFO_TAG, + WatermarkHold.watermarkHoldTagForOutputTimeFn( + objectStrategy.getOutputTimeFn()))); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java index 4b1e162d4e9cd..ee09e08c37ebd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java @@ -22,6 +22,9 @@ import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VarIntCoder; import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import org.hamcrest.Matchers; import org.joda.time.Instant; @@ -37,6 +40,8 @@ @RunWith(JUnit4.class) public class InMemoryStateInternalsTest { + private static final BoundedWindow WINDOW_1 = new IntervalWindow(new Instant(0), new Instant(10)); + private static final BoundedWindow WINDOW_3 = new IntervalWindow(new Instant(5), new Instant(20)); private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1"); private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2"); private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3"); @@ -48,8 +53,15 @@ public class InMemoryStateInternalsTest { "sumInteger", VarIntCoder.of(), new Sum.SumIntegerFn()); private static final StateTag> STRING_BAG_ADDR = StateTags.bag("stringBag", StringUtf8Coder.of()); - private static final StateTag WATERMARK_BAG_ADDR = - StateTags.watermarkStateInternal("watermark"); + private static final StateTag WATERMARK_EARLIEST_ADDR = + StateTags.watermarkStateInternal("watermark", + OutputTimeFns.outputAtEarliestInputTimestamp()); + private static final StateTag WATERMARK_LATEST_ADDR = + StateTags.watermarkStateInternal("watermark", + OutputTimeFns.outputAtLatestInputTimestamp()); + private static final StateTag WATERMARK_EOW_ADDR = + StateTags.watermarkStateInternal("watermark", + OutputTimeFns.outputAtEndOfWindow()); InMemoryStateInternals underTest = new InMemoryStateInternals(); @@ -120,7 +132,7 @@ public void testMergeBagIntoSource() throws Exception { bag1.add("!"); BagState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, STRING_BAG_ADDR); + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, STRING_BAG_ADDR, WINDOW_1); // Reading the merged bag gets both the contents assertThat(merged.get().read(), Matchers.containsInAnyOrder("Hello", "World", "!")); @@ -142,7 +154,7 @@ public void testMergeBagIntoNewNamespace() throws Exception { bag1.add("!"); BagState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, STRING_BAG_ADDR); + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, STRING_BAG_ADDR, WINDOW_3); // Reading the merged bag gets both the contents assertThat(merged.get().read(), Matchers.containsInAnyOrder("Hello", "World", "!")); @@ -204,7 +216,7 @@ public void testMergeCombiningValueIntoSource() throws Exception { 
assertThat(value2.get().read(), Matchers.equalTo(10)); CombiningValueState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, SUM_INTEGER_ADDR); + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, SUM_INTEGER_ADDR, WINDOW_1); assertThat(value1.get().read(), Matchers.equalTo(11)); assertThat(value2.get().read(), Matchers.equalTo(10)); @@ -233,7 +245,7 @@ public void testMergeCombiningValueIntoNewNamespace() throws Exception { assertThat(value2.get().read(), Matchers.equalTo(10)); CombiningValueState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, SUM_INTEGER_ADDR); + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, SUM_INTEGER_ADDR, WINDOW_3); assertThat(value1.get().read(), Matchers.equalTo(11)); assertThat(value2.get().read(), Matchers.equalTo(10)); @@ -253,12 +265,12 @@ public void testMergeCombiningValueIntoNewNamespace() throws Exception { } @Test - public void testWatermarkState() throws Exception { - WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_BAG_ADDR); + public void testWatermarkEarliestState() throws Exception { + WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR); // State instances are cached, but depend on the namespace. - assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_BAG_ADDR)); - assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_BAG_ADDR))); + assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR)); + assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR))); assertThat(value.get().read(), Matchers.nullValue()); StateContents readFuture = value.get(); @@ -276,12 +288,58 @@ public void testWatermarkState() throws Exception { value.clear(); assertThat(readFuture.read(), Matchers.equalTo(null)); - assertThat(underTest.state(NAMESPACE_1, WATERMARK_BAG_ADDR), Matchers.sameInstance(value)); + assertThat(underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR), Matchers.sameInstance(value)); + } + + @Test + public void testWatermarkLatestState() throws Exception { + WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR); + + // State instances are cached, but depend on the namespace. + assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR)); + assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR))); + + assertThat(value.get().read(), Matchers.nullValue()); + StateContents readFuture = value.get(); + value.add(new Instant(2000)); + assertThat(readFuture.read(), Matchers.equalTo(new Instant(2000))); + assertThat(value.get().read(), Matchers.equalTo(new Instant(2000))); + + value.add(new Instant(3000)); + assertThat(readFuture.read(), Matchers.equalTo(new Instant(3000))); + assertThat(value.get().read(), Matchers.equalTo(new Instant(3000))); + + value.add(new Instant(1000)); + assertThat(readFuture.read(), Matchers.equalTo(new Instant(3000))); + assertThat(value.get().read(), Matchers.equalTo(new Instant(3000))); + + value.clear(); + assertThat(readFuture.read(), Matchers.equalTo(null)); + assertThat(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), Matchers.sameInstance(value)); + } + + @Test + public void testWatermarkEndOfWindowState() throws Exception { + WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR); + + // State instances are cached, but depend on the namespace. 
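+    // As with the earliest and latest variants above, the same (namespace, tag) lookup must
+    // return the cached instance, while a different namespace must yield a distinct one.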
+ assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR)); + assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EOW_ADDR))); + + assertThat(value.get().read(), Matchers.nullValue()); + StateContents readFuture = value.get(); + value.add(new Instant(2000)); + assertThat(readFuture.read(), Matchers.equalTo(new Instant(2000))); + assertThat(value.get().read(), Matchers.equalTo(new Instant(2000))); + + value.clear(); + assertThat(readFuture.read(), Matchers.equalTo(null)); + assertThat(underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR), Matchers.sameInstance(value)); } @Test public void testWatermarkStateIsEmpty() throws Exception { - WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_BAG_ADDR); + WatermarkStateInternal value = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR); assertThat(value.isEmpty().read(), Matchers.is(true)); StateContents readFuture = value.isEmpty(); @@ -293,9 +351,9 @@ public void testWatermarkStateIsEmpty() throws Exception { } @Test - public void testMergeWatermarkIntoSource() throws Exception { - WatermarkStateInternal value1 = underTest.state(NAMESPACE_1, WATERMARK_BAG_ADDR); - WatermarkStateInternal value2 = underTest.state(NAMESPACE_2, WATERMARK_BAG_ADDR); + public void testMergeEarliestWatermarkIntoSource() throws Exception { + WatermarkStateInternal value1 = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR); + WatermarkStateInternal value2 = underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR); value1.add(new Instant(3000)); value2.add(new Instant(5000)); @@ -303,7 +361,7 @@ public void testMergeWatermarkIntoSource() throws Exception { value2.add(new Instant(2000)); WatermarkStateInternal merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, WATERMARK_BAG_ADDR); + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, WATERMARK_EARLIEST_ADDR, WINDOW_1); assertThat(value1.get().read(), Matchers.equalTo(new Instant(3000))); assertThat(value2.get().read(), Matchers.equalTo(new Instant(2000))); @@ -316,4 +374,32 @@ public void testMergeWatermarkIntoSource() throws Exception { merged.add(new Instant(1000)); assertThat(merged.get().read(), Matchers.equalTo(new Instant(1000))); } + + @Test + public void testMergeLatestWatermarkIntoSource() throws Exception { + WatermarkStateInternal value1 = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR); + WatermarkStateInternal value2 = underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR); + + value1.add(new Instant(3000)); + value2.add(new Instant(5000)); + value1.add(new Instant(4000)); + value2.add(new Instant(2000)); + + WatermarkStateInternal merged = underTest.mergedState( + Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_1, WATERMARK_LATEST_ADDR, WINDOW_1); + + assertThat(value1.get().read(), Matchers.equalTo(new Instant(4000))); + assertThat(value2.get().read(), Matchers.equalTo(new Instant(5000))); + assertThat(merged.get().read(), Matchers.equalTo(new Instant(5000))); + + // Reading the merged value compressed the old values + assertThat(value1.get().read(), Matchers.equalTo(new Instant(5000))); + assertThat(value2.get().read(), Matchers.equalTo(null)); + + merged.add(new Instant(1000)); + assertThat(merged.get().read(), Matchers.equalTo(new Instant(5000))); + + merged.add(new Instant(7000)); + assertThat(merged.get().read(), Matchers.equalTo(new Instant(7000))); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java 
b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java index 4705ef1b0dea9..9b0fed7d11b51 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java @@ -25,6 +25,7 @@ import com.google.cloud.dataflow.sdk.transforms.Max.MaxIntegerFn; import com.google.cloud.dataflow.sdk.transforms.Min; import com.google.cloud.dataflow.sdk.transforms.Min.MinIntegerFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import org.junit.Test; import org.junit.runner.RunWith; @@ -35,7 +36,6 @@ */ @RunWith(JUnit4.class) public class StateTagTest { - @Test public void testValueEquality() { StateTag fooVarInt1 = StateTags.value("foo", VarIntCoder.of()); @@ -62,12 +62,19 @@ public void testBagEquality() { @Test public void testWatermarkBagEquality() { - StateTag foo1 = StateTags.watermarkStateInternal("foo"); - StateTag foo2 = StateTags.watermarkStateInternal("foo"); - StateTag bar = StateTags.watermarkStateInternal("bar"); + StateTag foo1 = StateTags.watermarkStateInternal( + "foo", OutputTimeFns.outputAtEarliestInputTimestamp()); + StateTag foo2 = StateTags.watermarkStateInternal( + "foo", OutputTimeFns.outputAtEarliestInputTimestamp()); + StateTag bar = StateTags.watermarkStateInternal( + "bar", OutputTimeFns.outputAtEarliestInputTimestamp()); + + StateTag bar2 = StateTags.watermarkStateInternal( + "bar", OutputTimeFns.outputAtLatestInputTimestamp()); assertEquals(foo1, foo2); assertNotEquals(foo1, bar); + assertNotEquals(bar, bar2); } @Test From 777004ebfbe006373b0cf2584cf291d4840e7a4a Mon Sep 17 00:00:00 2001 From: robertwb Date: Tue, 24 Nov 2015 15:51:57 -0800 Subject: [PATCH 1193/1541] Minor javadoc updates. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108656541 --- .../dataflow/sdk/transforms/Aggregator.java | 2 +- .../sdk/transforms/AggregatorRetriever.java | 2 +- .../sdk/transforms/AppliedPTransform.java | 2 ++ .../sdk/transforms/ApproximateQuantiles.java | 7 ++-- .../dataflow/sdk/transforms/Combine.java | 32 +++++++++++++------ .../cloud/dataflow/sdk/transforms/Count.java | 8 +++-- .../cloud/dataflow/sdk/transforms/Max.java | 4 +++ .../cloud/dataflow/sdk/transforms/Min.java | 4 +++ .../cloud/dataflow/sdk/transforms/Sum.java | 4 +++ .../cloud/dataflow/sdk/transforms/Top.java | 4 +++ 10 files changed, 53 insertions(+), 16 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java index 71f330c3521c1..7e56ddac0dc7e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java @@ -41,7 +41,7 @@ * myAggregator = createAggregator("myAggregator", new Sum.SumIntegerFn()); * } * - * {@literal @}Override + * @Override * public void processElement(ProcessContext c) { * myAggregator.addValue(1); * } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java index 97d5367626876..4bbea85f52a0c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java @@ -19,7 +19,7 @@ import java.util.Collection; /** - * A class for extracting 
{@link Aggregator Aggregators} from {@link DoFn DoFns}. + * An internal class for extracting {@link Aggregator Aggregators} from {@link DoFn DoFns}. */ public final class AggregatorRetriever { private AggregatorRetriever() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java index b86d5d723bbf2..7b3d87dfcf8be 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java @@ -25,6 +25,8 @@ * Represents the application of a {@link PTransform} to a specific input to produce * a specific output. * + *

For internal use. + * * @param transform input type * @param transform output type * @param transform type diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index 687c51e4d2326..f1b607c182a47 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -51,10 +51,13 @@ /** * {@code PTransform}s for getting an idea of a {@code PCollection}'s - * data distribution using approximate {@code N}-tiles, either - * globally or per-key. + * data distribution using approximate {@code N}-tiles (e.g. quartiles, + * percentiles, etc.), either globally or per-key. */ public class ApproximateQuantiles { + private ApproximateQuantiles() { + // do not instantiate + } /** * Returns a {@code PTransform} that takes a {@code PCollection} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java index 835081b273adf..16d12e2350499 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java @@ -66,8 +66,14 @@ /** * {@code PTransform}s for combining {@code PCollection} elements * globally and per-key. + * + *

See the documentation + * for how to use the operations in this class. */ public class Combine { + private Combine() { + // do not instantiate + } /** * Returns a {@link Globally Combine.Globally} {@code PTransform} @@ -396,8 +402,9 @@ public AccumT compact(AccumT accumulator) { * Applies this {@code CombineFn} to a collection of input values * to produce a combined output value. * - *

Useful when testing the behavior of a {@code CombineFn} - * separately from a {@code Combine} transform. + *

Useful when using a {@code CombineFn} separately from a + * {@code Combine} transform. Does not invoke the + * {@link mergeAccumulators} operation. */ public OutputT apply(Iterable inputs) { AccumT accum = createAccumulator(); @@ -621,8 +628,7 @@ public Coder getDefaultOutputCoder(CoderRegistry registry, Coder inputCode /** * Holds a single value value of type {@code V} which may or may not be present. * - *

Used only as a private accumulator class. The type appears in public interfaces, but from - * a public perspective, it has no accessible members. + *

Used only as a private accumulator class. */ public static class Holder { private V value; @@ -683,7 +689,7 @@ public void verifyDeterministic() throws NonDeterministicException { /** * An abstract subclass of {@link CombineFn} for implementing combiners that are more - * easily expressed as binary operations on ints. + * easily and efficiently expressed as binary operations on ints. */ public abstract static class BinaryCombineIntegerFn extends CombineFn { @@ -763,7 +769,7 @@ public Counter getCounter(String name) { /** * An abstract subclass of {@link CombineFn} for implementing combiners that are more - * easily expressed as binary operations on longs. + * easily and efficiently expressed as binary operations on longs. */ public abstract static class BinaryCombineLongFn extends CombineFn { /** @@ -841,7 +847,7 @@ public Counter getCounter(String name) { /** * An abstract subclass of {@link CombineFn} for implementing combiners that are more - * easily expressed as binary operations on doubles. + * easily and efficiently expressed as binary operations on doubles. */ public abstract static class BinaryCombineDoubleFn extends CombineFn { @@ -1373,8 +1379,8 @@ public Globally named(String name) { * Returns a {@link PTransform} that produces a {@code PCollectionView} * whose elements are the result of combining elements per-window in * the input {@code PCollection}. If a value is requested from the view - * for a window that is not present, the result of calling the {@code CombineFn} - * on empty input will returned. + * for a window that is not present, the result of applying the {@code CombineFn} + * to an empty input set will be returned. */ public GloballyAsSingletonView asSingletonView() { return new GloballyAsSingletonView<>(fn, insertDefault, fanout); @@ -1382,7 +1388,8 @@ public GloballyAsSingletonView asSingletonView() { /** * Returns a {@link PTransform} identical to this, but that does not attempt to - * provide a default value in the case of empty input. + * provide a default value in the case of empty input. Required when the input + * is not globally windowed and the output is not being used as a side input. */ public Globally withoutDefaults() { return new Globally<>(name, fn, false, fanout); @@ -2125,6 +2132,11 @@ public void processElement(ProcessContext c) { return output; } + /** + * Returns the {@link CombineFn} bound to its coders. + * + *

For internal use. + */ public AppliedCombineFn getAppliedFn( CoderRegistry registry, Coder>> inputCoder) { KvCoder kvCoder = getKvCoder(inputCoder); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java index 9b29088cbe2ed..ffa11d13a3c91 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java @@ -24,10 +24,14 @@ * {@code PTransorm}s to count the elements in a {@link PCollection}. * *

{@link Count#perElement()} can be used to count the number of occurrences of each - * distinct element in the PCollection. {@link Count#globally()} can - * be used to count the total number of elements in a PCollection. + * distinct element in the PCollection, {@link Count#perKey()} can be used to count the + * number of values per key, and {@link Count#globally()} can be used to count the total + * number of elements in a PCollection. */ public class Count { + private Count() { + // do not instantiate + } /** * Returns a {@link Combine.Globally} {@link PTransform} that counts the number of elements in diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java index 79d904ed57057..8678e4f33eaeb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java @@ -44,6 +44,10 @@ */ public class Max { + private Max() { + // do not instantiate + } + /** * Returns a {@code PTransform} that takes an input {@code PCollection} and returns a * {@code PCollection} whose contents is the maximum of the input {@code PCollection}'s diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java index a938cf147ce22..47ab3a0ad27d6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java @@ -44,6 +44,10 @@ */ public class Min { + private Min() { + // do not instantiate + } + /** * Returns a {@code PTransform} that takes an input {@code PCollection} and returns a * {@code PCollection} whose contents is a single value that is the minimum of the input diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java index dbd5067c24154..5b30475a9d8cd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java @@ -41,6 +41,10 @@ */ public class Sum { + private Sum() { + // do not instantiate + } + /** * Returns a {@code PTransform} that takes an input * {@code PCollection} and returns a diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java index b1fe7275bfa8c..b5af914b8d66b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java @@ -49,6 +49,10 @@ */ public class Top { + private Top() { + // do not instantiate + } + /** * Returns a {@code PTransform} that takes an input * {@code PCollection} and returns a {@code PCollection>} with a From c4a6c4da410dd0de9b9e465f2fc331f3911e89b7 Mon Sep 17 00:00:00 2001 From: bchambers Date: Tue, 24 Nov 2015 15:56:37 -0800 Subject: [PATCH 1194/1541] Javadoc cleanup to BoundedSource ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108656926 --- .../cloud/dataflow/sdk/io/BoundedSource.java | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java index e7b1807556ef7..5f91ef39b863a 100644 --- 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java @@ -48,7 +48,7 @@ */ public abstract class BoundedSource extends Source { /** - * Splits the source into bundles of approximately given size (in bytes). + * Splits the source into bundles of approximately {@code desiredBundleSizeBytes}. */ public abstract List> splitIntoBundles( long desiredBundleSizeBytes, PipelineOptions options) throws Exception; @@ -61,8 +61,8 @@ public abstract List> splitIntoBundles( public abstract long getEstimatedSizeBytes(PipelineOptions options) throws Exception; /** - * Whether this source is known to produce key/value pairs with the (encoded) keys in - * lexicographically sorted order. + * Whether this source is known to produce key/value pairs sorted by lexicographic order on + * the bytes of the encoded key. */ public abstract boolean producesSortedKeys(PipelineOptions options) throws Exception; @@ -102,8 +102,9 @@ public abstract List> splitIntoBundles( @Experimental(Experimental.Kind.SOURCE_SINK) public abstract static class BoundedReader extends Source.Reader { /** - * Returns a value in [0, 1] representing approximately what fraction of the source - * ({@link #getCurrentSource}) this reader has read so far. + * Returns a value in [0, 1] representing approximately what fraction of the + * {@link #getCurrentSource current source} this reader has read so far, or {@code null} if such + * an estimate is not available. * *

It is recommended that this method should satisfy the following properties: *

    @@ -114,13 +115,10 @@ public abstract static class BoundedReader extends Source.Reader { * *

    By default, returns null to indicate that this cannot be estimated. * - *

    Thread safety
    + * Thread safety
    * If {@link #splitAtFraction} is implemented, this method can be called concurrently to other * methods (including itself), and it is therefore critical for it to be implemented * in a thread-safe way. - * - * @return A value in [0, 1] representing the fraction of this reader's current input - * read so far, or {@code null} if such an estimate is not available. */ public Double getFractionConsumed() { return null; @@ -133,9 +131,10 @@ public Double getFractionConsumed() { * Tells the reader to narrow the range of the input it's going to read and give up * the remainder, so that the new range would contain approximately the given * fraction of the amount of data in the current range. + * *

    Returns a {@code BoundedSource} representing the remainder. * - *

    Detailed description
    + * Detailed description
    * Assuming the following sequence of calls: *
    {@code
          *   BoundedSource initial = reader.getCurrentSource();
    @@ -161,20 +160,20 @@ public Double getFractionConsumed() {
          * corresponding to the given fraction. In this case, the method MUST have no effect
          * (the reader must behave as if the method hadn't been called at all).
          *
    - * Statefulness
    + * Statefulness
    * Since this method (if successful) affects the reader's source, in subsequent invocations * "fraction" should be interpreted relative to the new current source. * - *

    Thread safety and blocking
    + * Thread safety and blocking
    * This method will be called concurrently to other methods (however there will not be multiple * concurrent invocations of this method itself), and it is critical for it to be implemented * in a thread-safe way (otherwise data loss is possible). * - *

    It is also very important that this method always completes quickly, in particular, + *

    It is also very important that this method always completes quickly. In particular, * it should not perform or wait on any blocking operations such as I/O, RPCs etc. Violating * this requirement may stall completion of the work item or even cause it to fail. * - *

    E.g. it is incorrect to make both this method and {@link #start}/{@link #advance} + *

    It is incorrect to make both this method and {@link #start}/{@link #advance} * {@code synchronized}, because those methods can perform blocking operations, and then * this method would have to wait for those calls to complete. * From 2174ac18a4d244b8274dd9b4f6a5574ac83ed425 Mon Sep 17 00:00:00 2001 From: bchambers Date: Tue, 24 Nov 2015 15:57:19 -0800 Subject: [PATCH 1195/1541] Cleanup Javadoc on BlockBasedSource ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108656990 --- .../dataflow/sdk/io/BlockBasedSource.java | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java index f51d269d767db..62115dfbe5e69 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java @@ -30,14 +30,15 @@ * *

    {@code BlockBasedSource} should be derived from when a file format does not support efficient * seeking to a record in the file, but can support efficient seeking to a block. Alternatively, - * records in the file cannot be offset-addressed, but blocks can (i.e., it is not possible to say - * that record i starts at offset m, but it is possible to say that block j starts at offset n). + * records in the file cannot be offset-addressed, but blocks can (it is not possible to say + * that record {code i} starts at offset {@code m}, but it is possible to say that block {@code j} + * starts at offset {@code n}). * *

    The records that will be read from a {@code BlockBasedSource} that corresponds to a subrange - * of a file [startOffset, endOffset) are those records such that the record is contained in a - * block that starts at offset {@code i}, where {@code i >= startOffset} and {@code i < endOffset}. - * In other words, a record will be read from the source if it is contained in a block that begins - * within the range described by the source. + * of a file {@code [startOffset, endOffset)} are those records such that the record is contained in + * a block that starts at offset {@code i}, where {@code i >= startOffset} and + * {@code i < endOffset}. In other words, a record will be read from the source if its first byte is + * contained in a block that begins within the range described by the source. * *

    This entails that it is possible to determine the start offsets of all blocks in a file. * @@ -105,12 +106,12 @@ protected abstract static class Block { public abstract boolean readNextRecord() throws IOException; /** - * Returns the fraction of the block already consumed (i.e., not including the current record), - * if possible, as a value in [0, 1]. Successive calls to this method must be monotonically - * non-decreasing. + * Returns the fraction of the block already consumed, if possible, as a value in + * {@code [0, 1]}. It should not include the current record. Successive results from this method + * must be monotonically increasing. * - *

    If it is not possible to compute the fraction of the block consumed (e.g., the total - * number of records is unknown and record offsets are unknown), this method may return zero. + *

    If it is not possible to compute the fraction of the block consumed this method may + * return zero. For example, when the total number of records in the block is unknown. */ public abstract double getFractionOfBlockConsumed(); } @@ -118,7 +119,7 @@ protected abstract static class Block { /** * A {@code Reader} that reads records from a {@link BlockBasedSource}. If the source is a * subrange of a file, the blocks that will be read by this reader are those such that the first - * byte of the block is within the range [start, end). + * byte of the block is within the range {@code [start, end)}. */ @Experimental(Experimental.Kind.SOURCE_SINK) protected abstract static class BlockBasedReader extends FileBasedReader { @@ -143,7 +144,7 @@ protected BlockBasedReader(BlockBasedSource source) { /** * Returns the size of the current block in bytes as it is represented in the underlying file, - * if possible. This method may return 0 if the size of the current block is unknown. + * if possible. This method may return {@code 0} if the size of the current block is unknown. * *

    The size returned by this method must be such that for two successive blocks A and B, * {@code offset(A) + size(A) <= offset(B)}. If this is not satisfied, the progress reported @@ -152,8 +153,8 @@ protected BlockBasedReader(BlockBasedSource source) { * *

    This method and {@link Block#getFractionOfBlockConsumed} are used to provide an estimate * of progress within a block ({@code getCurrentBlock().getFractionOfBlockConsumed() * - * getCurrentBlockSize()}). It is acceptable for the result of this computation to be 0, but - * progress estimation will be inaccurate. + * getCurrentBlockSize()}). It is acceptable for the result of this computation to be {@code 0}, + * but progress estimation will be inaccurate. */ public abstract long getCurrentBlockSize(); @@ -183,6 +184,12 @@ protected boolean isAtSplitPoint() { return atSplitPoint; } + /** + * Reads the next record from the {@link getCurrentBlock() current block} if + * possible. Will call {@link readNextBlock()} to advance to the next block if not. + * + *

    The first record read from a block is treated as a split point. + */ @Override protected final boolean readNextRecord() throws IOException { atSplitPoint = false; From cd212b1937df737c38887d59ce38291d099f4b33 Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 30 Nov 2015 10:42:26 -0800 Subject: [PATCH 1196/1541] Migrate to use nano time and a nano sleep in tests Use a high precision timer instead of the system clock to measure time differences to prevent things like NTP from affecting tests. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108993050 --- .../DataflowWorkProgressUpdaterTest.java | 5 +- .../runners/worker/FakeWindmillServer.java | 3 +- .../worker/WindmillStateInternalsTest.java | 3 +- .../sdk/testing/ExpectedLogsTest.java | 9 ++- .../sdk/testing/SystemNanoTimeSleeper.java | 68 +++++++++++++++++++ .../testing/SystemNanoTimeSleeperTest.java | 53 +++++++++++++++ .../IntraBundleParallelizationTest.java | 44 ++++++------ .../dataflow/sdk/util/MemoryMonitorTest.java | 6 +- .../util/common/worker/ReadOperationTest.java | 3 +- .../util/common/worker/StateSamplerTest.java | 22 +++--- 10 files changed, 177 insertions(+), 39 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeper.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeperTest.java diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index b76ebba36fed3..a6d60d946ca38 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -22,6 +22,7 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static com.google.cloud.dataflow.sdk.util.CloudCounterUtils.extractCounter; import static com.google.cloud.dataflow.sdk.util.CloudMetricUtils.extractCloudMetric; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; @@ -266,7 +267,7 @@ public void workProgressUpdaterSendsLastPendingUpdateWhenStopped() throws Except progressUpdater.startReportingProgress(); // The initial update should be sent after 300 msec. 
- Thread.sleep(50); + sleepMillis(50); verifyZeroInteractions(workUnitClient); verify(workUnitClient, timeout(350)) @@ -279,7 +280,7 @@ public void workProgressUpdaterSendsLastPendingUpdateWhenStopped() throws Except verifyNoMoreInteractions(workUnitClient); // still not yet after 50ms - Thread.sleep(50); + sleepMillis(50); verifyNoMoreInteractions(workUnitClient); // Stop the progressUpdater now, and expect the last update immediately diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FakeWindmillServer.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FakeWindmillServer.java index 3dcb53ffeefc3..1b609f044e2db 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FakeWindmillServer.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/FakeWindmillServer.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static org.junit.Assert.assertFalse; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; @@ -76,7 +77,7 @@ public Windmill.GetDataResponse getData(Windmill.GetDataRequest request) { } else { try { // Sleep for a little bit to ensure that *-windmill-read state-sampled counters show up. - Thread.sleep(500); + sleepMillis(500); } catch (InterruptedException e) {} } return response; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java index fe80f90a175fd..b976e05051af4 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java @@ -15,6 +15,7 @@ */ package com.google.cloud.dataflow.sdk.runners.worker; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -99,7 +100,7 @@ private void waitAndSet(final SettableFuture future, final T value, final @Override public void run() { try { - Thread.sleep(millis); + sleepMillis(millis); } catch (InterruptedException e) { throw new RuntimeException("Interrupted before setting", e); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java index 44b0b60039c95..2dce880ffc39f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/ExpectedLogsTest.java @@ -16,6 +16,8 @@ package com.google.cloud.dataflow.sdk.testing; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; + import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -31,6 +33,7 @@ import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; /** Tests for {@link FastNanoClockAndSleeper}. 
*/ @RunWith(JUnit4.class) @@ -114,7 +117,8 @@ public void testLogCaptureOccursAtLowestLogLevel() throws Throwable { public void testThreadSafetyOfLogSaver() throws Throwable { CompletionService completionService = new ExecutorCompletionService<>(Executors.newCachedThreadPool()); - final long scheduledLogTime = System.currentTimeMillis() + 500L; + final long scheduledLogTime = + TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS) + 500L; List expectedStrings = new ArrayList<>(); for (int i = 0; i < 100; i++) { @@ -124,7 +128,8 @@ public void testThreadSafetyOfLogSaver() throws Throwable { @Override public Void call() throws Exception { // Have all threads started and waiting to log at about the same moment. - Thread.sleep(Math.max(1, scheduledLogTime - System.currentTimeMillis())); + sleepMillis(Math.max(1, scheduledLogTime + - TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS))); LOG.trace(expected); return null; } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeper.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeper.java new file mode 100644 index 0000000000000..d8507f79b08f5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeper.java @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import com.google.api.client.util.Sleeper; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; + +/** + * This class provides an expensive sleeper to deal with issues around Java's + * accuracy of {@link System#currentTimeMillis} and methods such as + * {@link Object#wait} and {@link Thread#sleep} which depend on it. This + * article goes into further detail about this issue. + * + * This {@link Sleeper} uses {@link System#nanoTime} + * as the timing source and {@link LockSupport#parkNanos} as the wait method. + * Note that usage of this sleeper may impact performance because + * of the relatively more expensive methods being invoked when compared to + * {@link Thread#sleep}. + */ +public class SystemNanoTimeSleeper implements Sleeper { + public static final Sleeper INSTANCE = new SystemNanoTimeSleeper(); + + /** Limit visibility to prevent instantiation. */ + private SystemNanoTimeSleeper() { + } + + @Override + public void sleep(long millis) throws InterruptedException { + long currentTime; + long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(millis, TimeUnit.MILLISECONDS); + while ((currentTime = System.nanoTime()) < endTime) { + if (Thread.interrupted()) { + throw new InterruptedException(); + } + LockSupport.parkNanos(endTime - currentTime); + } + if (Thread.interrupted()) { + throw new InterruptedException(); + } + return; + } + + /** + * Causes the currently executing thread to sleep (temporarily cease + * execution) for the specified number of milliseconds. 
The thread does not + * lose ownership of any monitors. + */ + public static void sleepMillis(long millis) throws InterruptedException { + INSTANCE.sleep(millis); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeperTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeperTest.java new file mode 100644 index 0000000000000..33b6b693a2977 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/SystemNanoTimeSleeperTest.java @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.testing; + +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link SystemNanoTimeSleeper}. */ +@RunWith(JUnit4.class) +public class SystemNanoTimeSleeperTest { + @Test + public void testSleep() throws Exception { + long startTime = System.nanoTime(); + sleepMillis(100); + long endTime = System.nanoTime(); + assertTrue(endTime - startTime >= 100); + } + + @Test + public void testNegativeSleep() throws Exception { + sleepMillis(-100); + } + + @Test(expected = InterruptedException.class) + public void testInterruptionInLoop() throws Exception { + Thread.currentThread().interrupt(); + sleepMillis(0); + } + + @Test(expected = InterruptedException.class) + public void testInterruptionOutsideOfLoop() throws Exception { + Thread.currentThread().interrupt(); + sleepMillis(-100); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelizationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelizationTest.java index afefcdae15d35..a15c694fe97c8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelizationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelizationTest.java @@ -16,23 +16,27 @@ package com.google.cloud.dataflow.sdk.transforms; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.testing.TestPipeline; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import java.util.ArrayList; +import java.util.concurrent.TimeUnit; import 
java.util.concurrent.atomic.AtomicInteger; /** @@ -65,7 +69,7 @@ private static class DelayFn extends DoFn { @Override public void processElement(ProcessContext c) { try { - Thread.sleep(DELAY_MS); + sleepMillis(DELAY_MS); } catch (InterruptedException e) { e.printStackTrace(); throw new RuntimeException("Interrupted"); @@ -101,7 +105,9 @@ public void processElement(ProcessContext c) { private static class ConcurrencyMeasuringFn extends DoFn { @Override public void processElement(ProcessContext c) { - synchronized (this) { + // Synchronize on the class to provide synchronous access irrespective of + // how this DoFn is called. + synchronized (ConcurrencyMeasuringFn.class) { concurrentElements++; if (concurrentElements > maxConcurrency) { maxConcurrency = concurrentElements; @@ -110,7 +116,7 @@ public void processElement(ProcessContext c) { c.output(c.element()); - synchronized (this) { + synchronized (ConcurrencyMeasuringFn.class) { concurrentElements--; } } @@ -127,11 +133,11 @@ public void testParallelization() { // The minimum is guaranteed to be >= 2x the delay interval, since no more than half the // elements can be scheduled at once. - Assert.assertThat(minDuration, + assertThat(minDuration, greaterThanOrEqualTo(2 * DelayFn.DELAY_MS)); // Also, it should take <= 8x the delay interval since we should be at least // parallelizing some of the work. - Assert.assertThat(minDuration, + assertThat(minDuration, lessThanOrEqualTo(8 * DelayFn.DELAY_MS)); } @@ -140,19 +146,19 @@ public void testExceptionHandling() { ExceptionThrowingFn fn = new ExceptionThrowingFn<>(10); try { run(100, PARALLELISM_FACTOR, fn); - Assert.fail("Expected exception to propagate"); + fail("Expected exception to propagate"); } catch (RuntimeException e) { - Assert.assertThat(e.getMessage(), containsString("Expected failure")); + assertThat(e.getMessage(), containsString("Expected failure")); } // Should have processed 10 elements, but stopped before processing all // of them. - Assert.assertThat(numProcessed.get(), + assertThat(numProcessed.get(), is(both(greaterThanOrEqualTo(10)) .and(lessThan(100)))); // The first failure should prevent the scheduling of any more elements. - Assert.assertThat(numFailures.get(), + assertThat(numFailures.get(), is(both(greaterThanOrEqualTo(1)) .and(lessThanOrEqualTo(PARALLELISM_FACTOR)))); } @@ -162,20 +168,20 @@ public void testExceptionHandlingOnLastElement() { ExceptionThrowingFn fn = new ExceptionThrowingFn<>(9); try { run(10, PARALLELISM_FACTOR, fn); - Assert.fail("Expected exception to propagate"); + fail("Expected exception to propagate"); } catch (RuntimeException e) { - Assert.assertThat(e.getMessage(), containsString("Expected failure")); + assertThat(e.getMessage(), containsString("Expected failure")); } // Should have processed 10 elements, but stopped before processing all // of them. 
- Assert.assertEquals(10, numProcessed.get()); - Assert.assertEquals(1, numFailures.get()); + assertEquals(10, numProcessed.get()); + assertEquals(1, numFailures.get()); } @Test public void testIntraBundleParallelizationGetName() { - Assert.assertEquals( + assertEquals( "IntraBundleParallelization", IntraBundleParallelization.of(new DelayFn()).withMaxParallelism(1).getName()); } @@ -194,14 +200,14 @@ private long run(int numElements, int maxParallelism, DoFn doF .apply(IntraBundleParallelization.of(doFn).withMaxParallelism(maxParallelism)) .apply(ParDo.of(downstream)); - long startTime = System.currentTimeMillis(); + long startTime = System.nanoTime(); pipeline.run(); // Downstream methods should not see parallel threads. - Assert.assertEquals(1, maxConcurrency); + assertEquals(1, maxConcurrency); - long endTime = System.currentTimeMillis(); - return endTime - startTime; + long endTime = System.nanoTime(); + return TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MemoryMonitorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MemoryMonitorTest.java index 4cf75623a54fb..c175165b6dcaa 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MemoryMonitorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MemoryMonitorTest.java @@ -16,6 +16,8 @@ package com.google.cloud.dataflow.sdk.util; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; + import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -64,10 +66,10 @@ public void setup() { @Test(timeout = 1000) public void detectGCThrashing() throws InterruptedException { - Thread.sleep(100); + sleepMillis(100); monitor.waitForResources("Test1"); provider.inGCThrashingState.set(true); - Thread.sleep(100); + sleepMillis(100); final Semaphore s = new Semaphore(0); new Thread(new Runnable() { @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index 99c296517970f..72c24de508b4d 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -23,6 +23,7 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; @@ -124,7 +125,7 @@ public void testGetProgress() throws Exception { Thread thread = runReadLoopInThread(readOperation); for (int i = 0; i < 5; ++i) { - Thread.sleep(500); // Wait for the operation to start and block. + sleepMillis(500); // Wait for the operation to start and block. // Ensure that getProgress() doesn't block while the next() method is blocked. 
ApproximateProgress progress = readerProgressToCloudProgress(readOperation.getProgress()); long observedIndex = progress.getPosition().getRecordIndex().longValue(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java index 8291b73021eaa..f4f6752b6c504 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSamplerTest.java @@ -16,6 +16,7 @@ package com.google.cloud.dataflow.sdk.util.common.worker; +import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -59,13 +60,13 @@ public void basicTest() throws InterruptedException { try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { assert s1 != null; - Thread.sleep(2 * periodMs); + sleepMillis(2 * periodMs); } try (StateSampler.ScopedState s2 = stateSampler.scopedState(state2)) { assert s2 != null; - Thread.sleep(3 * periodMs); + sleepMillis(3 * periodMs); } long s1 = getCounterLongValue(counters, "test-1-msecs"); @@ -91,21 +92,21 @@ public void nestingTest() throws InterruptedException { try (StateSampler.ScopedState s1 = stateSampler.scopedState(state1)) { assert s1 != null; - Thread.sleep(2 * periodMs); + sleepMillis(2 * periodMs); try (StateSampler.ScopedState s2 = stateSampler.scopedState(state2)) { assert s2 != null; - Thread.sleep(2 * periodMs); + sleepMillis(2 * periodMs); try (StateSampler.ScopedState s3 = stateSampler.scopedState(state3)) { assert s3 != null; - Thread.sleep(2 * periodMs); + sleepMillis(2 * periodMs); } - Thread.sleep(periodMs); + sleepMillis(periodMs); } - Thread.sleep(periodMs); + sleepMillis(periodMs); } long s1 = getCounterLongValue(counters, "test-1-msecs"); @@ -127,7 +128,7 @@ public void nonScopedTest() throws InterruptedException { int state1 = stateSampler.stateForName("1", StateSampler.StateKind.USER); int previousState = stateSampler.setState(state1); - Thread.sleep(2 * periodMs); + sleepMillis(2 * periodMs); stateSampler.setState(previousState); long tolerance = periodMs; long s = getCounterLongValue(counters, "test-1-msecs"); @@ -149,14 +150,13 @@ private void noSamplingAfterCloseTestOnce() throws Exception { stateSampler.addSamplingCallback(new SamplingCallback(){ @Override public void run(int state, StateKind kind, long elapsedMs) { - lastSampledTimeStamp.set(System.currentTimeMillis()); + lastSampledTimeStamp.set(System.nanoTime()); sampleHappened.release(); } }); sampleHappened.acquire(); } - long samplerStoppedTimeStamp = System.currentTimeMillis(); - Thread.sleep(2 * periodMs); + long samplerStoppedTimeStamp = System.nanoTime(); assertThat(lastSampledTimeStamp.get(), Matchers.lessThanOrEqualTo(samplerStoppedTimeStamp)); } From a8347d186700e83edb91a3c673200abec1b1d4e7 Mon Sep 17 00:00:00 2001 From: bchambers Date: Mon, 30 Nov 2015 10:48:03 -0800 Subject: [PATCH 1197/1541] Cleanup Javadoc on CompressedSource ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=108993638 --- .../dataflow/sdk/io/CompressedSource.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java index 31393f1a5d8e9..5f98732608ead 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java @@ -37,15 +37,12 @@ * *

    For example, use the following to read from a gzip-compressed XML file: * - * {@code + *

    + * <pre> {@code
      * XmlSource mySource = XmlSource.from(...);
    - * PCollection collection = p.apply(CompressedSource.readFromSource(mySource,
    - * CompressedSource.CompressionMode.GZIP);}
    - *
    - * Or, alternatively:
    - * XmlSource mySource = XmlSource.from(...);
    - * {@code PCollection collection = p.apply(Read.from(CompressedSource.from(mySource,
    - * CompressedSource.CompressionMode.GZIP)));}
    + * PCollection collection = p.apply(Read.from(CompressedSource
    + *     .from(mySource)
    + *     .withDecompression(CompressedSource.CompressionMode.GZIP)));
    + * } </pre>
    * *

    Default compression modes are {@link CompressionMode#GZIP} and {@link CompressionMode#BZIP2}. * User-defined compression types are supported by implementing {@link DecompressingChannelFactory}. @@ -56,9 +53,8 @@ public class CompressedSource extends FileBasedSource { /** * Factory interface for creating channels that decompress the content of an underlying channel. - * - *

    TODO: Refactor decompressing channel/stream creation and default instances to util classes. */ + // TODO: Refactor decompressing channel/stream creation and default instances to util classes. public static interface DecompressingChannelFactory extends Serializable { /** * Given a channel, create a channel that decompresses the content read from the channel. @@ -97,8 +93,8 @@ public abstract ReadableByteChannel createDecompressingChannel(ReadableByteChann private final DecompressingChannelFactory channelFactory; /** - * Creates a {@link Read} transform that reads from a {@code CompressedSource} that reads from an - * underlying {@link FileBasedSource} after decompressing it with a {@link + * Creates a {@link Read} transform that reads from that reads from the underlying + * {@link FileBasedSource} {@code sourceDelegate} after decompressing it with a {@link * DecompressingChannelFactory}. */ public static Read.Bounded readFromSource( From 435508ef2ebcd9b78b89e4cd8fcb12f656cc7638 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 30 Nov 2015 19:53:19 -0800 Subject: [PATCH 1198/1541] Convert OutputTimeFn from interface to abstract class ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109041088 --- .../transforms/windowing/OutputTimeFn.java | 70 ++++++++----------- 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java index f97cd8589f0e1..a72a9cb1eb8b5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java @@ -38,20 +38,22 @@ *

  • The output timestamp when windows merge is provided by {@link #merge merge()}.
  • * * - *

    To implement this interface, extend {@link OutputTimeFn.Defaults} or - * {@link OutputTimeFn.DependsOnlyOnWindow} or your implementation may be impacted when the - * interface is enlarged. This interface will only be enlarged in ways that are - * backwards-compatible for consumers. The base classes will only be changed in ways that - * are backwards-compatible for implementors as well. - * - *

    Note that as long as the interface remains experimental, we may choose to change it in - * arbitrary backwards incompatible ways if it is indicated by the experiment. + *

    This abstract class cannot be subclassed directly, by design: it may grow + * in consumer-compatible ways that require mutually-exclusive default implementations. To + * create a concrete subclass, extend {@link OutputTimeFn.Defaults} or + * {@link OutputTimeFn.DependsOnlyOnWindow}. Note that as long as this class remains + * experimental, we may also choose to change it in arbitrary backwards-incompatible ways. * * @param the type of window. Contravariant: methods accepting any subtype of * {@code OutputTimeFn} should use the parameter type {@code OutputTimeFn}. */ @Experimental(Experimental.Kind.OUTPUT_TIME) -public interface OutputTimeFn extends Serializable { +public abstract class OutputTimeFn implements Serializable { + + /** + * Private constructor to prevent subclassing other than provided base classes. + */ + private OutputTimeFn() { } /** * Returns the output timestamp to use for data depending on the given @@ -73,7 +75,7 @@ public interface OutputTimeFn extends Serializable { *

    See the overview of {@link OutputTimeFn} for the consistency properties required * between {@link #assignOutputTime}, {@link #combine}, and {@link #merge}. */ - Instant assignOutputTime(Instant inputTimestamp, W window); + public abstract Instant assignOutputTime(Instant inputTimestamp, W window); /** * Combines the given output times, which must be from the same window, into an output time @@ -85,7 +87,7 @@ public interface OutputTimeFn extends Serializable { * {@code combine(a, combine(b, c)).equals(combine(combine(a, b), c))}. *

*/ - Instant combine(Instant outputTime, Instant otherOutputTime); + public abstract Instant combine(Instant outputTime, Instant otherOutputTime); /** * Merges the given output times, presumed to be combined output times for windows that @@ -111,7 +113,7 @@ public interface OutputTimeFn extends Serializable { * timestamp, this will be the same as {@link #combine combine()}. Defaults for this * case are provided by {@link Default}. */ - Instant merge(W intoWindow, Iterable mergingTimestamps); + public abstract Instant merge(W intoWindow, Iterable mergingTimestamps); /** * Returns {@code true} if the result of combination of many output timestamps actually depends @@ -120,7 +122,7 @@ public interface OutputTimeFn extends Serializable { *

This may allow optimizations when it is very efficient to retrieve the earliest timestamp * to be combined. */ - boolean dependsOnlyOnEarliestInputTimestamp(); + public abstract boolean dependsOnlyOnEarliestInputTimestamp(); /** * Returns {@code true} if the result does not depend on what outputs were combined but only @@ -135,13 +137,7 @@ public interface OutputTimeFn extends Serializable { * a framework for easily implementing a correct {@link #merge}, {@link #combine} and * {@link #assignOutputTime}. */ - boolean dependsOnlyOnWindow(); - - /** - * Please extend {@link Defaults} or {@link DependsOnlyOnWindow} if you want guaranteed - * compilation compatibility; this interface may be enlarged in consumer-compatible ways. - */ - void pleaseExtendBaseClassesForCompilationCompatibility(); + public abstract boolean dependsOnlyOnWindow(); /** * (Experimental) Default method implementations for {@link OutputTimeFn} where the @@ -152,7 +148,11 @@ public interface OutputTimeFn extends Serializable { *

By default, {@link #combine} and {@link #merge} return the earliest timestamp of their * inputs. */ - public abstract static class Defaults implements OutputTimeFn { + public abstract static class Defaults extends OutputTimeFn { + + protected Defaults() { + super(); + } /** * {@inheritDoc} @@ -186,11 +186,11 @@ public final boolean dependsOnlyOnWindow() { return false; } - /** - * {@inheritDoc} - * - * @return {@code true} by default. - */ + /** + * {@inheritDoc} + * + * @return {@code true} by default. + */ @Override public boolean dependsOnlyOnEarliestInputTimestamp() { return false; @@ -215,12 +215,6 @@ public boolean equals(Object other) { public int hashCode() { return Objects.hash(getClass()); } - - /** - * This base class provides compilation compatibility when {@link OutputTimeFn} is enlarged. - */ - @Override - public void pleaseExtendBaseClassesForCompilationCompatibility() { } } /** @@ -230,7 +224,11 @@ public void pleaseExtendBaseClassesForCompilationCompatibility() { } *

To complete an implementation, override {@link #assignOutputTime(BoundedWindow)}. */ public abstract static class DependsOnlyOnWindow - implements OutputTimeFn { + extends OutputTimeFn { + + protected DependsOnlyOnWindow() { + super(); + } /** * Returns the output timestamp to use for data in the specified {@code window}. @@ -317,11 +315,5 @@ public boolean equals(Object other) { public int hashCode() { return Objects.hash(getClass()); } - - /** - * This base class provides compilation compatibility when {@link OutputTimeFn} is enlarged. - */ - @Override - public void pleaseExtendBaseClassesForCompilationCompatibility() { } } } From c7bb5bbe7d5a6350c2ce991f06084cd378ddb69b Mon Sep 17 00:00:00 2001 From: tgroh Date: Tue, 1 Dec 2015 13:25:38 -0800 Subject: [PATCH 1199/1541] Remove Unneccessary import in CombineJava8Test ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109118933 --- .../google/cloud/dataflow/sdk/transforms/CombineJava8Test.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java index f3f04c3ae7497..b569e49c951db 100644 --- a/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java +++ b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java @@ -16,10 +16,7 @@ package com.google.cloud.dataflow.sdk.transforms; -import static org.junit.Assert.assertEquals; - import com.google.cloud.dataflow.sdk.Pipeline; -import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.values.KV; From f56b1b8d32b5df59c504517812f15b72e69961a2 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 1 Dec 2015 14:06:21 -0800 Subject: [PATCH 1200/1541] Fix sizing of windowed side inputs to improve caching Caching was using the entire size of the side input, not the size of just the elements. This was incorrectly reporting the size of the side input by a factor of the number of windows making the caching pointless for side inputs with a large number of windows. 
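As a minimal, self-contained sketch of the intended accounting (illustrative types only; SizedElement and sizeForWindow are placeholders, not the SDK classes touched in the diff below): when the size of a side input is requested for one window, only the bytes of elements whose window set contains that window should be counted, rather than the bytes of every windowed copy.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class PerWindowSizingSketch {
  // Stand-in for a windowed, already-encoded element; byteSize is its encoded length.
  static class SizedElement {
    final long byteSize;
    final Set<String> windows;
    SizedElement(long byteSize, String... windows) {
      this.byteSize = byteSize;
      this.windows = new HashSet<>(Arrays.asList(windows));
    }
  }

  // Size to report to the cache for a single window: count only elements in that window.
  static long sizeForWindow(List<SizedElement> elements, String window) {
    long sum = 0L;
    for (SizedElement e : elements) {
      if (e.windows.contains(window)) {
        sum += e.byteSize;
      }
    }
    return sum;
  }

  public static void main(String[] args) {
    List<SizedElement> elements = Arrays.asList(
        new SizedElement(100, "w1"), new SizedElement(200, "w2"), new SizedElement(50, "w1"));
    // Reports 150 for w1 and 200 for w2, instead of 350 for every window.
    System.out.println(sizeForWindow(elements, "w1") + " " + sizeForWindow(elements, "w2"));
  }
}

The change in the diff below applies the same filtering to WindowedValue elements, accumulating the byte-size observer only for elements that fall in the requested window.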
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109123443 --- .../worker/DataflowSideInputReader.java | 56 ++++++++++++++----- .../worker/DataflowSideInputReaderTest.java | 45 +++++++++++---- 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java index 821bd4c6016e1..545f32bacad43 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java @@ -19,15 +19,18 @@ import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; -import com.google.cloud.dataflow.sdk.util.DirectSideInputReader; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.SideInputReader; import com.google.cloud.dataflow.sdk.util.Sized; import com.google.cloud.dataflow.sdk.util.SizedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; import java.util.HashMap; import java.util.Map; @@ -51,8 +54,8 @@ public class DataflowSideInputReader /** A byte count saved as overhead per side input, not cleared when the observer is reset. */ private final Map, Long> overheads; - /** The underlying reader, which does not keep track of sizes. */ - private final SideInputReader subReader; + /** A list of TupleTags representing the side input values. 
*/ + private final PTuple sideInputValues; private DataflowSideInputReader( Iterable sideInputInfos, @@ -64,7 +67,7 @@ private DataflowSideInputReader( this.observers = new HashMap<>(); this.overheads = new HashMap<>(); - PTuple sideInputValues = PTuple.empty(); + PTuple sideInputValuesBeingBuilt = PTuple.empty(); for (SideInputInfo sideInputInfo : sideInputInfos) { TupleTag tag = new TupleTag<>(sideInputInfo.getTag()); ByteSizeObserver observer = new ByteSizeObserver(); @@ -73,9 +76,9 @@ private DataflowSideInputReader( overheads.put(tag, observer.getBytes()); observer.reset(); observers.put(tag, observer); - sideInputValues = sideInputValues.and(tag, sideInputValue); + sideInputValuesBeingBuilt = sideInputValuesBeingBuilt.and(tag, sideInputValue); } - this.subReader = DirectSideInputReader.of(sideInputValues); + sideInputValues = sideInputValuesBeingBuilt; } /** @@ -92,12 +95,12 @@ public static DataflowSideInputReader of( @Override public boolean contains(PCollectionView view) { - return subReader.contains(view); + return sideInputValues.has(view.getTagInternal()); } @Override public boolean isEmpty() { - return subReader.isEmpty(); + return sideInputValues.isEmpty(); } /** @@ -108,16 +111,39 @@ public boolean isEmpty() { */ @Override public Sized getSized(PCollectionView view, final BoundedWindow window) { + final TupleTag>> tag = view.getTagInternal(); + if (!sideInputValues.has(tag)) { + throw new IllegalArgumentException("calling getSideInput() with unknown view"); + } + // It is hard to estimate the size with any accuracy here, and there will be improvements // possible, but it is only required to estimate in a way so that a cache will not OOM. - T value = subReader.get(view, window); - @SuppressWarnings({"rawtypes", "unchecked"}) // irrelevant phantom type - TupleTag tag = (TupleTag) view.getTagInternal(); - ByteSizeObserver observer = observers.get(tag); + T value; long overhead = overheads.get(tag); - long bytesRead = observer.getBytes(); - observer.reset(); - return Sized.of(value, overhead + bytesRead); + final ByteSizeObserver observer = observers.get(tag); + if (view.getWindowingStrategyInternal().getWindowFn() instanceof GlobalWindows) { + value = view.fromIterableInternal(sideInputValues.get(tag)); + long bytesRead = observer.getBytes(); + observer.reset(); + return Sized.of(value, overhead + bytesRead); + } else { + final long[] sum = new long[]{ 0L }; + value = view.fromIterableInternal( + Iterables.filter(sideInputValues.get(tag), + new Predicate>() { + @Override + public boolean apply(WindowedValue element) { + boolean containsWindow = element.getWindows().contains(window); + // Only sum up the size of the elements within the window. 
+ if (containsWindow) { + sum[0] += observer.getBytes(); + } + observer.reset(); + return containsWindow; + } + })); + return Sized.of(value, overhead + sum[0]); + } } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java index 47a0949378765..c3ad2bbd23e6c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java @@ -28,6 +28,9 @@ import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.testing.PCollectionViewTesting; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo; import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.ExecutionContext; @@ -36,7 +39,9 @@ import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import org.joda.time.Instant; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -62,7 +67,8 @@ public class DataflowSideInputReaderTest { ImmutableList.of(1L, -43255L, 0L, 13L, 1975858L); private static final long DEFAULT_SOURCE_LENGTH = DEFAULT_SOURCE_CONTENTS.size(); - + private static final IntervalWindow OTHER_WINDOW = + new IntervalWindow(new Instant(50000L), new Instant(60000L)); private PipelineOptions options = PipelineOptionsFactory.create(); private static ExecutionContext executionContext; private SideInputInfo defaultSideInputInfo; @@ -70,15 +76,19 @@ public class DataflowSideInputReaderTest { /** * Creates a {@link Source} descriptor for reading the provided contents as a side input. - * The contents will all be placed in the {@link PCollectionViewTesting#DEFAULT_NONEMPTY_WINDOW}. * *

If the {@code PCollectionView} has an incompatible {@code Coder} or * {@code WindowingStrategy}, then results are unpredictable. */ - private final Source sourceInDefaultWindow(PCollectionView view, Iterable values) + private final Source sourceInMultipleWindows(PCollectionView view, Iterable values) throws Exception { - List> windowedValues = - ImmutableList.copyOf(PCollectionViewTesting.contentsInDefaultWindow(values)); + List> windowedValues = ImmutableList.>builder() + .addAll(PCollectionViewTesting.contentsInDefaultWindow(values)) + // We add the values twice within the other window so there are a different number + // then in the default window. + .addAll(contentsInWindow(values, OTHER_WINDOW)) + .addAll(contentsInWindow(values, OTHER_WINDOW)) + .build(); @SuppressWarnings({"unchecked", "rawtypes"}) List> componentCoders = (List) view.getCoderInternal().getCoderArguments(); @@ -109,13 +119,26 @@ private long windowedLongBytes() throws Exception { PCollectionViewTesting.valueInDefaultWindow(arbitraryLong)).length; } + /** + * Prepares {@code values} for reading as the contents of a {@link PCollectionView} side input. + */ + private static Iterable> contentsInWindow(Iterable values, + BoundedWindow window) throws Exception { + List> windowedValues = Lists.newArrayList(); + for (T value : values) { + windowedValues.add( + WindowedValue.of(value, window.maxTimestamp().minus(1), window, PaneInfo.NO_FIRING)); + } + return windowedValues; + } + @Before public void setUp() throws Exception { options = PipelineOptionsFactory.create(); executionContext = BatchModeExecutionContext.fromOptions(options); defaultSideInputInfo = SideInputUtils.createCollectionSideInputInfo( - sourceInDefaultWindow(DEFAULT_LENGTH_VIEW, DEFAULT_SOURCE_CONTENTS)); + sourceInMultipleWindows(DEFAULT_LENGTH_VIEW, DEFAULT_SOURCE_CONTENTS)); defaultSideInputInfo.setTag(DEFAULT_LENGTH_VIEW.getTagInternal().getId()); defaultSideInputReader = DataflowSideInputReader.of( @@ -136,7 +159,7 @@ public void testDataflowSideInputReaderNotEmpty() throws Exception { * {@link DataflowSideInputReader}, the read succeeds and has the right size. */ @Test - public void testDataflowSideInputReaderGoodRead() throws Exception { + public void testDataflowSideInputReaderFilteredRead() throws Exception { assertTrue(defaultSideInputReader.contains(DEFAULT_LENGTH_VIEW)); Sized sizedValue = defaultSideInputReader.getSized( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); @@ -156,7 +179,7 @@ public void testDataflowSideInputReaderRepeatedRead() throws Exception { Sized firstRead = sideInputReader.getSized( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); - // A repeated read should yield the same size + // A repeated read should yield the same size. Sized repeatedRead = sideInputReader.getSized( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); @@ -170,11 +193,11 @@ public void testDataflowSideInputReaderMiss() throws Exception { DataflowSideInputReader sideInputReader = DataflowSideInputReader.of( Collections.singletonList(defaultSideInputInfo), options, executionContext); - // Reading an empty window still yields the same size, for now + // Reading an empty window yields the size of 0 elements. 
Sized emptyWindowValue = sideInputReader.getSized( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_EMPTY_WINDOW); assertThat(emptyWindowValue.getValue(), equalTo(0L)); - assertThat(emptyWindowValue.getSize(), equalTo(DEFAULT_SOURCE_LENGTH * windowedLongBytes())); + assertThat(emptyWindowValue.getSize(), equalTo(0L)); } /** @@ -184,7 +207,7 @@ public void testDataflowSideInputReaderMiss() throws Exception { @Test public void testDataflowSideInputReaderBadRead() throws Exception { SideInputInfo sideInputInfo = SideInputUtils.createCollectionSideInputInfo( - sourceInDefaultWindow(DEFAULT_LENGTH_VIEW, DEFAULT_SOURCE_CONTENTS)); + sourceInMultipleWindows(DEFAULT_LENGTH_VIEW, DEFAULT_SOURCE_CONTENTS)); sideInputInfo.setTag("not the same tag at all"); DataflowSideInputReader sideInputReader = DataflowSideInputReader.of( From 3ebf8b5e3143821893feb67cfc24ab3a9c51ec9b Mon Sep 17 00:00:00 2001 From: tgroh Date: Tue, 1 Dec 2015 14:12:27 -0800 Subject: [PATCH 1201/1541] Improve Lambda Support in WithKeys, RemoveDuplicates Both WithKeys#of(SerializableFunction) and RemoveDuplicates#withRepresentativeValuesFn(SerializableFunction) can be passed Lambdas in Java 8, but due to erasure fail during Pipeline construction when the coder is not accessible. Add WithKeys#withKeyType(TypeDescriptor) and WithRepresentativeValues#withRepresentativeType(TypeDescriptor) to ease use of these PTransforms without explicitly providing a coder to the output PCollection. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109124138 --- .../sdk/transforms/RemoveDuplicates.java | 43 +++++++- .../dataflow/sdk/transforms/WithKeys.java | 17 ++++ .../dataflow/sdk/transforms/WithKeysTest.java | 16 +++ .../transforms/RemoveDuplicatesJava8Test.java | 99 +++++++++++++++++++ .../sdk/transforms/WithKeysJava8Test.java | 74 ++++++++++++++ 5 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java create mode 100644 sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java index cff84991365e0..8913138abb278 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java @@ -18,6 +18,7 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; /** * {@code RemoveDuplicates} takes a {@code PCollection} and @@ -77,7 +78,7 @@ public static RemoveDuplicates create() { */ public static WithRepresentativeValues withRepresentativeValueFn( SerializableFunction fn) { - return new WithRepresentativeValues(fn); + return new WithRepresentativeValues(fn, null); } @Override @@ -100,18 +101,34 @@ public Void apply(Iterable iter) { .apply(Keys.create()); } - private static class WithRepresentativeValues + /** + * A {@link RemoveDuplicates} {@link PTransform} that uses a {@link SerializableFunction} to + * obtain a representative value for each input element. + * + * Construct via {@link RemoveDuplicates#withRepresentativeValueFn(SerializableFunction)}. 
+ * + * @param the type of input and output element + * @param the type of representative values used to dedup + */ + public static class WithRepresentativeValues extends PTransform, PCollection> { - private SerializableFunction fn; + private final SerializableFunction fn; + private final TypeDescriptor representativeType; - private WithRepresentativeValues(SerializableFunction fn) { + private WithRepresentativeValues( + SerializableFunction fn, TypeDescriptor representativeType) { this.fn = fn; + this.representativeType = representativeType; } @Override public PCollection apply(PCollection in) { + WithKeys withKeys = WithKeys.of(fn); + if (representativeType != null) { + withKeys = withKeys.withKeyType(representativeType); + } return in - .apply(WithKeys.of(fn)) + .apply(withKeys) .apply(Combine.perKey( new Combine.BinaryCombineFn() { @Override @@ -121,5 +138,21 @@ public T apply(T left, T right) { })) .apply(Values.create()); } + + /** + * Return a {@code WithRepresentativeValues} {@link PTransform} that is like this one, but with + * the specified output type descriptor. + * + * Required for use of {@link RemoveDuplicates#withRepresentativeValueFn(SerializableFunction)} + * in Java 8 with a lambda as the fn. + * + * @param type a {@link TypeDescriptor} describing the representative type of this + * {@code WithRepresentativeValues} + * @return A {@code WithRepresentativeValues} {@link PTransform} that is like this one, but with + * the specified output type descriptor. + */ + public WithRepresentativeValues withRepresentativeType(TypeDescriptor type) { + return new WithRepresentativeValues<>(fn, type); + } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java index aa8559c4e631c..c06795c703849 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java @@ -58,6 +58,9 @@ public class WithKeys extends PTransform, * values in the input {@code PCollection} has been paired with a * key computed from the value by invoking the given * {@code SerializableFunction}. + * + *

If using a lambda in Java 8, {@link #withKeyType(TypeDescriptor)} must + * be called on the result {@link PTransform}. */ public static WithKeys of(SerializableFunction fn) { return new WithKeys<>(fn, null); @@ -92,6 +95,20 @@ private WithKeys(SerializableFunction fn, Class keyClass) { this.keyClass = keyClass; } + /** + * Return a {@link WithKeys} that is like this one with the specified key type descriptor. + * + * For use with lambdas in Java 8, either this method must be called with an appropriate type + * descriptor or {@link PCollection#setCoder(Coder)} must be called on the output + * {@link PCollection}. + */ + public WithKeys withKeyType(TypeDescriptor keyType) { + // Safe cast + @SuppressWarnings("unchecked") + Class rawType = (Class) keyType.getRawType(); + return new WithKeys<>(fn, rawType); + } + @Override public PCollection> apply(PCollection in) { PCollection> result = diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java index d6d0fe865d0ed..0f9abd487f931 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysTest.java @@ -24,6 +24,7 @@ import com.google.cloud.dataflow.sdk.testing.TestPipeline; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; import org.junit.Test; import org.junit.runner.RunWith; @@ -98,6 +99,21 @@ public void testWithKeysGetName() { assertEquals("WithKeys", WithKeys.of(100).getName()); } + @Test + public void testWithKeysWithUnneededWithKeyTypeSucceeds() { + TestPipeline p = TestPipeline.create(); + + PCollection input = + p.apply(Create.of(Arrays.asList(COLLECTION)).withCoder( + StringUtf8Coder.of())); + + PCollection> output = + input.apply(WithKeys.of(new LengthAsKey()).withKeyType(TypeDescriptor.of(Integer.class))); + DataflowAssert.that(output).containsInAnyOrder(WITH_KEYS); + + p.run(); + } + /** * Key a value by its length. */ diff --git a/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java new file mode 100644 index 0000000000000..d9e2180b7da74 --- /dev/null +++ b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.dataflow.sdk.transforms; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.not; +import static org.junit.Assert.assertThat; + +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.HashSet; +import java.util.Set; + +/** + * Java 8 tests for {@link RemoveDuplicates}. + */ +@RunWith(JUnit4.class) +public class RemoveDuplicatesJava8Test { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void withLambdaRepresentativeValuesFnAndTypeDescriptorShouldApplyFn() { + TestPipeline p = TestPipeline.create(); + + Multimap predupedContents = HashMultimap.create(); + predupedContents.put(3, "foo"); + predupedContents.put(4, "foos"); + predupedContents.put(6, "barbaz"); + predupedContents.put(6, "bazbar"); + PCollection dupes = + p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo")); + PCollection deduped = + dupes.apply(RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length()) + .withRepresentativeType(TypeDescriptor.of(Integer.class))); + + DataflowAssert.that(deduped).satisfies((Iterable strs) -> { + Set seenLengths = new HashSet<>(); + for (String s : strs) { + assertThat(predupedContents.values(), hasItem(s)); + assertThat(seenLengths, not(contains(s.length()))); + seenLengths.add(s.length()); + } + return null; + }); + + p.run(); + } + + @Test + public void withLambdaRepresentativeValuesFnNoTypeDescriptorShouldThrow() { + TestPipeline p = TestPipeline.create(); + + Multimap predupedContents = HashMultimap.create(); + predupedContents.put(3, "foo"); + predupedContents.put(4, "foos"); + predupedContents.put(6, "barbaz"); + predupedContents.put(6, "bazbar"); + PCollection dupes = + p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo")); + + thrown.expect(IllegalStateException.class); + thrown.expectMessage("Unable to return a default Coder for RemoveRepresentativeDupes"); + thrown.expectMessage("Cannot provide a coder for type variable K"); + thrown.expectMessage("the actual type is unknown due to erasure."); + + // Thrown when applying a transform to the internal WithKeys that withRepresentativeValueFn is + // implemented with + dupes.apply("RemoveRepresentativeDupes", + RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length())); + } +} + diff --git a/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java new file mode 100644 index 0000000000000..c10af29030133 --- /dev/null +++ b/sdk/src/test/java8/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.transforms; + +import com.google.cloud.dataflow.sdk.Pipeline.PipelineExecutionException; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.RunnableOnService; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Java 8 Tests for {@link WithKeys}. + */ +@RunWith(JUnit4.class) +public class WithKeysJava8Test { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + @Category(RunnableOnService.class) + public void withLambdaAndTypeDescriptorShouldSucceed() { + TestPipeline p = TestPipeline.create(); + + PCollection values = p.apply(Create.of("1234", "3210", "0", "-12")); + PCollection> kvs = values.apply( + WithKeys.of((String s) -> Integer.valueOf(s)) + .withKeyType(TypeDescriptor.of(Integer.class))); + + DataflowAssert.that(kvs).containsInAnyOrder( + KV.of(1234, "1234"), KV.of(0, "0"), KV.of(-12, "-12"), KV.of(3210, "3210")); + + p.run(); + } + + @Test + public void withLambdaAndNoTypeDescriptorShouldThrow() { + TestPipeline p = TestPipeline.create(); + + PCollection values = p.apply(Create.of("1234", "3210", "0", "-12")); + + values.apply("ApplyKeysWithWithKeys", WithKeys.of((String s) -> Integer.valueOf(s))); + + thrown.expect(PipelineExecutionException.class); + thrown.expectMessage("Unable to return a default Coder for ApplyKeysWithWithKeys"); + thrown.expectMessage("Cannot provide a coder for type variable K"); + thrown.expectMessage("the actual type is unknown due to erasure."); + + p.run(); + } +} + From 3f9882eae89c197c17a79b907073f82f96737779 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 1 Dec 2015 14:32:44 -0800 Subject: [PATCH 1202/1541] Setup data structure to allow for overrides for batch pipelines ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109126289 --- .../sdk/runners/DataflowPipelineRunner.java | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index e26b3cb4d5220..0f9f8ac6710c5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -137,8 +137,8 @@ public class DataflowPipelineRunner extends PipelineRunner /** Translator for this DataflowPipelineRunner, based on options. */ private final DataflowPipelineTranslator translator; - /** Custom transforms implementations for running in streaming mode. 
*/ - private final Map, Class> streamingOverrides; + /** Custom transforms implementations. */ + private final Map, Class> overrides; /** A set of user defined functions to invoke at different points in execution. */ private DataflowPipelineRunnerHooks hooks; @@ -244,24 +244,29 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { this.dataflowClient = options.getDataflowClient(); this.translator = DataflowPipelineTranslator.fromOptions(options); - this.streamingOverrides = ImmutableMap., Class>builder() - .put(Combine.GloballyAsSingletonView.class, StreamingCombineGloballyAsSingletonView.class) - .put(Create.Values.class, StreamingCreate.class) - .put(View.AsMap.class, StreamingViewAsMap.class) - .put(View.AsMultimap.class, StreamingViewAsMultimap.class) - .put(View.AsSingleton.class, StreamingViewAsSingleton.class) - .put(View.AsList.class, StreamingViewAsList.class) - .put(View.AsIterable.class, StreamingViewAsIterable.class) - .put(Write.Bound.class, StreamingWrite.class) - .put(PubsubIO.Write.Bound.class, StreamingPubsubIOWrite.class) - .put(Read.Unbounded.class, StreamingUnboundedRead.class) - .put(Read.Bounded.class, StreamingUnsupportedIO.class) - .put(AvroIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(AvroIO.Write.Bound.class, StreamingUnsupportedIO.class) - .put(BigQueryIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(TextIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(TextIO.Write.Bound.class, StreamingUnsupportedIO.class) - .build(); + if (options.isStreaming()) { + overrides = ImmutableMap., Class>builder() + .put(Combine.GloballyAsSingletonView.class, StreamingCombineGloballyAsSingletonView.class) + .put(Create.Values.class, StreamingCreate.class) + .put(View.AsMap.class, StreamingViewAsMap.class) + .put(View.AsMultimap.class, StreamingViewAsMultimap.class) + .put(View.AsSingleton.class, StreamingViewAsSingleton.class) + .put(View.AsList.class, StreamingViewAsList.class) + .put(View.AsIterable.class, StreamingViewAsIterable.class) + .put(Write.Bound.class, StreamingWrite.class) + .put(PubsubIO.Write.Bound.class, StreamingPubsubIOWrite.class) + .put(Read.Unbounded.class, StreamingUnboundedRead.class) + .put(Read.Bounded.class, StreamingUnsupportedIO.class) + .put(AvroIO.Read.Bound.class, StreamingUnsupportedIO.class) + .put(AvroIO.Write.Bound.class, StreamingUnsupportedIO.class) + .put(BigQueryIO.Read.Bound.class, StreamingUnsupportedIO.class) + .put(TextIO.Read.Bound.class, StreamingUnsupportedIO.class) + .put(TextIO.Write.Bound.class, StreamingUnsupportedIO.class) + .build(); + } else { + overrides = ImmutableMap., Class>builder() + .build(); + } } /** @@ -289,16 +294,15 @@ public OutputT apply( pc.isBounded()); return outputT; - } else if (options.isStreaming() && streamingOverrides.containsKey(transform.getClass())) { - // It is the responsibility of whoever constructs streamingOverrides - // to ensure this is type safe. + } else if (overrides.containsKey(transform.getClass())) { + // It is the responsibility of whoever constructs overrides to ensure this is type safe. 
@SuppressWarnings("unchecked") Class> transformClass = (Class>) transform.getClass(); @SuppressWarnings("unchecked") Class> customTransformClass = - (Class>) streamingOverrides.get(transform.getClass()); + (Class>) overrides.get(transform.getClass()); PTransform customTransform = InstanceBuilder.ofType(customTransformClass) From b6c4f8f3f7701a4f0190508e65eeb4aa9d1a4b88 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 1 Dec 2015 16:19:09 -0800 Subject: [PATCH 1203/1541] DataflowPipelineOptions: use gcloud project default if available The DefaultProjectFactory from GcpPipelineOptions should be used here, too. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109137241 --- .../cloud/dataflow/sdk/options/DataflowPipelineOptions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java index 3e9a2424f0c57..a0f188af0785d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java @@ -40,6 +40,7 @@ public interface DataflowPipelineOptions extends + "See https://cloud.google.com/storage/docs/projects for further details.") @Override @Validation.Required + @Default.InstanceFactory(DefaultProjectFactory.class) String getProject(); @Override void setProject(String value); From a8b19a609e206ce592fd2fefdf77fc9a64e108be Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 1 Dec 2015 19:01:44 -0800 Subject: [PATCH 1204/1541] Filter: fixup uses of #by and #byPredicate Uses, tests, javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109150424 --- .../dataflow/examples/complete/AutoComplete.java | 14 +++++++------- .../examples/complete/AutoCompleteTest.java | 2 +- .../cloud/dataflow/sdk/transforms/Filter.java | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java index b1db84c88caa2..e8c6d405b7638 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java @@ -232,12 +232,13 @@ public PCollectionList>> apply( .of(larger.get(1).apply(ParDo.of(new FlattenTops()))) // ...together with those (previously excluded) candidates of length // exactly minPrefix... - .and(input.apply(Filter.by(new SerializableFunction() { - @Override - public Boolean apply(CompletionCandidate c) { - return c.getValue().length() == minPrefix; - } - }))) + .and(input.apply(Filter.byPredicate( + new SerializableFunction() { + @Override + public Boolean apply(CompletionCandidate c) { + return c.getValue().length() == minPrefix; + } + }))) .apply("FlattenSmall", Flatten.pCollections()) // ...set the key to be the minPrefix-length prefix... .apply(ParDo.of(new AllPrefixes(minPrefix, minPrefix))) @@ -297,7 +298,6 @@ public String getValue() { } // Empty constructor required for Avro decoding. 
- @SuppressWarnings("unused") public CompletionCandidate() {} @Override diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java index 20dbdc41cd01c..aec1557c28b0d 100644 --- a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java @@ -84,7 +84,7 @@ public void testAutoComplete() { PCollection>> output = input.apply(new ComputeTopCompletions(2, recursive)) - .apply(Filter.by( + .apply(Filter.byPredicate( new SerializableFunction>, Boolean>() { @Override public Boolean apply(KV> element) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java index 89a549ed62cdb..9e123a19fcd1c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java @@ -53,7 +53,7 @@ public class Filter extends PTransform, PCollection> { } /** - * @deprecated use {@link byPredicate}, which returns a {@link Filter} transform instead of + * @deprecated use {@link #byPredicate}, which returns a {@link Filter} transform instead of * a {@link ParDo.Bound}. */ @Deprecated From 8db50044b6d38f06ee588e884f0d4a9ea2ae8f6e Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 1 Dec 2015 19:03:13 -0800 Subject: [PATCH 1205/1541] ByteArrayCoder: remove unused import, fix Javadoc link ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109150531 --- .../com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java index 13c0e0ab7074b..47efbedc66949 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java @@ -16,7 +16,6 @@ package com.google.cloud.dataflow.sdk.coders; -import com.google.cloud.dataflow.sdk.coders.Coder.Context; import com.google.cloud.dataflow.sdk.util.ExposedByteArrayOutputStream; import com.google.cloud.dataflow.sdk.util.StreamUtils; import com.google.cloud.dataflow.sdk.util.VarInt; @@ -117,8 +116,8 @@ public Object structuralValue(byte[] value) { /** * {@inheritDoc} * - * @return {@code true} since {@link #getEncodedElementByteSize()} runs in constant time using - * the {@code length} of the provided array. + * @return {@code true} since {@link #getEncodedElementByteSize(byte[], Context)} runs in + * constant time using the {@code length} of the provided array. 
*/ @Override public boolean isRegisterByteSizeObserverCheap(byte[] value, Context context) { From f6ade89e2a34f44b27fb7a73e0088e384d1099ad Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 1 Dec 2015 22:12:43 -0800 Subject: [PATCH 1206/1541] Coders: fix Javadoc - throws are not linked - nothing is not a valid class - #f() invalid when f does not have a zero-args version ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109161228 --- .../google/cloud/dataflow/sdk/coders/CustomCoder.java | 2 +- .../cloud/dataflow/sdk/coders/DelegateCoder.java | 11 +++++------ .../sdk/coders/DeterministicStandardCoder.java | 6 +++--- .../cloud/dataflow/sdk/coders/IterableLikeCoder.java | 3 +-- .../google/cloud/dataflow/sdk/coders/MapCoder.java | 7 +++---- .../google/cloud/dataflow/sdk/coders/VarIntCoder.java | 2 +- .../cloud/dataflow/sdk/coders/VarLongCoder.java | 2 +- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java index f88cdd87cf677..b34ef8cf6decf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java @@ -100,7 +100,7 @@ public CloudObject asCloudObject() { /** * {@inheritDoc} * - * @throws {@link NonDeterministicException}. A {@link CustomCoder} is presumed + * @throws NonDeterministicException a {@link CustomCoder} is presumed * nondeterministic. */ @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java index 365f1d43a139b..cdd882b07a198 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java @@ -27,7 +27,7 @@ /** * A {@code DelegateCoder} wraps a {@link Coder} for {@code IntermediateT} and - * encodes/decodes values of type {@code T}s by converting + * encodes/decodes values of type {@code T} by converting * to/from {@code IntermediateT} and then encoding/decoding using the underlying * {@code Coder}. * @@ -43,9 +43,8 @@ */ public class DelegateCoder extends CustomCoder { /** - * A {@link CodingFunction CodingFunction<InputT, OutputT>} is a serializable function - * from {@code InputT} to {@code OutputT} that - * may throw any {@code Exception}. + * A {@link DelegateCoder.CodingFunction CodingFunction<InputT, OutputT>} is a serializable + * function from {@code InputT} to {@code OutputT} that may throw any {@link Exception}. */ public static interface CodingFunction extends Serializable { public abstract OutputT apply(InputT input) throws Exception; @@ -80,8 +79,8 @@ public Coder getCoder() { * {@inheritDoc} * * @throws NonDeterministicException when the underlying coder's {@code verifyDeterministic()} - * throws a {@link NonDeterministicException}. For this to be safe, the intermediate - * {@code CodingFunction} must also be deterministic. + * throws a {@link Coder.NonDeterministicException}. For this to be safe, the + * intermediate {@code CodingFunction} must also be deterministic. 
*/ @Override public void verifyDeterministic() throws NonDeterministicException { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java index 2382279957151..0e0018afd4d45 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java @@ -19,7 +19,7 @@ /** * A {@link DeterministicStandardCoder} is a {@link StandardCoder} that is * deterministic, in the sense that for objects considered equal - * according to {@link Object#equals()}, the encoded bytes are + * according to {@link Object#equals(Object)}, the encoded bytes are * also equal. * * @param the type of the values being transcoded @@ -30,8 +30,8 @@ protected DeterministicStandardCoder() {} /** * {@inheritDoc} * - * @throws nothing unless overridden. - * A {@link DeterministicStandardCoder} is presumed deterministic. + * @throws NonDeterministicException never, unless overridden. A + * {@link DeterministicStandardCoder} is presumed deterministic. */ @Override public void verifyDeterministic() throws NonDeterministicException { } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java index 522e9b1ee74b2..7fb573a5c65c8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java @@ -164,8 +164,7 @@ public List> getCoderArguments() { /** * {@inheritDoc} * - * @throws - * {@link NonDeterministicException} always. + * @throws NonDeterministicException always. * Encoding is not deterministic for the general {@link Iterable} case, as it depends * upon the type of iterable. This may allow two objects to compare as equal * while the encoding differs. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java index 5b8e750ea68cb..b6f31030e41f8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java @@ -135,10 +135,9 @@ public List> getCoderArguments() { /** * {@inheritDoc} * - * @throws - * {@link NonDeterministicException} always. Not all maps have a deterministic encoding. - * For example, {@link HashMap} comparison does not depend on element order, so - * two {@link HashMap} instances may be equal but produce different encodings. + * @throws NonDeterministicException always. Not all maps have a deterministic encoding. + * For example, {@code HashMap} comparison does not depend on element order, so + * two {@code HashMap} instances may be equal but produce different encodings. */ @Override public void verifyDeterministic() throws NonDeterministicException { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java index 41be8b400466a..1010601f40553 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java @@ -79,7 +79,7 @@ public boolean consistentWithEquals() { /** * {@inheritDoc} * - * @return {@code true}. 
{@link VarIntCoder#getEncodedElementByteSize()} runs in constant time. + * @return {@code true}. {@link #getEncodedElementByteSize(Integer, Context)} is cheap. */ @Override public boolean isRegisterByteSizeObserverCheap(Integer value, Context context) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java index 7530d64c2a4b2..177ea09d596cf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java @@ -78,7 +78,7 @@ public boolean consistentWithEquals() { /** * {@inheritDoc} * - * @return {@code true}. {@link VarLongCoder#getEncodedElementByteSize()} runs in constant time. + * @return {@code true}. {@link #getEncodedElementByteSize(Long, Context)} is cheap. */ @Override public boolean isRegisterByteSizeObserverCheap(Long value, Context context) { From 927c58665138f419b1771eb76fe0c15193669a88 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 1 Dec 2015 22:13:55 -0800 Subject: [PATCH 1207/1541] Coder: fix javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109161310 --- .../main/java/com/google/cloud/dataflow/sdk/coders/Coder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java index 75847219058af..89b35164d4990 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java @@ -35,7 +35,8 @@ import javax.annotation.Nullable; /** - * A {@link Coder} defines how to encode and decode values of type {@code T} into byte streams. + * A {@link Coder Coder<T>} defines how to encode and decode values of type {@code T} into + * byte streams. * *

{@link Coder} instances are serialized during job creation and deserialized * before use, via JSON serialization. See {@link SerializableCoder} for an example of a From 3bad26310570c9b62440ec2d09e50d7e8e9c78e2 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Wed, 2 Dec 2015 11:11:26 -0800 Subject: [PATCH 1208/1541] BlockBasedSource: fix javadoc links ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109217232 --- .../com/google/cloud/dataflow/sdk/io/BlockBasedSource.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java index 62115dfbe5e69..9ef4cdcf30c15 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java @@ -185,8 +185,8 @@ protected boolean isAtSplitPoint() { } /** - * Reads the next record from the {@link getCurrentBlock() current block} if - * possible. Will call {@link readNextBlock()} to advance to the next block if not. + * Reads the next record from the {@link #getCurrentBlock() current block} if + * possible. Will call {@link #readNextBlock()} to advance to the next block if not. * *

The first record read from a block is treated as a split point. */ From d81e7506dab13d3443f0b4e83403cee9280d65a4 Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 2 Dec 2015 14:18:08 -0800 Subject: [PATCH 1209/1541] Fix issue with SerializableCoder and JAXBCoder to not close stream Also add a guard in CoderProperties/CoderUtils to ensure that user coders when being tested do not attempt to close the stream. ----Release Notes---- Coders do not own the underlying stream and should not attempt to close it. [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109236509 --- .../cloud/dataflow/sdk/coders/JAXBCoder.java | 16 +++- .../sdk/coders/SerializableCoder.java | 7 +- .../dataflow/sdk/testing/CoderProperties.java | 13 +++- .../cloud/dataflow/sdk/util/CoderUtils.java | 4 +- .../dataflow/sdk/util/UnownedInputStream.java | 76 +++++++++++++++++++ .../sdk/util/UnownedOutputStream.java | 56 ++++++++++++++ .../sdk/testing/CoderPropertiesTest.java | 29 +++++++ .../dataflow/sdk/util/CoderUtilsTest.java | 44 +++++++++++ .../sdk/util/UnownedInputStreamTest.java | 76 +++++++++++++++++++ .../sdk/util/UnownedOutputStreamTest.java | 57 ++++++++++++++ 10 files changed, 368 insertions(+), 10 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedInputStreamTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStreamTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java index 7275b81007bf3..2b0190b5f3d7c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java @@ -20,6 +20,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import java.io.FilterInputStream; +import java.io.FilterOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -67,7 +69,12 @@ public void encode(T value, OutputStream outStream, Context context) jaxbMarshaller = jaxbContext.createMarshaller(); } - jaxbMarshaller.marshal(value, outStream); + jaxbMarshaller.marshal(value, new FilterOutputStream(outStream) { + // JAXB closes the underyling stream so we must filter out those calls. + @Override + public void close() throws IOException { + } + }); } catch (JAXBException e) { throw new CoderException(e); } @@ -82,7 +89,12 @@ public T decode(InputStream inStream, Context context) throws CoderException, IO } @SuppressWarnings("unchecked") - T obj = (T) jaxbUnmarshaller.unmarshal(inStream); + T obj = (T) jaxbUnmarshaller.unmarshal(new FilterInputStream(inStream) { + // JAXB closes the underyling stream so we must filter out those calls. 
+ @Override + public void close() throws IOException { + } + }); return obj; } catch (JAXBException e) { throw new CoderException(e); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java index e5f88c36fddb9..593c9f0f809b1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java @@ -117,8 +117,10 @@ public Class getRecordType() { @Override public void encode(T value, OutputStream outStream, Context context) throws IOException, CoderException { - try (ObjectOutputStream oos = new ObjectOutputStream(outStream)) { + try { + ObjectOutputStream oos = new ObjectOutputStream(outStream); oos.writeObject(value); + oos.flush(); } catch (IOException exn) { throw new CoderException("unable to serialize record " + value, exn); } @@ -127,7 +129,8 @@ public void encode(T value, OutputStream outStream, Context context) @Override public T decode(InputStream inStream, Context context) throws IOException, CoderException { - try (ObjectInputStream ois = new ObjectInputStream(inStream)) { + try { + ObjectInputStream ois = new ObjectInputStream(inStream); return type.cast(ois.readObject()); } catch (ClassNotFoundException e) { throw new CoderException("unable to deserialize record", e); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java index 4f0db99e6fff7..5705dc4c78b68 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java @@ -33,6 +33,9 @@ import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.util.Serializer; import com.google.cloud.dataflow.sdk.util.Structs; +import com.google.cloud.dataflow.sdk.util.UnownedInputStream; +import com.google.cloud.dataflow.sdk.util.UnownedOutputStream; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Iterables; import java.io.ByteArrayInputStream; @@ -318,23 +321,25 @@ public static > void coderDecodesBase64Contents ////////////////////////////////////////////////////////////////////////// - private static byte[] encode( + @VisibleForTesting + static byte[] encode( Coder coder, Coder.Context context, T value) throws CoderException, IOException { @SuppressWarnings("unchecked") Coder deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class); ByteArrayOutputStream os = new ByteArrayOutputStream(); - deserializedCoder.encode(value, os, context); + deserializedCoder.encode(value, new UnownedOutputStream(os), context); return os.toByteArray(); } - private static T decode( + @VisibleForTesting + static T decode( Coder coder, Coder.Context context, byte[] bytes) throws CoderException, IOException { @SuppressWarnings("unchecked") Coder deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class); ByteArrayInputStream is = new ByteArrayInputStream(bytes); - return deserializedCoder.decode(is, context); + return deserializedCoder.decode(new UnownedInputStream(is), context); } private static T decodeEncode(Coder coder, Coder.Context context, T value) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java 
index 417305f455f93..84098a62d76fa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java @@ -116,7 +116,7 @@ public static byte[] encodeToByteArray(Coder coder, T value, Coder.Contex private static void encodeToSafeStream( Coder coder, T value, OutputStream stream, Coder.Context context) throws CoderException { try { - coder.encode(value, stream, context); + coder.encode(value, new UnownedOutputStream(stream), context); } catch (IOException exn) { Throwables.propagateIfPossible(exn, CoderException.class); throw new IllegalArgumentException( @@ -153,7 +153,7 @@ public static T decodeFromByteArray( private static T decodeFromSafeStream( Coder coder, InputStream stream, Coder.Context context) throws CoderException { try { - return coder.decode(stream, context); + return coder.decode(new UnownedInputStream(stream), context); } catch (IOException exn) { Throwables.propagateIfPossible(exn, CoderException.class); throw new IllegalArgumentException( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java new file mode 100644 index 0000000000000..3d80230a52cfc --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.common.base.MoreObjects; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * A {@link OutputStream} wrapper which protects against the user attempting to modify + * the underlying stream by closing it or using mark. 
+ */ +public class UnownedInputStream extends FilterInputStream { + public UnownedInputStream(InputStream delegate) { + super(delegate); + } + + @Override + public void close() throws IOException { + throw new UnsupportedOperationException("Caller does not own the underlying input stream " + + " and should not call close()."); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof UnownedInputStream + && ((UnownedInputStream) obj).in.equals(in); + } + + @Override + public int hashCode() { + return in.hashCode(); + } + + @SuppressWarnings("UnsynchronizedOverridesSynchronized") + @Override + public void mark(int readlimit) { + throw new UnsupportedOperationException("Caller does not own the underlying input stream " + + " and should not call mark()."); + } + + @Override + public boolean markSupported() { + return false; + } + + @SuppressWarnings("UnsynchronizedOverridesSynchronized") + @Override + public void reset() throws IOException { + throw new UnsupportedOperationException("Caller does not own the underlying input stream " + + " and should not call reset()."); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(UnownedInputStream.class).add("in", in).toString(); + } +} + diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java new file mode 100644 index 0000000000000..29187a1b9da6e --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import com.google.common.base.MoreObjects; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * A {@link OutputStream} wrapper which protects against the user attempting to modify + * the underlying stream by closing it. 
+ */ +public class UnownedOutputStream extends FilterOutputStream { + public UnownedOutputStream(OutputStream delegate) { + super(delegate); + } + + @Override + public void close() throws IOException { + throw new UnsupportedOperationException("Caller does not own the underlying output stream " + + " and should not call close()."); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof UnownedOutputStream + && ((UnownedOutputStream) obj).out.equals(out); + } + + @Override + public int hashCode() { + return out.hashCode(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(UnownedOutputStream.class).add("out", out).toString(); + } +} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/CoderPropertiesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/CoderPropertiesTest.java index 30ec8137268b6..f0fe688459585 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/CoderPropertiesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/testing/CoderPropertiesTest.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.coders.CustomCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -182,4 +183,32 @@ public void testBadCoderThatDependsOnStateThatIsLost() throws Exception { expectedException.expectMessage("I forgot something..."); CoderProperties.coderDecodeEncodeEqual(new ForgetfulSerializingCoder(1), "TestData"); } + + /** A coder which closes the underlying stream during encoding and decoding. */ + public static class ClosingCoder extends CustomCoder { + @Override + public void encode(String value, OutputStream outStream, Context context) throws IOException { + outStream.close(); + } + + @Override + public String decode(InputStream inStream, Context context) throws IOException { + inStream.close(); + return null; + } + } + + @Test + public void testClosingCoderFailsWhenDecoding() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderProperties.decode(new ClosingCoder(), Context.NESTED, new byte[0]); + } + + @Test + public void testClosingCoderFailsWhenEncoding() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderProperties.encode(new ClosingCoder(), Context.NESTED, "test-value"); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java index ef3f80c6aab96..e192f456bca68 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/CoderUtilsTest.java @@ -25,11 +25,13 @@ import com.google.cloud.dataflow.sdk.coders.AtomicCoder; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.Coder.Context; import com.google.cloud.dataflow.sdk.coders.CoderException; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import 
com.google.cloud.dataflow.sdk.coders.VoidCoder; +import com.google.cloud.dataflow.sdk.testing.CoderPropertiesTest.ClosingCoder; import org.hamcrest.CoreMatchers; import org.junit.Assert; @@ -182,4 +184,46 @@ public void testCreateUnknownCoder() throws Exception { "Unable to convert coder ID UnknownCoder to class")); } } + + @Test + public void testClosingCoderFailsWhenDecodingBase64() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.decodeFromBase64(new ClosingCoder(), "test-value"); + } + + @Test + public void testClosingCoderFailsWhenDecodingByteArray() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.decodeFromByteArray(new ClosingCoder(), new byte[0]); + } + + @Test + public void testClosingCoderFailsWhenDecodingByteArrayInContext() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.decodeFromByteArray(new ClosingCoder(), new byte[0], Context.NESTED); + } + + @Test + public void testClosingCoderFailsWhenEncodingToBase64() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.encodeToBase64(new ClosingCoder(), "test-value"); + } + + @Test + public void testClosingCoderFailsWhenEncodingToByteArray() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.encodeToByteArray(new ClosingCoder(), "test-value"); + } + + @Test + public void testClosingCoderFailsWhenEncodingToByteArrayInContext() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + CoderUtils.encodeToByteArray(new ClosingCoder(), "test-value", Context.NESTED); + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedInputStreamTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedInputStreamTest.java new file mode 100644 index 0000000000000..30da6ae7238b5 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedInputStreamTest.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayInputStream; + +/** Unit tests for {@link UnownedInputStream}. 
*/ +@RunWith(JUnit4.class) +public class UnownedInputStreamTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + private ByteArrayInputStream bais; + private UnownedInputStream os; + + @Before + public void setup() { + bais = new ByteArrayInputStream(new byte[]{ 1, 2, 3 }); + os = new UnownedInputStream(bais); + } + + @Test + public void testHashCodeEqualsAndToString() throws Exception { + assertEquals(bais.hashCode(), os.hashCode()); + assertEquals("UnownedInputStream{in=" + bais + "}", os.toString()); + assertEquals(new UnownedInputStream(bais), os); + } + + @Test + public void testClosingThrows() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + expectedException.expectMessage("close()"); + os.close(); + } + + @Test + public void testMarkThrows() throws Exception { + assertFalse(os.markSupported()); + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + expectedException.expectMessage("mark()"); + os.mark(1); + } + + @Test + public void testResetThrows() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + expectedException.expectMessage("reset()"); + os.reset(); + } +} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStreamTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStreamTest.java new file mode 100644 index 0000000000000..eea70fe6cb9cf --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStreamTest.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; + +/** Unit tests for {@link UnownedOutputStream}. 
*/ +@RunWith(JUnit4.class) +public class UnownedOutputStreamTest { + @Rule public ExpectedException expectedException = ExpectedException.none(); + private ByteArrayOutputStream baos; + private UnownedOutputStream os; + + @Before + public void setup() { + baos = new ByteArrayOutputStream(); + os = new UnownedOutputStream(baos); + } + + @Test + public void testHashCodeEqualsAndToString() throws Exception { + assertEquals(baos.hashCode(), os.hashCode()); + assertEquals("UnownedOutputStream{out=" + baos + "}", os.toString()); + assertEquals(new UnownedOutputStream(baos), os); + } + + @Test + public void testClosingThrows() throws Exception { + expectedException.expect(UnsupportedOperationException.class); + expectedException.expectMessage("Caller does not own the underlying"); + os.close(); + } +} + From 0c637d38a89eecf7d31bee913f00e1c89365e379 Mon Sep 17 00:00:00 2001 From: robertwb Date: Wed, 2 Dec 2015 14:41:02 -0800 Subject: [PATCH 1210/1541] Occasionally compact combined state on write. This is needed to avoid arbitrarily large blowup for many-element windows. Now, with a fixed probability (currently 0.2%) when writing the state, instead of doing a blind write we read in all accumulated values, combine, and replace everything with the final combined value. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109239045 --- .../worker/WindmillStateInternals.java | 36 +++++++++ .../worker/WindmillStateInternalsTest.java | 79 ++++++++++++++++--- 2 files changed, 105 insertions(+), 10 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java index 6f0b44c4c4559..9f88c78de915c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java @@ -33,6 +33,7 @@ import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder; import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Supplier; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Futures; @@ -45,6 +46,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Random; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -102,6 +104,35 @@ public ValueState bindValue(StateTag> address, Coder cod private final boolean useStateFamilies; private final Supplier scopedReadStateSupplier; + @VisibleForTesting + static final ThreadLocal> COMPACT_NOW = + new ThreadLocal() { + public Supplier initialValue() { + return new Supplier() { + /* The rate at which, on average, this will return true. */ + static final double RATE = 0.002; + Random random = new Random(); + long counter = nextSample(); + + private long nextSample() { + // Use geometric distribution to find next true value. + // This lets us avoid invoking random.nextDouble() on every call. 
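+                // (Editorial note, not part of the original change.) With U drawn uniformly
+                // from (0,1), floor(log(U) / log(1 - RATE)) is geometrically distributed:
+                // the probability that it is at least k is (1 - RATE)^k, so on average about
+                // 1 / RATE (roughly 500) calls to get() separate consecutive 'true' results,
+                // which is why compaction triggers on only a small fraction of writes.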
+ return (long) Math.floor(Math.log(random.nextDouble()) / Math.log(1 - RATE)); + } + + public Boolean get() { + counter--; + if (counter < 0) { + counter = nextSample(); + return true; + } else { + return false; + } + } + }; + } + }; + public WindmillStateInternals(String prefix, boolean useStateFamilies, WindmillStateReader reader, Supplier scopedReadStateSupplier) { this.prefix = prefix; @@ -569,6 +600,11 @@ public void clear() { @Override public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws IOException { if (hasLocalAdditions) { + // TODO: Take into account whether it's in the cache. + if (COMPACT_NOW.get().get()) { + // Implicitly clears the bag and combines local and persisted accumulators. + localAdditionsAccum = getAccum().read(); + } bag.add(combineFn.compact(localAdditionsAccum)); localAdditionsAccum = combineFn.createAccumulator(); hasLocalAdditions = false; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java index b976e05051af4..a63d9a0417cea 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java @@ -42,6 +42,7 @@ import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.SettableFuture; import com.google.protobuf.ByteString; @@ -70,6 +71,7 @@ public class WindmillStateInternalsTest { private static final StateTag> COMBINING_ADDR = StateTags.combiningValueFromInputInternal( "combining", VarIntCoder.of(), new Sum.SumIntegerFn()); + private static final ByteString COMBINING_KEY = key(NAMESPACE, "combining"); private final Coder accumCoder = new Sum.SumIntegerFn().getAccumulatorCoder(null, VarIntCoder.of()); @@ -81,11 +83,11 @@ public class WindmillStateInternalsTest { @Mock private Supplier readStateSupplier; - private ByteString key(StateNamespace namespace, String addrId) { + private static ByteString key(StateNamespace namespace, String addrId) { return key("", namespace, addrId); } - private ByteString key(String prefix, StateNamespace namespace, String addrId) { + private static ByteString key(String prefix, StateNamespace namespace, String addrId) { return ByteString.copyFromUtf8(prefix + namespace.stringKey() + "+u" + addrId); } @@ -286,7 +288,7 @@ public void testCombiningAddBeforeRead() throws Exception { CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); SettableFuture> future = SettableFuture.create(); - when(mockReader.listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder)) + when(mockReader.listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder)) .thenReturn(future); StateContents result = value.get(); @@ -326,10 +328,10 @@ public void testCombiningIsEmpty() throws Exception { CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); SettableFuture> future = SettableFuture.create(); - when(mockReader.listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder)) + when(mockReader.listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder)) .thenReturn(future); StateContents result = value.isEmpty(); - Mockito.verify(mockReader).listFuture(key(NAMESPACE, 
"combining"), STATE_FAMILY, accumCoder); + Mockito.verify(mockReader).listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder); waitAndSet(future, Arrays.asList(new int[] {29}), 200); assertThat(result.read(), Matchers.is(false)); @@ -342,7 +344,7 @@ public void testCombiningIsEmptyAfterClear() throws Exception { value.clear(); StateContents result = value.isEmpty(); Mockito.verify(mockReader, never()) - .listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder); + .listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder); assertThat(result.read(), Matchers.is(true)); value.add(87); @@ -351,6 +353,8 @@ public void testCombiningIsEmptyAfterClear() throws Exception { @Test public void testCombiningAddPersist() throws Exception { + disableCompactOnWrite(); + CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); value.add(5); @@ -363,7 +367,7 @@ public void testCombiningAddPersist() throws Exception { assertEquals(1, commitBuilder.getListUpdatesCount()); TagList listUpdates = commitBuilder.getListUpdates(0); - assertEquals(key(NAMESPACE, "combining"), listUpdates.getTag()); + assertEquals(COMBINING_KEY, listUpdates.getTag()); assertEquals(1, listUpdates.getValuesCount()); assertEquals( 11, @@ -375,8 +379,45 @@ public void testCombiningAddPersist() throws Exception { Mockito.verifyNoMoreInteractions(mockReader); } + @Test + public void testCombiningAddPersistWithCompact() throws Exception { + forceCompactOnWrite(); + + Mockito.stub( + mockReader.listFuture( + org.mockito.Matchers.any(), + org.mockito.Matchers.any(), + org.mockito.Matchers.>any())) + .toReturn( + Futures.>immediateFuture( + ImmutableList.of(new int[] {40}, new int[] {60}))); + + CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); + + value.add(5); + value.add(6); + + Windmill.WorkItemCommitRequest.Builder commitBuilder = + Windmill.WorkItemCommitRequest.newBuilder(); + underTest.persist(commitBuilder); + + assertEquals(2, commitBuilder.getListUpdatesCount()); + assertEquals(0, commitBuilder.getListUpdates(0).getValuesCount()); + + TagList listUpdates = commitBuilder.getListUpdates(1); + assertEquals(COMBINING_KEY, listUpdates.getTag()); + assertEquals(1, listUpdates.getValuesCount()); + assertEquals( + 111, + CoderUtils.decodeFromByteArray( + accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[ + 0]); + } + @Test public void testCombiningClearPersist() throws Exception { + disableCompactOnWrite(); + CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); value.clear(); @@ -390,12 +431,12 @@ public void testCombiningClearPersist() throws Exception { assertEquals(2, commitBuilder.getListUpdatesCount()); TagList listClear = commitBuilder.getListUpdates(0); - assertEquals(key(NAMESPACE, "combining"), listClear.getTag()); + assertEquals(COMBINING_KEY, listClear.getTag()); assertEquals(Long.MAX_VALUE, listClear.getEndTimestamp()); assertEquals(0, listClear.getValuesCount()); TagList listUpdates = commitBuilder.getListUpdates(1); - assertEquals(key(NAMESPACE, "combining"), listUpdates.getTag()); + assertEquals(COMBINING_KEY, listUpdates.getTag()); assertEquals(1, listUpdates.getValuesCount()); assertEquals( 11, @@ -403,7 +444,7 @@ public void testCombiningClearPersist() throws Exception { accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); // Blind adds should not need to read the future. 
- Mockito.verify(mockReader).listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder); + Mockito.verify(mockReader).listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder); Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -866,4 +907,22 @@ public void testValueNoStateFamilies() throws Exception { assertEquals("World", value.get().read()); } + + private void disableCompactOnWrite() { + WindmillStateInternals.COMPACT_NOW.set( + new Supplier() { + public Boolean get() { + return false; + } + }); + } + + private void forceCompactOnWrite() { + WindmillStateInternals.COMPACT_NOW.set( + new Supplier() { + public Boolean get() { + return true; + } + }); + } } From cf1e3f341e52e94e51273436043c4841d9146d81 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 1 Dec 2015 13:12:14 -0800 Subject: [PATCH 1211/1541] Version management Prepare codebase for version 1.4.0. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109117479 --- examples/pom.xml | 2 +- maven-archetypes/examples/pom.xml | 2 +- maven-archetypes/starter/pom.xml | 2 +- pom.xml | 2 +- sdk/pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index bd02ea8f9bc83..d33c610a1a32c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -22,7 +22,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 1.2.1-SNAPSHOT + 1.4.0-SNAPSHOT com.google.cloud.dataflow diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml index 8b0d5e2c0fa7f..42a553aa02575 100644 --- a/maven-archetypes/examples/pom.xml +++ b/maven-archetypes/examples/pom.xml @@ -22,7 +22,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 1.2.1-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml index 0298889abf0a8..fee9e33947633 100644 --- a/maven-archetypes/starter/pom.xml +++ b/maven-archetypes/starter/pom.xml @@ -22,7 +22,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 1.2.1-SNAPSHOT + 1.4.0-SNAPSHOT ../../pom.xml diff --git a/pom.xml b/pom.xml index f352b8435fa91..099352d7cf0b9 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,7 @@ http://cloud.google.com/dataflow 2013 - 1.2.1-SNAPSHOT + 1.4.0-SNAPSHOT diff --git a/sdk/pom.xml b/sdk/pom.xml index 4514e2121ff4b..376f84a993034 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -22,7 +22,7 @@ com.google.cloud.dataflow google-cloud-dataflow-java-sdk-parent - 1.2.1-SNAPSHOT + 1.4.0-SNAPSHOT com.google.cloud.dataflow From 25236730f8bdc6ca38a70e19514a6b42cf16afce Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 3 Dec 2015 14:33:52 -0800 Subject: [PATCH 1212/1541] Improvements to DataflowWorkProgressUpdaterTest Improves workProgressUpdaterAdaptsProgressInterval by bringing when() close to verify() to improve readability, and using relative rather than absolute time and increased timeouts to decrease flakiness. 
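A rough illustration of the timing pattern this change adopts (not code from this patch; the
LeaseClient type below is invented for the sketch): stub against a deadline computed relative to
the current time immediately before the asynchronous work starts, then verify with a generous
Mockito timeout so ordinary scheduling jitter does not make the test flaky.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

public class RelativeDeadlineStubbingSketch {
  /** Toy stand-in for the work-unit client; the names here are illustrative only. */
  interface LeaseClient {
    long reportStatus(String workId);
  }

  public static void main(String[] args) {
    LeaseClient client = mock(LeaseClient.class);

    // Stub right before the asynchronous work starts, computing the lease expiry relative
    // to "now" so a slow setup phase cannot make the lease appear already expired.
    when(client.reportStatus("work-1")).thenReturn(System.currentTimeMillis() + 2000);

    new Thread(() -> client.reportStatus("work-1")).start();

    // Verify with leeway beyond the expected reporting interval (e.g. 300ms + 500ms slack).
    verify(client, timeout(800)).reportStatus("work-1");
  }
}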
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109347801 --- .../DataflowWorkProgressUpdaterTest.java | 79 +++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index a6d60d946ca38..c7ec363285d5b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -132,7 +132,6 @@ public void setWorkerProgress(ApproximateProgress progress) { private WorkItem workItem; private DataflowWorkerHarnessOptions options; private DataflowWorkProgressUpdater progressUpdater; - private long nowMillis; @Before public void initMocksAndWorkflowServiceAndWorkerAndWork() { @@ -151,13 +150,12 @@ public Collection> getOutputMetrics() { return metrics; } }; - nowMillis = System.currentTimeMillis(); workItem = new WorkItem(); workItem.setProjectId(PROJECT_ID); workItem.setJobId(JOB_ID); workItem.setId(WORK_ID); - workItem.setLeaseExpireTime(toCloudTime(new Instant(nowMillis + 1000))); + workItem.setLeaseExpireTime(toCloudTime(new Instant(System.currentTimeMillis() + 1000))); workItem.setReportStatusInterval(toCloudDuration(Duration.millis(300))); workItem.setInitialReportIndex(1L); @@ -181,7 +179,7 @@ protected long getLeaseRenewalLatencyMargin() { @Test(timeout = 1000) public void workProgressUpdaterUpdates() throws Exception { when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, null, 2L)); + .thenReturn(generateServiceState(System.currentTimeMillis() + 2000, 1000, null, 2L)); setUpCounters(2); setUpMetrics(3); setUpProgress(approximateProgressAtIndex(1L)); @@ -196,59 +194,59 @@ public void workProgressUpdaterUpdates() throws Exception { // Verifies that ReportWorkItemStatusRequest contains correct progress report // and actual dynamic split result. - @Test(timeout = 5000) + @Test(timeout = 10000) public void workProgressUpdaterAdaptsProgressInterval() throws Exception { - // Mock that the next reportProgress call will return a response that asks - // us to truncate the task at index 3, and the next two will not ask us to - // truncate at all. - when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, positionAtIndex(3L), 2L)) - .thenReturn(generateServiceState(nowMillis + 3000, 2000, null, 3L)) - .thenReturn(generateServiceState(nowMillis + 1000, 3000, null, 4L)) - .thenReturn(generateServiceState(nowMillis + 4000, 3000, null, 5L)); - setUpCounters(3); setUpMetrics(2); setUpProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); - // The initial update should be sent after 300. - verify(workUnitClient, timeout(400)) - .reportWorkItemStatus(argThat( - new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( - approximateProgressAtIndex(1L)).withReportIndex(1L))); + + // In tests below, we allow 500ms leeway. 
+ + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + // leaseExpirationTimestamp, progressReportInterval, suggestedStopPosition, nextReportIndex + .thenReturn(generateServiceState( + System.currentTimeMillis() + 2000, 1000, positionAtIndex(3L), 2L)); + // The initial update should be sent at nowMillis+300 (+500ms leeway). + verify(workUnitClient, timeout(800)).reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( + approximateProgressAtIndex(1L)).withReportIndex(1L))); setUpCounters(5); setUpMetrics(6); setUpProgress(approximateProgressAtIndex(2L)); - // The second update should be sent after one second as requested. - verify(workUnitClient, timeout(1100)) - .reportWorkItemStatus(argThat( - new ExpectedDataflowWorkItemStatus() - .withCounters(5) - .withMetrics(6) - .withProgress(approximateProgressAtIndex(2L)) - .withDynamicSplitAtPosition(positionAtIndex(3L)) - .withReportIndex(2L))); + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(System.currentTimeMillis() + 3000, 2000, null, 3L)); + // The second update should be sent after ~1000ms (previous requested report interval). + verify(workUnitClient, timeout(1500)).reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus() + .withCounters(5) + .withMetrics(6) + .withProgress(approximateProgressAtIndex(2L)) + .withDynamicSplitAtPosition(positionAtIndex(3L)) + .withReportIndex(2L))); // After the request is sent, reset cached dynamic split result to null. assertNull(progressUpdater.getDynamicSplitResultToReport()); setUpProgress(approximateProgressAtIndex(3L)); - // The third update should be sent after 2 seconds. - verify(workUnitClient, timeout(2100)) - .reportWorkItemStatus(argThat( - new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)) - .withReportIndex(3L))); + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(System.currentTimeMillis() + 1000, 3000, null, 4L)); + // The third update should be sent after ~2000ms (previous requested report interval). + verify(workUnitClient, timeout(2500)).reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)) + .withReportIndex(3L))); setUpProgress(approximateProgressAtIndex(4L)); - // The fourth update should not respect the suggested report interval. - // It should be sent before the lease expires - verify(workUnitClient, timeout(900)) - .reportWorkItemStatus(argThat( - new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(4L)) - .withReportIndex(4L))); + when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) + .thenReturn(generateServiceState(System.currentTimeMillis() + 4000, 3000, null, 5L)); + // The fourth update should not respect the suggested report interval (3000ms) + // because the lease expires in 1000ms. The update should be sent before the lease expires. + verify(workUnitClient, timeout(900)).reportWorkItemStatus(argThat( + new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(4L)) + .withReportIndex(4L))); progressUpdater.stopReportingProgress(); @@ -261,7 +259,8 @@ public void workProgressUpdaterSendsLastPendingUpdateWhenStopped() throws Except // The setup process sends one update after 300ms. Enqueue another that should be scheduled // 1000ms after that. 
when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) - .thenReturn(generateServiceState(nowMillis + 2000, 1000, positionAtIndex(2L), 2L)); + .thenReturn(generateServiceState( + System.currentTimeMillis() + 2000, 1000, positionAtIndex(2L), 2L)); setUpProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); From 8c8f0857c267ba172ab498998c1a13fa803c32ef Mon Sep 17 00:00:00 2001 From: tgroh Date: Fri, 4 Dec 2015 11:53:14 -0800 Subject: [PATCH 1213/1541] Reduce availability of StepContext#getExecutionContext getExecutionContext is used in only one place, from a concrete instance of StreamingModeExecutionContext.StepContext, and is immediately downcast to StreamingModeExecutionContext. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109435007 --- .../runners/worker/StreamingModeExecutionContext.java | 4 ++++ .../sdk/runners/worker/StreamingSideInputDoFnRunner.java | 5 +---- .../cloud/dataflow/sdk/util/BaseExecutionContext.java | 5 ----- .../google/cloud/dataflow/sdk/util/ExecutionContext.java | 5 ----- .../runners/worker/StreamingSideInputDoFnRunnerTest.java | 9 ++++----- 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java index 279c53b7f2b27..40eadbc72e1cc 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java @@ -396,6 +396,10 @@ public void flushState() { timerInternals.persistTo(outputBuilder); } + public Iterable getSideInputNotifications() { + return StreamingModeExecutionContext.this.getSideInputNotifications(); + } + @Override public void writePCollectionViewData( TupleTag tag, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java index 85e79b6438073..d37aaa4a7ead6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java @@ -65,7 +65,6 @@ public class StreamingSideInputDoFnRunner extends DoFnRunner { private StreamingModeExecutionContext.StepContext stepContext; - private StreamingModeExecutionContext execContext; private Map> sideInputViews; private final StateTag>> elementsAddr; @@ -100,8 +99,6 @@ public StreamingSideInputDoFnRunner( for (PCollectionView view : doFnInfo.getSideInputViews()) { sideInputViews.put(view.getTagInternal().getId(), view); } - this.execContext = - (StreamingModeExecutionContext) stepContext.getExecutionContext(); this.blockedMapAddr = blockedMapAddr(windowFn); this.elementsAddr = StateTags.makeSystemTagInternal(StateTags.bag("elem", @@ -129,7 +126,7 @@ StateTag>>> blockedMapAddr(WindowFn getReadyWindows() { Set readyWindows = new HashSet<>(); - for (Windmill.GlobalDataId id : execContext.getSideInputNotifications()) { + for (Windmill.GlobalDataId id : stepContext.getSideInputNotifications()) { if (sideInputViews.get(id.getTag()) == null) { // Side input is for a different DoFn; ignore it. 
continue; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java index f4862decdddc8..76771c5a073dd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java @@ -111,11 +111,6 @@ public String getTransformName() { return transformName; } - @Override - public ExecutionContext getExecutionContext() { - return executionContext; - } - @Override public void noteOutput(WindowedValue output) { executionContext.noteOutput(output); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java index 77c66b29a4e58..971c886c88b73 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java @@ -70,11 +70,6 @@ public interface StepContext { */ String getTransformName(); - /** - * The context in which this step is executing. - */ - ExecutionContext getExecutionContext(); - /** * Hook for subclasses to implement that will be called whenever * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#output} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunnerTest.java index 503474b12cb16..2f470140a8b76 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunnerTest.java @@ -86,7 +86,6 @@ public class StreamingSideInputDoFnRunnerTest { @Before public void setUp() { MockitoAnnotations.initMocks(this); - when(stepContext.getExecutionContext()).thenReturn(execContext); when(stepContext.stateInternals()).thenReturn(state); } @@ -94,7 +93,7 @@ public void setUp() { public void testSideInputReady() throws Exception { PCollectionView view = createView(); - when(execContext.getSideInputNotifications()) + when(stepContext.getSideInputNotifications()) .thenReturn(Arrays.asList()); when(stepContext.issueSideInputFetch( eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))) @@ -119,7 +118,7 @@ public void testSideInputReady() throws Exception { public void testSideInputNotReady() throws Exception { PCollectionView view = createView(); - when(execContext.getSideInputNotifications()) + when(stepContext.getSideInputNotifications()) .thenReturn(Arrays.asList()); when(stepContext.issueSideInputFetch( eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))) @@ -185,7 +184,7 @@ public void testSideInputNotification() throws Exception { runner.watermarkHold(createWindow(0)).add(new Instant(0)); runner.elementBag(createWindow(0)).add(createDatum("e", 0)); - when(execContext.getSideInputNotifications()).thenReturn(Arrays.asList(id)); + when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(id)); when(stepContext.issueSideInputFetch( eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))) .thenReturn(false); @@ -229,7 +228,7 @@ public void testMultipleSideInputs() throws Exception { StreamingSideInputDoFnRunner.blockedMapAddr(WINDOW_FN)); blockedMapState.set(blockedMap); - 
when(execContext.getSideInputNotifications()).thenReturn(Arrays.asList(id)); + when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(id)); when(stepContext.issueSideInputFetch( any(PCollectionView.class), any(BoundedWindow.class), any(SideInputState.class))) .thenReturn(true); From 9e74526d4ca83d469b1ac8607a28590618c9dabc Mon Sep 17 00:00:00 2001 From: mariand Date: Fri, 4 Dec 2015 23:53:09 -0800 Subject: [PATCH 1214/1541] Fixed a race in StateSampler. stateTimestampNs must be initialized before starting the background thread which uses it, otherwise the initialization will race with run(). ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109479932 --- .../cloud/dataflow/sdk/util/common/worker/StateSampler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java index 742c86b120327..df916a0a6abb8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java @@ -119,6 +119,7 @@ public StateSampler(String prefix, // The current implementation uses a fixed-rate timer with a period samplingPeriodMs as a // trampoline to a one-shot random timer which fires with a random delay within // samplingPeriodMs. + stateTimestampNs = System.nanoTime(); invocationTriggerFuture = executorService.scheduleAtFixedRate( new Runnable() { @@ -145,7 +146,6 @@ public void run() { 0, samplingPeriodMs, TimeUnit.MILLISECONDS); - stateTimestampNs = System.nanoTime(); } /** From 3ae688368a3edaba6f08cb5a9c0d4d33aaff1387 Mon Sep 17 00:00:00 2001 From: tgroh Date: Mon, 7 Dec 2015 11:25:29 -0800 Subject: [PATCH 1215/1541] Genericize BaseExecutionContext This allows implementors that care about specific implementation details fo the execution context to return a more specific type of ExecutionContext.StepContext from getOrCreateStepContext. 
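For readers unfamiliar with the pattern, a minimal self-contained sketch of such a genericized
base class follows. The class and method names are illustrative, not the SDK's real ones; the
point is that parameterizing the base class on its step-context type lets each subclass return
its own specialized context without downcasting.

import java.util.HashMap;
import java.util.Map;

public class SelfSpecializingContextSketch {
  static class StepContext {}

  // Base class parameterized on the concrete StepContext type it creates and caches...
  abstract static class BaseContext<T extends StepContext> {
    private final Map<String, T> cached = new HashMap<>();

    protected abstract T createStepContext(String stepName);

    T getOrCreateStepContext(String stepName) {
      return cached.computeIfAbsent(stepName, this::createStepContext);
    }
  }

  // ...so a specialized subclass hands back its own StepContext subtype with no casts.
  static class StreamingStepContext extends StepContext {
    void flushState() {}
  }

  static class StreamingContext extends BaseContext<StreamingStepContext> {
    @Override
    protected StreamingStepContext createStepContext(String stepName) {
      return new StreamingStepContext();
    }
  }

  public static void main(String[] args) {
    StreamingContext context = new StreamingContext();
    context.getOrCreateStepContext("step").flushState(); // specialized return type, no cast
  }
}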
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109601837 --- .../worker/DataflowExecutionContext.java | 4 ++- .../worker/MapTaskExecutorFactory.java | 12 +++---- .../worker/StreamingModeExecutionContext.java | 5 +-- .../sdk/util/BaseExecutionContext.java | 33 ++++++++++++++----- .../sdk/util/BatchModeExecutionContext.java | 7 ++-- .../sdk/util/DirectModeExecutionContext.java | 7 ++-- .../dataflow/sdk/util/ExecutionContext.java | 2 +- .../worker/MapTaskExecutorFactoryTest.java | 5 +-- .../StreamingGroupAlsoByWindowsDoFnTest.java | 4 +-- ...ngGroupAlsoByWindowsReshuffleDoFnTest.java | 4 +-- 10 files changed, 53 insertions(+), 30 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowExecutionContext.java index 241641e2f1201..cd3ba394b5c67 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowExecutionContext.java @@ -18,13 +18,15 @@ import com.google.api.services.dataflow.model.SideInputInfo; import com.google.cloud.dataflow.sdk.util.BaseExecutionContext; +import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.SideInputReader; import com.google.cloud.dataflow.sdk.values.PCollectionView; /** * Extensions to {@link BaseExecutionContext} specific to the Dataflow worker. */ -public abstract class DataflowExecutionContext extends BaseExecutionContext { +public abstract class DataflowExecutionContext + extends BaseExecutionContext { /** * Returns a {@link SideInputReader} for all the side inputs described in the given * {@link SideInputInfo} descriptors. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java index 243f3f40ad5f1..d227e833eccaa 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactory.java @@ -80,7 +80,7 @@ public class MapTaskExecutorFactory { public static MapTaskExecutor create( PipelineOptions options, MapTask mapTask, - DataflowExecutionContext context, + DataflowExecutionContext context, CounterSet counters, StateSampler stateSampler) throws Exception { return create( @@ -100,7 +100,7 @@ public static MapTaskExecutor create( PipelineOptions options, MapTask mapTask, ReaderFactory.Registry registry, - DataflowExecutionContext context, + DataflowExecutionContext context, CounterSet counters, StateSampler stateSampler) throws Exception { @@ -133,7 +133,7 @@ public static MapTaskExecutor create( static Operation createOperation( PipelineOptions options, ParallelInstruction instruction, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, List priorOperations, String counterPrefix, String systemStageName, @@ -160,7 +160,7 @@ static Operation createOperation( PipelineOptions options, ParallelInstruction instruction, ReaderFactory.Registry registry, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, List priorOperations, String counterPrefix, String systemStageName, @@ -199,7 +199,7 @@ static ReadOperation createReadOperation( PipelineOptions options, ParallelInstruction instruction, ReaderFactory.Registry registry, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, @SuppressWarnings("unused") List priorOperations, String counterPrefix, String systemStageName, @@ -244,7 +244,7 @@ static WriteOperation createWriteOperation(PipelineOptions options, static ParDoOperation createParDoOperation( PipelineOptions options, ParallelInstruction instruction, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, List priorOperations, String counterPrefix, CounterSet.AddCounterMutator addCounterMutator, diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java index 40eadbc72e1cc..d57ed1fe5bd78 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java @@ -56,7 +56,8 @@ /** * {@link ExecutionContext} for use in streaming mode. */ -public class StreamingModeExecutionContext extends DataflowExecutionContext { +public class StreamingModeExecutionContext + extends DataflowExecutionContext { private final String stageName; private final Map, Map> sideInputCache; // Per-key cache of active Reader objects in use by this process. 
@@ -99,7 +100,7 @@ public void start( } @Override - public ExecutionContext.StepContext createStepContext( + public StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { StepContext context = new StepContext(stepName, transformName, stateSampler); context.start(stateReader, inputDataWatermark); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java index 76771c5a073dd..9d048962038e7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -31,18 +32,34 @@ * Base class for implementations of {@link ExecutionContext}. * *

A concrete subclass should implement {@link #createStepContext} to create the appropriate - * {@link ExecutionContext.StepContext} implementation. Any {@code StepContext} created will + * {@link StepContext} implementation. Any {@code StepContext} created will * be cached for the lifetime of this {@link ExecutionContext}. + * + *

BaseExecutionContext is generic to allow implementing subclasses to return a concrete subclass + * of {@link StepContext} from {@link #getOrCreateStepContext(String, String, StateSampler)} and + * {@link #getAllStepContexts()} without forcing each subclass to override the method, e.g. + *

+ * @Override
+ * StreamingModeExecutionContext.StepContext getOrCreateStepContext(...) {
+ *   return (StreamingModeExecutionContext.StepContext) super.getOrCreateStepContext(...);
+ * }
+ * 
+ * + *

When a subclass of {@code BaseExecutionContext} has been downcast, the return types of + * {@link #createStepContext(String, String, StateSampler)}, + * {@link #getOrCreateStepContext(String, String, StateSampler}, and {@link #getAllStepContexts()} + * will be appropriately specialized. */ -public abstract class BaseExecutionContext implements ExecutionContext { +public abstract class BaseExecutionContext + implements ExecutionContext { - private Map cachedStepContexts = new HashMap<>(); + private Map cachedStepContexts = new HashMap<>(); /** * Implementations should override this to create the specific type * of {@link StepContext} they need. */ - protected abstract ExecutionContext.StepContext createStepContext( + protected abstract T createStepContext( String stepName, String transformName, StateSampler stateSampler); @@ -50,9 +67,9 @@ protected abstract ExecutionContext.StepContext createStepContext( * Returns the {@link StepContext} associated with the given step. */ @Override - public ExecutionContext.StepContext getOrCreateStepContext( + public T getOrCreateStepContext( String stepName, String transformName, StateSampler stateSampler) { - ExecutionContext.StepContext context = cachedStepContexts.get(stepName); + T context = cachedStepContexts.get(stepName); if (context == null) { context = createStepContext(stepName, transformName, stateSampler); cachedStepContexts.put(stepName, context); @@ -64,8 +81,8 @@ public ExecutionContext.StepContext getOrCreateStepContext( * Returns a collection view of all of the {@link StepContext}s. */ @Override - public Collection getAllStepContexts() { - return cachedStepContexts.values(); + public Collection getAllStepContexts() { + return Collections.unmodifiableCollection(cachedStepContexts.values()); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java index 4477bb0a54e6c..d4f239c826c59 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchModeExecutionContext.java @@ -31,7 +31,8 @@ /** * {@link ExecutionContext} for use in batch mode. */ -public class BatchModeExecutionContext extends DataflowExecutionContext { +public class BatchModeExecutionContext + extends DataflowExecutionContext { private Object key; private PipelineOptions options; @@ -59,7 +60,7 @@ public static BatchModeExecutionContext fromOptions(PipelineOptions options) { * Create a new {@link ExecutionContext.StepContext}. */ @Override - protected ExecutionContext.StepContext createStepContext( + protected StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { return new StepContext(stepName, transformName); } @@ -117,7 +118,7 @@ public SideInputReader getSideInputReaderForViews( /** * {@link ExecutionContext.StepContext} used in batch mode. 
*/ - class StepContext extends BaseExecutionContext.StepContext { + public class StepContext extends BaseExecutionContext.StepContext { private final InMemoryStateInternals stateInternals = new InMemoryStateInternals(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java index d9474a61130a4..12f2d208d696c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java @@ -31,7 +31,8 @@ /** * {@link ExecutionContext} for use in direct mode. */ -public class DirectModeExecutionContext extends BaseExecutionContext { +public class DirectModeExecutionContext + extends BaseExecutionContext { private Object key; private List> output = Lists.newArrayList(); @@ -44,7 +45,7 @@ public static DirectModeExecutionContext create() { } @Override - protected ExecutionContext.StepContext createStepContext( + protected StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { return new StepContext(this, stepName, transformName); } @@ -96,7 +97,7 @@ public List> getSideOutput(TupleTag tag) { /** * {@link ExecutionContext.StepContext} used in direct mode. */ - static class StepContext extends BaseExecutionContext.StepContext { + public static class StepContext extends BaseExecutionContext.StepContext { private final Map stateInternals = Maps.newHashMap(); private InMemoryStateInternals currentStateInternals = null; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java index 971c886c88b73..83d74e6668abd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java @@ -39,7 +39,7 @@ StepContext getOrCreateStepContext( /** * Returns a collection view of all of the {@link StepContext}s. 
*/ - Collection getAllStepContexts(); + Collection getAllStepContexts(); /** * Hook for subclasses to implement that will be called whenever diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java index 8778d7b7842a5..f8799bd9afd48 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/MapTaskExecutorFactoryTest.java @@ -202,7 +202,7 @@ public void testExecutionContextPlumbing() throws Exception { mapTask.setStageName("test"); mapTask.setInstructions(instructions); - DataflowExecutionContext context = BatchModeExecutionContext.fromOptions(options); + BatchModeExecutionContext context = BatchModeExecutionContext.fromOptions(options); CounterSet counters = new CounterSet(); try (MapTaskExecutor executor = @@ -413,7 +413,8 @@ public void testCreateParDoOperation() throws Exception { ParallelInstruction instruction = createParDoInstruction(producerIndex, producerOutputNum, "DoFn"); - DataflowExecutionContext context = BatchModeExecutionContext.fromOptions(options); + BatchModeExecutionContext context = + BatchModeExecutionContext.fromOptions(options); CounterSet counterSet = new CounterSet(); String counterPrefix = "test-"; String systemStageName = "stageName"; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java index a03dfed3c9560..6dadab49b4d53 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java @@ -102,9 +102,9 @@ public class StreamingGroupAlsoByWindowsDoFnTest { // StreamingGroupAlsoByWindows expects it to. So, hook that up. @Override - public ExecutionContext.StepContext createStepContext( + public StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { - ExecutionContext.StepContext context = + StepContext context = Mockito.spy(super.createStepContext(stepName, transformName, stateSampler)); Mockito.doReturn(mockTimerInternals).when(context).timerInternals(); return context; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java index 01c0f3359b580..a17aca014ae5a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java @@ -77,9 +77,9 @@ public class StreamingGroupAlsoByWindowsReshuffleDoFnTest { // StreamingGroupAlsoByWindows expects it to. So, hook that up. 
@Override - public ExecutionContext.StepContext createStepContext( + public StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { - ExecutionContext.StepContext context = + StepContext context = Mockito.spy(super.createStepContext(stepName, transformName, stateSampler)); Mockito.doReturn(null).when(context).timerInternals(); return context; From cb5a5192d9a5562aa95c4c114c499ac0a4ec8c8f Mon Sep 17 00:00:00 2001 From: chamikara Date: Mon, 7 Dec 2015 15:03:12 -0800 Subject: [PATCH 1216/1541] Fixes a null pointer in SourceOperationExecutor. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109625553 --- .../worker/SourceOperationExecutor.java | 18 ++++++++++++++---- .../sdk/runners/worker/DataflowWorkerTest.java | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java index f66c4d6e16574..f66476ab8e3f7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceOperationExecutor.java @@ -16,11 +16,13 @@ package com.google.cloud.dataflow.sdk.runners.worker; +import com.google.api.client.json.JsonFactory; import com.google.api.services.dataflow.model.SourceOperationRequest; import com.google.api.services.dataflow.model.SourceOperationResponse; import com.google.api.services.dataflow.model.SourceSplitResponse; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.runners.dataflow.CustomSources; +import com.google.cloud.dataflow.sdk.util.Transport; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; @@ -67,17 +69,25 @@ public SourceOperationResponse getResponse() { return response; } - static boolean isSplitResponseTooLarge(SourceOperationResponse operationResponse) { + static int determineSplitResponseSize(SourceOperationResponse operationResponse) { try { SourceSplitResponse splitResponse = operationResponse.getSplit(); - int size = splitResponse.getFactory().toByteArray(operationResponse).length; - return size >= SOURCE_OPERATION_RESPONSE_SIZE_LIMIT_MB * 1024 * 1024; + JsonFactory factory = splitResponse.getFactory(); + if (factory == null) { + factory = Transport.getJsonFactory(); + } + return factory.toByteArray(operationResponse).length; } catch (OutOfMemoryError e) { LOG.error("Got exception when trying to serialize split response: " + e.getMessage()); // We will go out of memory if split response is extremely large. 
- return true; + return Integer.MAX_VALUE; } catch (IOException e) { throw new RuntimeException(e); } } + + static boolean isSplitResponseTooLarge(SourceOperationResponse operationResponse) { + return determineSplitResponseSize(operationResponse) + >= SOURCE_OPERATION_RESPONSE_SIZE_LIMIT_MB * 1024 * 1024; + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java index 58d55ff2773bd..d575d6f429913 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkerTest.java @@ -18,18 +18,23 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.google.api.services.dataflow.model.SourceOperationResponse; +import com.google.api.services.dataflow.model.SourceSplitResponse; +import com.google.api.services.dataflow.model.SourceSplitShard; import com.google.api.services.dataflow.model.WorkItem; import com.google.api.services.dataflow.model.WorkItemStatus; import com.google.cloud.dataflow.sdk.options.DataflowWorkerHarnessOptions; import com.google.cloud.dataflow.sdk.testing.FastNanoClockAndSleeper; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.util.common.worker.WorkExecutor; +import com.google.common.collect.ImmutableList; import org.eclipse.jetty.server.LocalConnector; import org.eclipse.jetty.server.Server; @@ -138,6 +143,16 @@ public void testUnknownHandler() throws Exception { assertThat(response, containsString("HTTP/1.1 404 Not Found")); } + @Test + public void testIsSplitResponseTooLarge() { + SourceSplitResponse splitResponse = new SourceSplitResponse(); + splitResponse.setShards( + ImmutableList.of(new SourceSplitShard(), new SourceSplitShard())); + assertTrue( + SourceOperationExecutor.determineSplitResponseSize( + new SourceOperationResponse().setSplit(splitResponse)) > 0); + } + @Test public void testWorkItemStatusWithStateSamplerInfo() throws Exception { WorkItem workItem = new WorkItem() From f52c53f05f4cdf93b6dd4f2115e292a2699bdae5 Mon Sep 17 00:00:00 2001 From: malo Date: Mon, 7 Dec 2015 17:13:19 -0800 Subject: [PATCH 1217/1541] Remove NullPointerException in DataflowWorkProgressUpdaterTest Only start progress updates when the mock has been set. Otherwise there is a race between setting the mock return value and sending the progress update. This change reduces the flakiness of this test. 
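A minimal sketch of the ordering this change enforces, using an invented StatusClient interface
rather than the SDK's real classes: complete all stubbing before the background worker is
started, so the worker can never observe an unconfigured mock.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class StubBeforeStartSketch {
  /** Toy stand-in for the work-unit client; names are illustrative only. */
  interface StatusClient {
    String reportStatus();
  }

  public static void main(String[] args) throws InterruptedException {
    StatusClient client = mock(StatusClient.class);

    // Configure every stubbed return value *before* any background activity begins;
    // otherwise the updater thread can invoke reportStatus() while the stub is still
    // unset and observe Mockito's default null, yielding a NullPointerException.
    when(client.reportStatus()).thenReturn("ok");

    Thread updater = new Thread(() -> client.reportStatus());
    updater.start(); // start only after stubbing is complete
    updater.join();
  }
}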
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109638627 --- .../sdk/runners/worker/DataflowWorkProgressUpdaterTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index c7ec363285d5b..b46d34eab3531 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -199,7 +199,6 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { setUpCounters(3); setUpMetrics(2); setUpProgress(approximateProgressAtIndex(1L)); - progressUpdater.startReportingProgress(); // In tests below, we allow 500ms leeway. @@ -207,6 +206,10 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { // leaseExpirationTimestamp, progressReportInterval, suggestedStopPosition, nextReportIndex .thenReturn(generateServiceState( System.currentTimeMillis() + 2000, 1000, positionAtIndex(3L), 2L)); + + // Start progress updates. + progressUpdater.startReportingProgress(); + // The initial update should be sent at nowMillis+300 (+500ms leeway). verify(workUnitClient, timeout(800)).reportWorkItemStatus(argThat( new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( From f111e6762a3404c3241564808460789cffcff41c Mon Sep 17 00:00:00 2001 From: markshields Date: Tue, 8 Dec 2015 18:01:34 -0800 Subject: [PATCH 1218/1541] Refine LATE pane semantics and implementation * Rework definition of PaneInfo.{EARLY, ON_TIME, LATE} so that panes with only late data are always LATE, and an ON_TIME pane can never cause a later computation to yield a LATE pane. * Clarify watermark at T means any message with timestamp < T is 'late'. * Clarify timers fire when watermark progresses after timer's timestamp. * Distinguish 'input' from 'output' watermarks. * Protect against overflow in Windmill <-> Harness timestamp conversions. ----Release Notes---- Rework definition of PaneInfo.{EARLY, ON_TIME, LATE} so that panes with only late data are always LATE, and an ON_TIME pane can never cause a later computation to yield a LATE pane. 
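Purely as an editorial aid, the rules above can be read as a small decision procedure. The sketch
below uses invented names, is not SDK API, and simplifies away details such as allowed lateness
and trigger configuration.

public class PaneTimingSketch {
  enum Timing { EARLY, ON_TIME, LATE }

  /**
   * Illustrative reading of the semantics described above: a pane carrying only late elements
   * (elements whose timestamps are behind the input watermark) is always LATE; a pane emitted
   * while the input watermark is still before the end of the window is EARLY; once the
   * watermark has passed the end of the window there is at most one ON_TIME pane, and any
   * pane after it is LATE.
   */
  static Timing classify(
      long endOfWindowMillis,
      long inputWatermarkMillis,
      boolean paneHasOnlyLateElements,
      boolean onTimePaneAlreadyFired) {
    if (paneHasOnlyLateElements) {
      return Timing.LATE;
    }
    if (inputWatermarkMillis < endOfWindowMillis) {
      return Timing.EARLY;
    }
    return onTimePaneAlreadyFired ? Timing.LATE : Timing.ON_TIME;
  }
}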
[] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109749349 --- .../dataflow/sdk/io/UnboundedSource.java | 12 +- .../sdk/runners/worker/PubsubSink.java | 4 +- .../sdk/runners/worker/StateFetcher.java | 7 +- .../worker/StreamingDataflowWorker.java | 53 ++- .../worker/StreamingModeExecutionContext.java | 73 +++- .../worker/StreamingSideInputDoFnRunner.java | 8 +- .../sdk/runners/worker/WindmillSink.java | 4 +- .../worker/WindmillStateInternals.java | 9 +- .../sdk/runners/worker/WindmillTimeUtils.java | 106 +++++ .../transforms/windowing/BoundedWindow.java | 2 +- .../sdk/transforms/windowing/PaneInfo.java | 88 +++- .../sdk/util/BatchTimerInternals.java | 36 +- ...GroupAlsoByWindowsViaOutputBufferDoFn.java | 15 +- .../dataflow/sdk/util/NonEmptyPanes.java | 2 +- .../dataflow/sdk/util/PaneInfoTracker.java | 95 ++++- .../dataflow/sdk/util/ReduceFnRunner.java | 402 +++++++++--------- .../dataflow/sdk/util/TimerInternals.java | 65 ++- .../dataflow/sdk/util/WatermarkHold.java | 394 ++++++++++++++--- .../dataflow/sdk/util/WindowTracing.java | 36 ++ .../state/MergedWatermarkStateInternal.java | 14 +- .../dataflow/sdk/util/state/StateTags.java | 16 +- .../util/state/WatermarkStateInternal.java | 13 +- .../runners/dataflow/CustomSourcesTest.java | 3 +- .../sdk/runners/worker/KeyedWorkItemTest.java | 5 +- .../worker/StreamingDataflowWorkerTest.java | 42 +- .../StreamingGroupAlsoByWindowsDoFnTest.java | 16 +- ...ngGroupAlsoByWindowsReshuffleDoFnTest.java | 3 +- .../StreamingModeExecutionContextTest.java | 8 +- .../worker/WindmillStateReaderTest.java | 4 +- .../transforms/windowing/AfterAllTest.java | 10 +- .../transforms/windowing/AfterEachTest.java | 4 +- .../transforms/windowing/AfterFirstTest.java | 2 +- .../windowing/AfterProcessingTimeTest.java | 2 +- .../windowing/AfterWatermarkTest.java | 48 +-- .../windowing/DefaultTriggerTest.java | 23 +- .../windowing/OrFinallyTriggerTest.java | 2 +- .../sdk/util/BatchTimerInternalsTest.java | 4 +- .../util/GroupAlsoByWindowsProperties.java | 68 ++- .../sdk/util/TriggerExecutorTest.java | 187 ++++---- .../dataflow/sdk/util/TriggerTester.java | 318 ++++++++++---- .../dataflow/sdk/util/state/StateTagTest.java | 5 +- 41 files changed, 1539 insertions(+), 669 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillTimeUtils.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java index c904dd25a74fe..e585151c892a6 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java @@ -96,7 +96,8 @@ public boolean requiresDeduping() { } /** - * A marker representing the progress and state of an {@link UnboundedReader}. + * A marker representing the progress and state of an + * {@link com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader}. * *

For example, this could be offsets in a set of files being read. */ @@ -176,24 +177,25 @@ public byte[] getCurrentRecordId() throws NoSuchElementException { } /** - * Returns a lower bound on timestamps of future elements read by this reader. + * Returns a timestamp before or at the timestamps of all future elements read by this reader. * *

This can be approximate. If records are read that violate this guarantee, they will be * considered late, which will affect how they will be processed. See * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} for more information on * late data and how to handle it. * - *

This bound should be as tight as possible. Downstream windows will not be able to close - * until this watermark passes the end of the window. + *

However, this value should be as late as possible. Downstream windows may not be able + * to close until this watermark passes their end. * *

For example, a source may know that the records it reads will be in timestamp order. In - * this case, the watermark can be the timestamp of the last record read minus one. For a + * this case, the watermark can be the timestamp of the last record read. For a * source that does not have natural timestamps, timestamps can be set to the time of * reading, in which case the watermark is the current clock time. * *

See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} and * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger} for more * information on timestamps and watermarks. + * *

May be called after {@link #advance} or {@link #start} has returned false, but not before * {@link #start} has been called. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java index 47557b720102e..2c7c142fbe1d1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/PubsubSink.java @@ -29,7 +29,6 @@ import com.google.protobuf.ByteString; import java.io.IOException; -import java.util.concurrent.TimeUnit; /** * A sink that writes to Pubsub, via a Windmill server. @@ -97,11 +96,10 @@ private ByteString encode(Coder coder, T object) throws IOException { public long add(WindowedValue data) throws IOException { ByteString byteString = encode(coder, data); - long timestampMicros = TimeUnit.MILLISECONDS.toMicros(data.getTimestamp().getMillis()); outputBuilder.addMessages( Windmill.Message.newBuilder() .setData(byteString) - .setTimestamp(timestampMicros) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(data.getTimestamp())) .build()); return byteString.size(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StateFetcher.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StateFetcher.java index b096193436201..3f2aa38adc776 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StateFetcher.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StateFetcher.java @@ -117,11 +117,10 @@ public SideInputCacheEntry call() throws Exception { .setVersion(windowStream.toByteString()) .build()) .setStateFamily(stateFamily) - .setExistenceWatermarkDeadline( - TimeUnit.MILLISECONDS.toMicros(sideWindowStrategy + .setExistenceWatermarkDeadline(WindmillTimeUtils.harnessToWindmillTimestamp( + sideWindowStrategy .getTrigger().getSpec() - .getWatermarkThatGuaranteesFiring(sideWindow) - .getMillis())) + .getWatermarkThatGuaranteesFiring(sideWindow))) .build(); Windmill.GetDataResponse response; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 141271c36ae71..81bab4e2402c9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -74,6 +74,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nullable; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -418,16 +419,27 @@ private void dispatchLoop() { continue; } - long watermarkMicros = computationWork.getInputDataWatermark(); - final Instant inputDataWatermark = new Instant(watermarkMicros / 1000); + // May be null if input watermark not yet known. + // TODO: Can assert this is non-null once Windmill waits for known input watermark. + @Nullable + final Instant inputDataWatermark = + WindmillTimeUtils.windmillToHarnessInputWatermark( + computationWork.getInputDataWatermark()); ActiveWorkForComputation activeWork = activeWorkMap.get(computation); for (final Windmill.WorkItem workItem : computationWork.getWorkList()) { + // May be null if output watermark not yet known. 
+ @Nullable + final Instant outputDataWatermark = + WindmillTimeUtils.windmillToHarnessOutputWatermark( + workItem.getOutputDataWatermark()); + Preconditions.checkState(inputDataWatermark == null || outputDataWatermark == null + || !outputDataWatermark.isAfter(inputDataWatermark)); Work work = new Work(workItem.getWorkToken()) { - @Override - public void run() { - process(computation, mapTask, inputDataWatermark, workItem); - } - }; + @Override + public void run() { + process(computation, mapTask, inputDataWatermark, outputDataWatermark, workItem); + } + }; if (activeWork.activateWork(workItem.getKey(), work)) { workUnitExecutor.execute(work); } @@ -447,10 +459,8 @@ public long getWorkToken() { } } - private void process( - final String computation, - final MapTask mapTask, - final Instant inputDataWatermark, + private void process(final String computation, final MapTask mapTask, + @Nullable final Instant inputDataWatermark, @Nullable final Instant outputDataWatermark, final Windmill.WorkItem work) { LOG.debug("Starting processing for {}:\n{}", computation, work); @@ -494,10 +504,10 @@ private void process( ParallelInstruction read = mapTask.getInstructions().get(0); if (CustomSources.class.getName().equals( read.getRead().getSource().getSpec().get("@type"))) { + Coder coder = Serializer.deserialize(read.getOutputs().get(0).getCodec(), Coder.class); readOperation.receivers[0].addOutputCounter( new OutputObjectAndByteCounter( - new MapTaskExecutorFactory.ElementByteSizeObservableCoder<>( - Serializer.deserialize(read.getOutputs().get(0).getCodec(), Coder.class)), + new MapTaskExecutorFactory.ElementByteSizeObservableCoder<>(coder), worker.getOutputCounters().getAddCounterMutator()) .setSamplingPeriod(100) .countBytes("dataflow_input_size-" + mapTask.getSystemName())); @@ -510,7 +520,8 @@ private void process( WindmillStateReader stateReader = new WindmillStateReader( metricTrackingWindmillServer, computation, work.getKey(), work.getWorkToken()); StateFetcher localStateFetcher = stateFetcher.byteTrackingView(); - context.start(work, inputDataWatermark, stateReader, localStateFetcher, outputBuilder); + context.start(work, inputDataWatermark, outputDataWatermark, stateReader, localStateFetcher, + outputBuilder); for (Long callbackId : context.getReadyCommitCallbackIds()) { final Runnable callback = commitCallbacks.remove(callbackId); @@ -521,6 +532,7 @@ public void run() { try { callback.run(); } catch (Throwable t) { + // TODO: Count interesting failures. LOG.error("Source checkpoint finalization failed:", t); } } @@ -607,13 +619,12 @@ public void run() { if (reportFailure(computation, work, t)) { // Try again, after some delay and at the end of the queue to avoid a tight loop. sleep(10000); - workUnitExecutor.forceExecute( - new Runnable() { - @Override - public void run() { - process(computation, mapTask, inputDataWatermark, work); - } - }); + workUnitExecutor.forceExecute(new Runnable() { + @Override + public void run() { + process(computation, mapTask, inputDataWatermark, outputDataWatermark, work); + } + }); } else { // If we failed to report the error, the item is invalid and should // not be retried internally. It will be retried at the higher level. 
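The conversions used above come from the WindmillTimeUtils helper introduced later in this patch. A minimal sketch of its rounding rules, with made-up microsecond values:

    // Windmill reports an unknown watermark as Long.MIN_VALUE; it surfaces as null.
    Instant unknown = WindmillTimeUtils.windmillToHarnessInputWatermark(Long.MIN_VALUE);  // null

    // Input watermarks round down: 1,000,999us becomes 1,000ms ...
    Instant inputWM = WindmillTimeUtils.windmillToHarnessInputWatermark(1000999L);   // new Instant(1000)

    // ... while output watermarks round up: 1,000,001us becomes 1,001ms, so anything Windmill
    // already considers late is also late for the harness.
    Instant outputWM = WindmillTimeUtils.windmillToHarnessOutputWatermark(1000001L); // new Instant(1001)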
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java index d57ed1fe5bd78..116d3c6a7dc5d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java @@ -51,7 +51,8 @@ import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; + +import javax.annotation.Nullable; /** * {@link ExecutionContext} for use in streaming mode. @@ -60,12 +61,14 @@ public class StreamingModeExecutionContext extends DataflowExecutionContext { private final String stageName; private final Map, Map> sideInputCache; + // Per-key cache of active Reader objects in use by this process. private final ConcurrentMap readerCache; private final ConcurrentMap stateNameMap; private Windmill.WorkItem work; - private Instant inputDataWatermark; + @Nullable private Instant inputDataWatermark; + @Nullable private Instant outputDataWatermark; private WindmillStateReader stateReader; private StateFetcher stateFetcher; private Windmill.WorkItemCommitRequest.Builder outputBuilder; @@ -83,19 +86,21 @@ public StreamingModeExecutionContext( public void start( Windmill.WorkItem work, - Instant inputDataWatermark, + @Nullable Instant inputDataWatermark, + @Nullable Instant outputDataWatermark, WindmillStateReader stateReader, StateFetcher stateFetcher, Windmill.WorkItemCommitRequest.Builder outputBuilder) { this.work = work; this.inputDataWatermark = inputDataWatermark; + this.outputDataWatermark = outputDataWatermark; this.stateReader = stateReader; this.stateFetcher = stateFetcher; this.outputBuilder = outputBuilder; this.sideInputCache.clear(); for (ExecutionContext.StepContext stepContext : getAllStepContexts()) { - ((StepContext) stepContext).start(stateReader, inputDataWatermark); + ((StepContext) stepContext).start(stateReader, inputDataWatermark, outputDataWatermark); } } @@ -103,7 +108,7 @@ public void start( public StepContext createStepContext( String stepName, String transformName, StateSampler stateSampler) { StepContext context = new StepContext(stepName, transformName, stateSampler); - context.start(stateReader, inputDataWatermark); + context.start(stateReader, inputDataWatermark, outputDataWatermark); return context; } @@ -232,6 +237,7 @@ public void run() { } }); + @SuppressWarnings("unchecked") Coder checkpointCoder = ((UnboundedSource) activeReader.getCurrentSource()) .getCheckpointMarkCoder(); @@ -244,7 +250,8 @@ public void run() { } sourceStateBuilder.setState(stream.toByteString()); } - outputBuilder.setSourceWatermark(TimeUnit.MILLISECONDS.toMicros(watermark.getMillis())); + outputBuilder.setSourceWatermark( + WindmillTimeUtils.harnessToWindmillTimestamp(watermark)); long backlogBytes = activeReader.getSplitBacklogBytes(); if (backlogBytes == UnboundedSource.UnboundedReader.BACKLOG_UNKNOWN @@ -285,13 +292,16 @@ public static ByteString timerTag(TimerData key) { } private static class WindmillTimerInternals implements TimerInternals { - private Map timers = new HashMap<>(); - private Instant inputDataWatermark; + @Nullable private Instant inputDataWatermark; + @Nullable private Instant outputDataWatermark; private String stateFamily; - public WindmillTimerInternals(String stateFamily, Instant inputDataWatermark) { + public 
WindmillTimerInternals( + String stateFamily, @Nullable Instant inputDataWatermark, + @Nullable Instant outputDataWatermark) { this.inputDataWatermark = inputDataWatermark; + this.outputDataWatermark = outputDataWatermark; this.stateFamily = stateFamily; } @@ -310,11 +320,36 @@ public Instant currentProcessingTime() { return Instant.now(); } + /** + * {@inheritDoc} + * + *

Note that this value may be arbitrarily behind the global input watermark. Windmill + * simply reports the last known input watermark value at the time the GetWork response was + * constructed. However, if an element in a GetWork request has a timestamp at or ahead + * of the local input watermark then Windmill will not allow the local input watermark + * to advance until that element has been committed. + */ @Override - public Instant currentWatermarkTime() { + @Nullable + public Instant currentInputWatermarkTime() { return inputDataWatermark; } + /** + * {@inheritDoc} + * + *

Note that Windmill will provisionally hold the output watermark to the timestamp of the + * earliest element in a computation's GetWork response. (Elements with timestamps already + * behind the output watermark at the point the GetWork response is constructed will have + * no influence on the output watermark). The provisional hold will last until this work item is + * committed. It is the responsibility of the harness to impose any persistent holds it needs. + */ + @Override + @Nullable + public Instant currentOutputWatermarkTime() { + return outputDataWatermark; + } + public void persistTo(Windmill.WorkItemCommitRequest.Builder outputBuilder) { for (Entry entry : timers.entrySet()) { Windmill.Timer.Builder timer = outputBuilder.addOutputTimersBuilder() @@ -326,9 +361,8 @@ public void persistTo(Windmill.WorkItemCommitRequest.Builder outputBuilder) { // If the timer was being set (not deleted) then set a timestamp for it. if (entry.getValue()) { - long timestampMicros = - TimeUnit.MILLISECONDS.toMicros(entry.getKey().getTimestamp().getMillis()); - timer.setTimestamp(timestampMicros); + timer.setTimestamp( + WindmillTimeUtils.harnessToWindmillTimestamp(entry.getKey().getTimestamp())); } } timers.clear(); @@ -383,13 +417,14 @@ public StateSampler.ScopedState get() { /** * Update the {@code stateReader} used by this {@code StepContext}. */ - public void start(WindmillStateReader stateReader, Instant inputDataWatermark) { + public void start( + WindmillStateReader stateReader, @Nullable Instant inputDataWatermark, + @Nullable Instant outputDataWatermark) { boolean useStateFamilies = !stateNameMap.isEmpty(); - this.stateInternals = - new WindmillStateInternals( - prefix, useStateFamilies, stateReader, scopedReadStateSupplier); - this.timerInternals = new WindmillTimerInternals( - stateFamily, Preconditions.checkNotNull(inputDataWatermark)); + this.stateInternals = new WindmillStateInternals( + prefix, useStateFamilies, stateReader, scopedReadStateSupplier); + this.timerInternals = + new WindmillTimerInternals(stateFamily, inputDataWatermark, outputDataWatermark); } public void flushState() { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java index d37aaa4a7ead6..dcbdc2546a5e5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingSideInputDoFnRunner.java @@ -53,7 +53,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.TimeUnit; /** * Runs a DoFn by constructing the appropriate contexts and passing them in. 
@@ -286,12 +285,11 @@ private Windmill.GlobalDataRequest buildGlob .setTag(view.getTagInternal().getId()) .setVersion(windowStream.toByteString()) .build()) - .setExistenceWatermarkDeadline( - TimeUnit.MILLISECONDS.toMicros(sideWindowStrategy + .setExistenceWatermarkDeadline(WindmillTimeUtils.harnessToWindmillTimestamp( + sideWindowStrategy .getTrigger() .getSpec() - .getWatermarkThatGuaranteesFiring(sideInputWindow) - .getMillis())) + .getWatermarkThatGuaranteesFiring(sideInputWindow))) .build(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java index 1d6b15cebb9cb..9fc6cf75effbd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillSink.java @@ -41,7 +41,6 @@ import java.util.Collection; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.TimeUnit; class WindmillSink extends Sink> { private WindmillStreamWriter writer; @@ -143,9 +142,8 @@ public long add(WindowedValue data) throws IOException { productionMap.put(key, keyedOutput); } - long timestampMicros = TimeUnit.MILLISECONDS.toMicros(data.getTimestamp().getMillis()); Windmill.Message.Builder builder = Windmill.Message.newBuilder() - .setTimestamp(timestampMicros) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(data.getTimestamp())) .setData(value) .setMetadata(metadata); keyedOutput.addMessages(builder.build()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java index 9f88c78de915c..9f79ad6463839 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java @@ -49,7 +49,6 @@ import java.util.Random; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; /** * Implementation of {@link StateInternals} using Windmill to manage the underlying data. 
@@ -496,7 +495,7 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) { .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true) - .addTimestamps(TimeUnit.MILLISECONDS.toMicros(localAdditions.getMillis())); + .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); } else if (!cleared && localAdditions != null){ // Otherwise, we need to combine the local additions with the already persisted data combineWithPersisted(commitBuilder); @@ -529,7 +528,8 @@ private void combineWithPersisted(Windmill.WorkItemCommitRequest.Builder commitB commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) - .addTimestamps(TimeUnit.MILLISECONDS.toMicros(localAdditions.getMillis())); + .addTimestamps( + WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); } else { // The non-fast path does a read-modify-write Instant priorHold; @@ -546,7 +546,8 @@ private void combineWithPersisted(Windmill.WorkItemCommitRequest.Builder commitB .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true) - .addTimestamps(TimeUnit.MILLISECONDS.toMicros(combinedHold.getMillis())); + .addTimestamps( + WindmillTimeUtils.harnessToWindmillTimestamp(combinedHold)); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillTimeUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillTimeUtils.java new file mode 100644 index 0000000000000..e107aea3806cb --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillTimeUtils.java @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.common.base.Preconditions; + +import org.joda.time.Instant; + +import javax.annotation.Nullable; + +/** + * Some timestamp conversion helpers for working with Windmill. + */ +class WindmillTimeUtils { + /** + * Convert a Windmill output watermark to a harness watermark. + * + *

Windmill tracks time in microseconds while the harness uses milliseconds. + * Windmill will 'speculatively' hold the output watermark for a computation to the + * earliest input message timestamp, provided that message timestamp is at or after + * the current output watermark. Thus for soundness we must ensure + * 'Windmill considers message late' implies 'harness considers message late'. Thus we + * round up when converting from microseconds to milliseconds. + * + *

In other words, harness output watermark >= windmill output watermark. + */ + @Nullable + static Instant windmillToHarnessOutputWatermark(long watermarkUs) { + if (watermarkUs == Long.MIN_VALUE) { + // Unknown. + return null; + } else if (watermarkUs == Long.MAX_VALUE) { + // End of time. + return BoundedWindow.TIMESTAMP_MAX_VALUE; + } else { + // Round up to nearest millisecond. + return new Instant((watermarkUs + 999) / 1000); + } + } + + /** + * Convert a Windmill input watermark to a harness input watermark. + * + *

We round down, thus harness input watermark <= windmill input watermark. + */ + @Nullable + static Instant windmillToHarnessInputWatermark(long watermarkUs) { + if (watermarkUs == Long.MIN_VALUE) { + // Unknown. + return null; + } else if (watermarkUs == Long.MAX_VALUE) { + // End of time. + return BoundedWindow.TIMESTAMP_MAX_VALUE; + } else { + // Round down to nearest millisecond. + return new Instant(watermarkUs / 1000); + } + } + + /** + * Convert a Windmill message timestamp to a harness timestamp. + * + *

For soundness we require the test + * {@code harness message timestamp >= harness output watermark} to imply + * {@code windmill message timestamp >= windmill output watermark}. Thus + * we round timestamps down and output watermarks up. + */ + static Instant windmillToHarnessTimestamp(long timestampUs) { + // Windmill should never send us an unknown timestamp. + Preconditions.checkArgument(timestampUs != Long.MIN_VALUE); + if (timestampUs == Long.MAX_VALUE) { + // End of time. + return BoundedWindow.TIMESTAMP_MAX_VALUE; + } else { + // Round down to nearest millisecond. + return new Instant(timestampUs / 1000); + } + } + + /** + * Convert a harness timestamp to a Windmill timestamp. + */ + static long harnessToWindmillTimestamp(Instant timestamp) { + if (timestamp.equals(BoundedWindow.TIMESTAMP_MAX_VALUE)) { + // End of time. + return Long.MAX_VALUE; + } else { + return timestamp.getMillis() * 1000; + } + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java index 6b3ba58802671..0afd8e33c2d7c 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java @@ -40,7 +40,7 @@ public abstract class BoundedWindow { new Instant(TimeUnit.MICROSECONDS.toMillis(Long.MAX_VALUE)); /** - * Returns the upper bound of timestamps for values in this window. + * Returns the inclusive upper bound of timestamps for values in this window. */ public abstract Instant maxTimestamp(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java index 67d2d1b06b07a..18f7a973cc9d2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java @@ -19,6 +19,7 @@ import com.google.cloud.dataflow.sdk.coders.AtomicCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.GroupByKey; import com.google.cloud.dataflow.sdk.util.VarInt; import com.google.common.base.MoreObjects; @@ -31,26 +32,95 @@ import java.util.Objects; /** - * Provides information about the pane this value belongs to. Every pane is implicitly associated - * with a window. + * Provides information about the pane an element belongs to. Every pane is implicitly associated + * with a window. Panes are observable only via the + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.ProcessContext#pane} method of the context + * passed to a {@link DoFn#processElement} overridden method. * *

Note: This does not uniquely identify a pane, and should not be used for comparisons. */ public final class PaneInfo { - /** - * Enumerates the possibilities for how the timing of this pane firing related to the watermark. + * Enumerates the possibilities for the timing of this pane firing related to the + * input and output watermarks for its computation. + * + *

A window may fire multiple panes, and the timing of those panes generally follows the + * regular expression {@code EARLY* ON_TIME? LATE*}. Generally a pane is considered: + *

    + *
  1. {@code EARLY} if the system cannot be sure it has seen all data which may contribute + * to the pane's window. + *
  2. {@code ON_TIME} if the system predicts it has seen all the data which may contribute + * to the pane's window. + *
  3. {@code LATE} if the system has encountered new data after predicting no more could arrive. + * It is possible an {@code ON_TIME} pane has already been emitted, in which case any + * following panes are considered {@code LATE}. + *
+ * + *

Only an + * {@link AfterWatermark#pastEndOfWindow} trigger may produce an {@code ON_TIME} pane. + * With merging {@link WindowFn}s, windows may be merged to produce new windows that satisfy + * their own instance of the above regular expression. The only guarantee is that once a window + * produces a final pane, it will not be merged into any new windows. + * + *

The predictions above are made using the mechanism of watermarks. + * See {@link com.google.cloud.dataflow.sdk.util.TimerInternals} for more information + * about watermarks. + * + *

We can state some properties of {@code LATE} and {@code ON_TIME} panes, but first need some + * definitions: + *

    + *
  1. We'll call a pipeline 'simple' if it does not use + * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp} in + * any {@code DoFn}, and it uses the same + * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window.Bound#withAllowedLateness} + * argument value on all windows (or uses the default of {@link org.joda.time.Duration#ZERO}). + *
  2. We'll call an element 'locally late', from the point of view of a computation on a + * worker, if the element's timestamp is before the input watermark for that computation + * on that worker. The element is otherwise 'locally on-time'. + *
  3. We'll say 'the pane's timestamp' to mean the timestamp of the element produced to + * represent the pane's contents. + *
+ * + *

Then in simple pipelines: + *

    + *
  1. (Soundness) An {@code ON_TIME} pane can never cause a later computation to generate a + * {@code LATE} pane. (If it did, it would imply a later computation's input watermark progressed + * ahead of an earlier stage's output watermark, which by design is not possible.) + *
  2. (Liveness) An {@code ON_TIME} pane is emitted as soon as possible after the input + * watermark passes the end of the pane's window. + *
  3. (Consistency) A pane with only locally on-time elements will always be {@code ON_TIME}. + * And a {@code LATE} pane cannot contain locally on-time elements. + *
+ * + * However, note that: + *
    + *
  1. An {@code ON_TIME} pane may contain locally late elements. It may even contain only + * locally late elements. Provided a locally late element finds its way into an {@code ON_TIME} + * pane, its lateness becomes unobservable. +
  2. A {@code LATE} pane does not necessarily cause any following computation panes to be + * marked as {@code LATE}. + *
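 * <p>For example, on a purely illustrative timeline: a one-minute window with one minute of
 * allowed lateness, using an {@link AfterWatermark#pastEndOfWindow} trigger with early and
 * late firings, might emit panes timed {@code EARLY, EARLY, ON_TIME, LATE}: two speculative
 * firings while the input watermark is still before the end of the window, one firing as the
 * input watermark passes the end of the window, and one firing for an element that arrives
 * within the allowed lateness after that.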
*/ public enum Timing { - /** Pane was fired before the watermark passed the end of the window. */ + /** + * Pane was fired before the input watermark had progressed after the end of the window. + */ EARLY, - /** First pane fired after the watermark passed the end of the window. */ + /** + * Pane was fired by a {@link AfterWatermark#pastEndOfWindow} trigger because the input + * watermark progressed after the end of the window. However the output watermark has not + * yet progressed after the end of the window. Thus it is still possible to assign a timestamp + * to the element representing this pane which cannot be considered locally late by any + * following computation. + */ ON_TIME, - /** Panes fired after the {@code ON_TIME} firing. */ + /** + * Pane was fired after the output watermark had progressed past the end of the window. + */ LATE, /** - * This element was not produced in a triggered pane and its relation to the watermark is - * unknown. + * This element was not produced in a triggered pane and its relation to input and + * output watermarks is unknown. */ UNKNOWN; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java index d70900d3877ce..29492c3892335 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java @@ -18,6 +18,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; import org.joda.time.Instant; @@ -25,11 +26,12 @@ import java.util.PriorityQueue; import java.util.Set; +import javax.annotation.Nullable; + /** * TimerInternals that uses priority queues to manage the timers that are ready to fire. */ public class BatchTimerInternals implements TimerInternals { - /** Set of timers that are scheduled used for deduplicating timers. */ private Set existingTimers = new HashSet<>(); @@ -37,7 +39,7 @@ public class BatchTimerInternals implements TimerInternals { private PriorityQueue watermarkTimers = new PriorityQueue<>(11); private PriorityQueue processingTimers = new PriorityQueue<>(11); - private Instant watermarkTime; + private Instant inputWatermarkTime; private Instant processingTime; private PriorityQueue queue(TimeDomain domain) { @@ -46,7 +48,7 @@ private PriorityQueue queue(TimeDomain domain) { public BatchTimerInternals(Instant processingTime) { this.processingTime = processingTime; - this.watermarkTime = BoundedWindow.TIMESTAMP_MIN_VALUE; + this.inputWatermarkTime = BoundedWindow.TIMESTAMP_MIN_VALUE; } @Override @@ -68,8 +70,15 @@ public Instant currentProcessingTime() { } @Override - public Instant currentWatermarkTime() { - return watermarkTime; + public Instant currentInputWatermarkTime() { + return inputWatermarkTime; + } + + @Override + @Nullable + public Instant currentOutputWatermarkTime() { + // The output watermark is always undefined in batch mode. 
+ return null; } @Override @@ -80,13 +89,18 @@ public String toString() { .toString(); } - public void advanceWatermark(ReduceFnRunner runner, Instant newWatermark) { - this.watermarkTime = newWatermark; - advance(runner, newWatermark, TimeDomain.EVENT_TIME); + public void advanceInputWatermark(ReduceFnRunner runner, Instant newInputWatermark) { + Preconditions.checkState(!newInputWatermark.isBefore(inputWatermarkTime), + "Cannot move input watermark time backwards from %s to %s", inputWatermarkTime, + newInputWatermark); + inputWatermarkTime = newInputWatermark; + advance(runner, newInputWatermark, TimeDomain.EVENT_TIME); } public void advanceProcessingTime(ReduceFnRunner runner, Instant newProcessingTime) { - this.processingTime = newProcessingTime; + Preconditions.checkState(!newProcessingTime.isBefore(processingTime), + "Cannot move processing time backwards from %s to %s", processingTime, newProcessingTime); + processingTime = newProcessingTime; advance(runner, newProcessingTime, TimeDomain.PROCESSING_TIME); } @@ -96,8 +110,8 @@ private void advance(ReduceFnRunner runner, Instant newTime, TimeDom do { TimerData timer = timers.peek(); - // Timers fire if the new time is >= the timer - shouldFire = timer != null && !newTime.isBefore(timer.getTimestamp()); + // Timers fire if the new time is ahead of the timer + shouldFire = timer != null && newTime.isAfter(timer.getTimestamp()); if (shouldFire) { // Remove before firing, so that if the trigger adds another identical // timer we don't remove it. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java index 4afd7cb4483db..9da4b4deb3998 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java @@ -71,19 +71,22 @@ public void processElement( // Process the chunk of elements. runner.processElements(chunk); - // Then, since elements are sorted by their timestamp, advance the watermark to the first - // element, and fire any timers that may have been scheduled. - timerInternals.advanceWatermark(runner, chunk.iterator().next().getTimestamp()); + // Then, since elements are sorted by their timestamp, advance the input watermark + // to the first element, and fire any timers that may have been scheduled. + timerInternals.advanceInputWatermark(runner, chunk.iterator().next().getTimestamp()); // Fire any processing timers that need to fire timerInternals.advanceProcessingTime(runner, Instant.now()); + + // Leave the output watermark undefined. Since there's no late data in batch mode + // there's really no need to track it as we do for streaming. } - // Finish any pending windows by advancing the watermark to infinity. - timerInternals.advanceWatermark(runner, new Instant(Long.MAX_VALUE)); + // Finish any pending windows by advancing the input watermark to infinity. + timerInternals.advanceInputWatermark(runner, BoundedWindow.TIMESTAMP_MAX_VALUE); // Finally, advance the processing time to infinity to fire any timers. 
- timerInternals.advanceProcessingTime(runner, new Instant(Long.MAX_VALUE)); + timerInternals.advanceProcessingTime(runner, BoundedWindow.TIMESTAMP_MAX_VALUE); runner.persist(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java index b1f13c13b8777..c4f39a9b23813 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java @@ -53,7 +53,7 @@ public static NonEmptyPanes create( public abstract void clearPane(ReduceFn.Context context); /** - * Return true if the current pane for the window in {@code context} is non-empty. + * Return true if the current pane for the window in {@code context} is empty. */ public abstract StateContents isEmpty(ReduceFn.Context context); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java index 3b7858c1e71bc..05360f79d49e3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java @@ -15,6 +15,7 @@ */ package com.google.cloud.dataflow.sdk.util; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.PaneInfoCoder; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.Timing; @@ -24,40 +25,52 @@ import com.google.cloud.dataflow.sdk.util.state.StateTags; import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import org.joda.time.Instant; /** - * Encapsulates the logic for tracking the current {@link PaneInfo} and producing new PaneInfo for - * a trigger firing. + * Determine the timing and other properties of a new pane for a given computation, key and window. + * Incorporates any previous pane, whether the pane has been produced because an + * on-time {@link AfterWatermark} trigger firing, and the relation between the element's timestamp + * and the current output watermark. */ public class PaneInfoTracker { - private TimerInternals timerInternals; public PaneInfoTracker(TimerInternals timerInternals) { this.timerInternals = timerInternals; } - @VisibleForTesting static final StateTag> PANE_INFO_TAG = + @VisibleForTesting + static final StateTag> PANE_INFO_TAG = StateTags.makeSystemTagInternal(StateTags.value("pane", PaneInfoCoder.INSTANCE)); public void clear(StateContext state) { state.access(PANE_INFO_TAG).clear(); } - + /** + * Return a (future for) the pane info appropriate for {@code context}. The pane info + * includes the timing for the pane, who's calculation is quite subtle. + * + * @param isWatermarkTrigger should be {@code true} only if the pane is being emitted + * because a {@link AfterWatermark#pastEndOfWindow} trigger has fired. 
+ * @param isFinal should be {@code true} only if the triggering machinery can guarantee + * no further firings for the + */ public StateContents getNextPaneInfo(ReduceFn.Context context, - final boolean isForWatermarkTrigger, final boolean isFinal) { + final boolean isWatermarkTrigger, final boolean isFinal) { + final Object key = context.key(); final StateContents previousPaneFuture = context.state().access(PaneInfoTracker.PANE_INFO_TAG).get(); - final Instant endOfWindow = context.window().maxTimestamp(); + final Instant windowMaxTimestamp = context.window().maxTimestamp(); return new StateContents() { @Override public PaneInfo read() { PaneInfo previousPane = previousPaneFuture.read(); - return describePane(endOfWindow, previousPane, isForWatermarkTrigger, isFinal); + return describePane(key, windowMaxTimestamp, previousPane, isWatermarkTrigger, isFinal); } }; } @@ -66,21 +79,65 @@ public void storeCurrentPaneInfo(ReduceFn.Context context, PaneInfo context.state().access(PANE_INFO_TAG).set(currentPane); } - private PaneInfo describePane(Instant endOfWindow, PaneInfo prevPane, - boolean isForWatermarkTrigger, boolean isFinal) { - boolean isSpeculative = endOfWindow.isAfter(timerInternals.currentWatermarkTime()); - boolean isFirst = (prevPane == null); + private PaneInfo describePane(Object key, Instant windowMaxTimestamp, PaneInfo previousPane, + boolean isWatermarkTrigger, boolean isFinal) { + boolean isFirst = previousPane == null; + Timing previousTiming = isFirst ? null : previousPane.getTiming(); + long index = isFirst ? 0 : previousPane.getIndex() + 1; + long nonSpeculativeIndex = isFirst ? 0 : previousPane.getNonSpeculativeIndex() + 1; + Instant outputWM = timerInternals.currentOutputWatermarkTime(); + Instant inputWM = timerInternals.currentInputWatermarkTime(); + + // True if it is not possible to assign the element representing this pane a timestamp + // which will make an ON_TIME pane for any following computation. + // Ie true if the element's latest possible timestamp is before the current output watermark. + boolean isLateForOutput = outputWM != null && windowMaxTimestamp.isBefore(outputWM); + + // True if all emitted panes (if any) were EARLY panes. + // Once the ON_TIME pane has fired, all following panes must be considered LATE even + // if the output watermark is behind the end of the window. + boolean onlyEarlyPanesSoFar = previousTiming == null || previousTiming == Timing.EARLY; - long index = isFirst ? 0 : prevPane.getIndex() + 1; - long nonSpeculativeIndex; Timing timing; - if (isSpeculative) { + if (isLateForOutput || !onlyEarlyPanesSoFar) { + // The output watermark has already passed the end of this window, or we have already + // emitted a non-EARLY pane. Irrespective of how this pane was triggered we must + // consider this pane LATE. + timing = Timing.LATE; + } else if (isWatermarkTrigger) { + // This is the unique ON_TIME firing for the window. + timing = Timing.ON_TIME; + } else { + // All other cases are EARLY. timing = Timing.EARLY; nonSpeculativeIndex = -1; - } else { - boolean firstNonSpeculative = prevPane == null || prevPane.getTiming() == Timing.EARLY; - timing = (isForWatermarkTrigger && firstNonSpeculative) ? Timing.ON_TIME : Timing.LATE; - nonSpeculativeIndex = firstNonSpeculative ? 
0 : prevPane.getNonSpeculativeIndex() + 1; + } + + WindowTracing.debug( + "describePane: {} pane (prev was {}) for key:{}; windowMaxTimestamp:{}; " + + "inputWatermark:{}; outputWatermark:{}; isWatermarkTrigger:{}; isLateForOutput:{}", + timing, previousTiming, key, windowMaxTimestamp, inputWM, outputWM, isWatermarkTrigger, + isLateForOutput); + + if (previousPane != null) { + // Timing transitions should follow EARLY* ON_TIME? LATE* + switch (previousTiming) { + case EARLY: + Preconditions.checkState( + timing == Timing.EARLY || timing == Timing.ON_TIME || timing == Timing.LATE, + "EARLY cannot transition to %s", timing); + break; + case ON_TIME: + Preconditions.checkState( + timing == Timing.LATE, "ON_TIME cannot transition to %s", timing); + break; + case LATE: + Preconditions.checkState(timing == Timing.LATE, "LATE cannot transtion to %s", timing); + break; + case UNKNOWN: + break; + } + Preconditions.checkState(!previousPane.isLast(), "Last pane was not last after all."); } return PaneInfo.createPane(isFirst, isFinal, timing, index, nonSpeculativeIndex); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java index d539c84ddc066..742806db48110 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java @@ -33,10 +33,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; +import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.FluentIterable; import com.google.common.collect.Maps; +import org.joda.time.Duration; import org.joda.time.Instant; import java.util.Collection; @@ -46,8 +48,6 @@ import java.util.Map; import java.util.Set; -import javax.annotation.Nullable; - /** * Manages the execution of a {@link ReduceFn} after a {@link GroupByKeyOnly} has partitioned the * {@link PCollection} by key. @@ -73,7 +73,6 @@ * @param The type of windows this operates on. */ public class ReduceFnRunner { - public static final String DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER = "DroppedDueToClosedWindow"; public static final String DROPPED_DUE_TO_LATENESS_COUNTER = "DroppedDueToLateness"; @@ -121,7 +120,7 @@ public ReduceFnRunner( key, reduceFn, this.windowingStrategy, this.windowingInternals.stateInternals(), this.activeWindows, timerInternals); - this.watermarkHold = new WatermarkHold<>(windowingStrategy); + this.watermarkHold = new WatermarkHold<>(timerInternals, windowingStrategy); this.triggerRunner = new TriggerRunner<>( windowingStrategy.getTrigger(), new TriggerContextFactory<>(windowingStrategy, this.windowingInternals.stateInternals(), @@ -156,7 +155,8 @@ public void processElements(Iterable> values) { // Trigger output from any window that was triggered by merging or processing elements. 
for (Map.Entry result : results.entrySet()) { - handleTriggerResult(contextFactory.base(result.getKey()), false, result.getValue()); + handleTriggerResult( + contextFactory.base(result.getKey()), false/*isEndOfWindow*/, result.getValue()); } } @@ -181,7 +181,7 @@ private Function premergeForValues( // For any new windows that survived merging, make sure we've scheduled cleanup for (W window : newWindows) { if (activeWindows.contains(window)) { - scheduleCleanup(contextFactory.base(window)); + scheduleEndOfWindowOrGarbageCollectionTimer(contextFactory.base(window)); } } @@ -226,7 +226,7 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul } else { // If there were no windows, then merging didn't rearrange the cleanup timers. Make // sure that we have one properly scheduled - scheduleCleanup(contextFactory.base(resultWindow)); + scheduleEndOfWindowOrGarbageCollectionTimer(contextFactory.base(resultWindow)); } for (W mergedWindow : mergedWindows) { @@ -235,7 +235,7 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul // If the window wasn't in the persisted original set, then we scheduled cleanup above // but didn't pass it to merge to have the cleanup canceled. Do so here if (!originalWindows.contains(mergedWindow)) { - cancelCleanup(contextFactory.base(mergedWindow)); + cancelEndOfWindowAndGarbageCollectionTimers(contextFactory.base(mergedWindow)); } } } @@ -247,6 +247,13 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul return sourceWindowsToResultWindows; } + /** Is {@code value} late w.r.t. the garbage collection watermark? */ + private boolean canDropDueToLateness(WindowedValue value) { + Instant inputWM = timerInternals.currentInputWatermarkTime(); + return inputWM != null + && value.getTimestamp().isBefore(inputWM.minus(windowingStrategy.getAllowedLateness())); + } + /** * Add the initial windows from each of the values to the active window set. Returns the set of * new windows. @@ -254,7 +261,9 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul private Set addToActiveWindows(Iterable> values) { Set newWindows = new HashSet<>(); for (WindowedValue value : values) { - if (getLateness(value.getTimestamp()).isPastAllowedLateness) { + if (canDropDueToLateness(value)) { + // This value will be dropped (and reported in a counter) by processElement. + // Hence it won't contribute to any new window. continue; } @@ -280,15 +289,21 @@ private Set addToActiveWindows(Iterable> values) { * for the given result. * @param value the value being processed */ - private void processElement(Function windowMapping, Map results, - WindowedValue value) { - Lateness lateness = getLateness(value.getTimestamp()); - if (lateness.isPastAllowedLateness) { + private void processElement( + Function windowMapping, Map results, WindowedValue value) { + if (canDropDueToLateness(value)) { // Drop the element in all assigned windows if it is past the allowed lateness limit. droppedDueToLateness.addValue((long) value.getWindows().size()); + WindowTracing.debug( + "processElement: Dropping element at {} for key:{} since too far " + + "behind inputWatermark:{}; outputWatermark:{}", + value.getTimestamp(), key, timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); return; } + // Only consider representative windows from among all windows in equivalence classes + // induced by window merging. 
@SuppressWarnings("unchecked") Iterable windows = FluentIterable.from((Collection) value.getWindows()).transform(windowMapping); @@ -315,16 +330,17 @@ private void processElement(Function windowMapping, Map nonEmptyPanes.recordContent(context); - // Make sure we've scheduled the cleanup timer for this window, if the premerge didn't already - // do that. + // Make sure we've scheduled the end-of-window or garbage collection timer for this window + // However if we have pre-merged then they will already have been scheduled. if (windowingStrategy.getWindowFn().isNonMerging()) { - // Since non-merging window functions don't track the active window set, we always schedule - // cleanup. - scheduleCleanup(context); + scheduleEndOfWindowOrGarbageCollectionTimer(context); } - // Update the watermark hold since the value will be part of the next pane. - watermarkHold.addHold(context, lateness.isLate); + // Hold back progress of the output watermark until we have processed the pane this + // element will be included within. Also add a hold at the end-of-window or garbage + // collection time to allow empty panes to contribute elements which won't be dropped + // due to lateness. + watermarkHold.addHolds(context); // Execute the reduceFn, which will buffer the value as appropriate try { @@ -348,14 +364,6 @@ private void processElement(Function windowMapping, Map } } - private void holdForEmptyPanes(ReduceFn.Context context) { - if (timerInternals.currentWatermarkTime().isAfter(context.window().maxTimestamp())) { - watermarkHold.holdForFinal(context); - } else { - watermarkHold.holdForOnTime(context); - } - } - /** * Make sure that all the state built up in this runner has been persisted. */ @@ -398,7 +406,7 @@ public TriggerResult onMerge( if (!mergedWindow.equals(resultWindow)) { try { ReduceFn.Context mergedContext = contextFactory.base(mergedWindow); - cancelCleanup(mergedContext); + cancelEndOfWindowAndGarbageCollectionTimers(mergedContext); triggerRunner.clearEverything(mergedContext); paneInfo.clear(mergedContext.state()); } catch (Exception e) { @@ -411,87 +419,101 @@ public TriggerResult onMerge( // Schedule cleanup if the window is new. Do this after cleaning up the old state in case one // of them had a timer at the same point. if (isResultWindowNew) { - scheduleCleanup(resultContext); + scheduleEndOfWindowOrGarbageCollectionTimer(resultContext); } return triggerResult; } /** - * Called when a timer fires. + * Called when an end-of-window, garbage collection, or trigger-specific timer fires. */ public void onTimer(TimerData timer) { - if (!(timer.getNamespace() instanceof WindowNamespace)) { - throw new IllegalArgumentException( - "Expected WindowNamespace, but was " + timer.getNamespace()); - } - + // Which window is the timer for? + Preconditions.checkArgument(timer.getNamespace() instanceof WindowNamespace, + "Expected timer to be in WindowNamespace, but was in %s", timer.getNamespace()); @SuppressWarnings("unchecked") WindowNamespace windowNamespace = (WindowNamespace) timer.getNamespace(); W window = windowNamespace.getWindow(); - if (!activeWindows.contains(window) && windowingStrategy.getWindowFn().isNonMerging()) { - throw new IllegalStateException( - "Internal Error: Received timer " + timer + " for inactive window: " + window); - } + // If the window is subject to merging then all timers should have been cleared upon merge. 
+ Preconditions.checkState( + !windowingStrategy.getWindowFn().isNonMerging() || activeWindows.contains(window), + "Received timer %s for inactive window %s", timer, window); ReduceFn.Context context = contextFactory.base(window); - // If this timer firing is at the watermark, then it may cause a trigger firing of an - // AfterWatermark trigger. - boolean isAtWatermark = TimeDomain.EVENT_TIME == timer.getDomain() - && !timer.getTimestamp().isBefore(window.maxTimestamp()); - - if (shouldCleanup(timer, window)) { - // We're going to cleanup the window. We want to treat any potential output from this as - // the at-watermark firing if the current time is the at-watermark firing and there was a - // trigger waiting for it. - if (isAtWatermark) { - TriggerResult timerResult = runTriggersForTimer(context, timer); - isAtWatermark = (timerResult != null && timerResult.isFire()); + // If this is an end-of-window timer then we should test if an AfterWatermark trigger + // will fire. + // It's fine if the window trigger has such trigger, this flag is only used to decide + // if an emitted pane should be classified as ON_TIME. + boolean isEndOfWindowTimer = + TimeDomain.EVENT_TIME == timer.getDomain() + && timer.getTimestamp().equals(window.maxTimestamp()); + + // If this is a garbage collection timer then we should trigger and garbage collect the window. + Instant cleanupTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()); + boolean isGarbageCollection = + TimeDomain.EVENT_TIME == timer.getDomain() && timer.getTimestamp().equals(cleanupTime); + + if (isGarbageCollection) { + WindowTracing.debug( + "onTimer: Cleaning up for key:{}; window:{} at {} with " + + "inputWatermark:{}; outputWatermark:{}", + key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + + if (activeWindows.contains(window) && !triggerRunner.isClosed(context.state())) { + // We need to call onTrigger to emit the final pane if required. + // The final pane *may* be ON_TIME if: + // - AllowedLateness = 0 (ie the timer is at end-of-window), and; + // - The trigger fires on the end-of-window timer. + boolean isWatermarkTrigger = + isEndOfWindowTimer && runTriggersForTimer(context, timer).isFire(); + onTrigger(context, isWatermarkTrigger, true/*isFinish*/); } - // Do the actual cleanup + // Clear all the state for this window since we'll never see elements for it again. try { - doCleanup(context, isAtWatermark); + clearAllState(context); } catch (Exception e) { Throwables.propagateIfInstanceOf(e, UserCodeException.class); throw new RuntimeException( "Exception while garbage collecting window " + windowNamespace.getWindow(), e); } } else { + WindowTracing.debug( + "onTimer: Triggering for key:{}; window:{} at {} with " + + "inputWatermark:{}; outputWatermark:{}", + key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + boolean isFinish = false; if (activeWindows.contains(window) && !triggerRunner.isClosed(context.state())) { - handleTriggerResult(context, isAtWatermark, runTriggersForTimer(context, timer)); + TriggerResult result = runTriggersForTimer(context, timer); + handleTriggerResult(context, isEndOfWindowTimer, result); + isFinish = result.isFinish(); } - if (TimeDomain.EVENT_TIME == timer.getDomain() - // If we processed an on-time firing, we should schedule the GC timer. 
- && timer.getTimestamp().isEqual(window.maxTimestamp())) { - scheduleCleanup(context); + if (isEndOfWindowTimer && !isFinish) { + // Since we are processing an on-time firing we should schedule the garbage collection + // timer. (If getAllowedLateness is zero then the timer event will be considered a + // cleanup event and handled by the above). + Preconditions.checkState( + windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO), + "Unexpected zero getAllowedLateness"); + WindowTracing.debug( + "onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with " + + "inputWatermark:{}; outputWatermark:{}", + key, context.window(), cleanupTime, timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + context.timers().setTimer(cleanupTime, TimeDomain.EVENT_TIME); } } } - /** - * Return true if either the timer looks like a cleanup timer or the current watermark is so far - * gone that we should cleanup the window. - */ - private boolean shouldCleanup(TimerData timer, W window) { - return TimeDomain.EVENT_TIME == timer.getDomain() - && (isCleanupTime(window, timer.getTimestamp()) - || isCleanupTime(window, timerInternals.currentWatermarkTime())); - } - - @Nullable private TriggerResult runTriggersForTimer( ReduceFn.Context context, TimerData timer) { - triggerRunner.prefetchForTimer(context.state()); - // Skip timers for windows that were closed by triggers, but haven't expired yet. - if (triggerRunner.isClosed(context.state())) { - return null; - } - try { return triggerRunner.onTimer(context, timer); } catch (Exception e) { @@ -500,16 +522,17 @@ private TriggerResult runTriggersForTimer( } } - /** Called when the cleanup timer has fired for the given window. */ - private void doCleanup( - ReduceFn.Context context, boolean maybeAtWatermark) throws Exception { - // If the window isn't closed, or if we should always fire a final pane, then trigger output - if (!triggerRunner.isClosed(context.state()) - || windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) { - onTrigger(context, maybeAtWatermark, true /* isFinal */); - } - - // Cleanup the associated state. + /** + * Clear all the state associated with {@code context}'s window. + * Should only be invoked if we know all future elements for this window will be considered + * beyond allowed lateness. + * This is a superset of the clearing done by {@link #handleTriggerResult} below since: + *
    + *
  1. We can clear the trigger state tombstone since we'll never need to ask about it again. + *
  2. We can clear any remaining garbage collection hold. + *
+ */ + private void clearAllState(ReduceFn.Context context) throws Exception { nonEmptyPanes.clearPane(context); try { reduceFn.clearState(context); @@ -518,19 +541,39 @@ private void doCleanup( } triggerRunner.clearEverything(context); paneInfo.clear(context.state()); - watermarkHold.releaseOnTime(context); + watermarkHold.clear(context); + } + + /** Should the reduce function state be cleared? */ + private boolean shouldDiscardAfterFiring(TriggerResult result) { + if (result.isFinish()) { + // This is the last firing for trigger. + return true; + } + if (windowingStrategy.getMode() == AccumulationMode.DISCARDING_FIRED_PANES) { + // Nothing should be accumulated between panes. + return true; + } + return false; } - private void handleTriggerResult( - ReduceFn.Context context, - boolean maybeAtWatermark, TriggerResult result) { - // Unless the trigger is firing, there is nothing to do. + /** + * Possibly emit a pane if a trigger fired or timers require it, and cleanup state. + */ + private void handleTriggerResult(ReduceFn.Context context, + boolean isEndOfWindow, TriggerResult result) { if (!result.isFire()) { + // Ignore unless trigger fired. return; } + // If the trigger fired due to an end-of-window timer, treat it as an AfterWatermark trigger. + boolean isWatermarkTrigger = isEndOfWindow; + // Run onTrigger to produce the actual pane contents. - onTrigger(context, maybeAtWatermark, result.isFinish()); + // As a side effect it will clear all element holds, but not necessarily any + // end-of-window or garbage collection holds. + onTrigger(context, isWatermarkTrigger, result.isFinish()); // Now that we've triggered, the pane is empty. nonEmptyPanes.clearPane(context); @@ -550,10 +593,10 @@ private void handleTriggerResult( if (result.isFinish()) { // If we're finishing, clear up the trigger tree as well. + // However, we'll leave behind a tombstone so we know the trigger is finished. try { triggerRunner.clearState(context); paneInfo.clear(context.state()); - watermarkHold.releaseFinal(context); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while clearing trigger state", e); @@ -561,129 +604,108 @@ private void handleTriggerResult( } } - public static StateContents stateContentsOf(final T value) { - return new StateContents() { - @Override - public T read() { - return value; - } - }; + /** + * Do we need to emit a pane? + */ + private boolean needToEmit( + boolean isEmpty, boolean isWatermarkTrigger, boolean isFinish, PaneInfo.Timing timing) { + if (!isEmpty) { + // The pane has elements. + return true; + } + if (isWatermarkTrigger && timing == Timing.ON_TIME) { + // This is the unique ON_TIME pane, triggered by an AfterWatermark. + return true; + } + if (isFinish && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) { + // This is known to be the final pane, and the user has requested it even when empty. + return true; + } + return false; } /** * Run the {@link ReduceFn#onTrigger} method and produce any necessary output. 
* * @param context the context for the pane to fire - * @param isAtWatermark true if this triggering is for an AfterWatermark trigger - * @param isFinal true if this will be the last triggering processed + * @param isWatermarkTrigger true if this triggering is for an AfterWatermark trigger + * @param isFinish true if this will be the last triggering processed */ private void onTrigger(final ReduceFn.Context context, - boolean isAtWatermark, boolean isFinal) { - StateContents outputTimestampFuture = watermarkHold.extractAndRelease(context); + boolean isWatermarkTrigger, boolean isFinish) { + // Collect state. + StateContents outputTimestampFuture = + watermarkHold.extractAndRelease(context, isFinish); StateContents paneFuture = - paneInfo.getNextPaneInfo(context, isAtWatermark, isFinal); + paneInfo.getNextPaneInfo(context, isWatermarkTrigger, isFinish); StateContents isEmptyFuture = nonEmptyPanes.isEmpty(context); reduceFn.prefetchOnTrigger(context.state()); + // Calculate the pane info. final PaneInfo pane = paneFuture.read(); + // Extract the window hold, and as a side effect clear it. final Instant outputTimestamp = outputTimestampFuture.read(); - boolean shouldOutput = - // If the pane is not empty - !isEmptyFuture.read() - // or this is the final pane, and the user has asked for it even if its empty - || (isFinal && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) - // or this is the on-time firing, and the user explicitly requested it. - || (isAtWatermark && pane.getTiming() == Timing.ON_TIME); - - // We've consumed the empty pane hold by reading it, so reinstate that, if necessary. - if (!isFinal) { - holdForEmptyPanes(context); - } - - // If there is nothing to output, we're done. - if (!shouldOutput) { - return; - } - - // Run reduceFn.onTrigger method. - final List windows = Collections.singletonList(context.window()); - ReduceFn.OnTriggerContext triggerContext = contextFactory.forTrigger( - context.window(), paneFuture, new OnTriggerCallbacks() { - @Override - public void output(OutputT toOutput) { - // We're going to output panes, so commit the (now used) PaneInfo. - paneInfo.storeCurrentPaneInfo(context, pane); - - // Output the actual value. - windowingInternals.outputWindowedValue( - KV.of(key, toOutput), outputTimestamp, windows, pane); - } - }); + // Only emit a pane if it has data or empty panes are observable. + if (needToEmit(isEmptyFuture.read(), isWatermarkTrigger, isFinish, pane.getTiming())) { + // Run reduceFn.onTrigger method. + final List windows = Collections.singletonList(context.window()); + ReduceFn.OnTriggerContext triggerContext = contextFactory.forTrigger( + context.window(), paneFuture, new OnTriggerCallbacks() { + @Override + public void output(OutputT toOutput) { + // We're going to output panes, so commit the (now used) PaneInfo. + // TODO: Unnecessary if isFinal? + paneInfo.storeCurrentPaneInfo(context, pane); + + // Output the actual value. + windowingInternals.outputWindowedValue( + KV.of(key, toOutput), outputTimestamp, windows, pane); + } + }); - try { - reduceFn.onTrigger(triggerContext); - } catch (Exception e) { - throw wrapMaybeUserException(e); + try { + reduceFn.onTrigger(triggerContext); + } catch (Exception e) { + throw wrapMaybeUserException(e); + } } } - private Instant cleanupTime(W window) { - return window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()); - } - - /** Return true if {@code timestamp} is past the cleanup time for {@code window}. 
*/ - private boolean isCleanupTime(W window, Instant timestamp) { - return !timestamp.isBefore(cleanupTime(window)); - } - - private void scheduleCleanup(ReduceFn.Context context) { - if (timerInternals.currentWatermarkTime().isAfter(context.window().maxTimestamp())) { - context.timers().setTimer(cleanupTime(context.window()), TimeDomain.EVENT_TIME); - } else { - context.timers().setTimer(context.window().maxTimestamp(), TimeDomain.EVENT_TIME); + private void scheduleEndOfWindowOrGarbageCollectionTimer(ReduceFn.Context context) { + Instant fireTime = context.window().maxTimestamp(); + String which = "end-of-window"; + Instant inputWM = timerInternals.currentInputWatermarkTime(); + if (inputWM != null && fireTime.isBefore(inputWM)) { + fireTime = fireTime.plus(windowingStrategy.getAllowedLateness()); + which = "garbage collection"; + Preconditions.checkState(!fireTime.isBefore(inputWM), + "Asking to set a timer at %s behind input watermark %s", fireTime, inputWM); } + WindowTracing.trace( + "scheduleTimer: Scheduling {} timer at {} for " + + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}", + which, fireTime, key, context.window(), inputWM, + timerInternals.currentOutputWatermarkTime()); + context.timers().setTimer(fireTime, TimeDomain.EVENT_TIME); } - private void cancelCleanup(ReduceFn.Context context) { - context.timers().deleteTimer(cleanupTime(context.window()), TimeDomain.EVENT_TIME); - context.timers().deleteTimer(context.window().maxTimestamp(), TimeDomain.EVENT_TIME); - } - - private boolean shouldDiscardAfterFiring(TriggerResult result) { - return result.isFinish() - || (result.isFire() - && AccumulationMode.DISCARDING_FIRED_PANES == windowingStrategy.getMode()); - } - - ////////////////////////////////////////////////////////////////////////////////////////////////// - - private enum Lateness { - NOT_LATE(false, false), - LATE(true, false), - PAST_ALLOWED_LATENESS(true, true); - - private final boolean isLate; - private final boolean isPastAllowedLateness; - - private Lateness(boolean isLate, boolean isPastAllowedLateness) { - this.isLate = isLate; - this.isPastAllowedLateness = isPastAllowedLateness; + private void cancelEndOfWindowAndGarbageCollectionTimers(ReduceFn.Context context) { + WindowTracing.debug( + "cancelTimer: Deleting timers for " + + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}", + key, context.window(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + Instant timer = context.window().maxTimestamp(); + context.timers().deleteTimer(timer, TimeDomain.EVENT_TIME); + if (windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO)) { + timer = timer.plus(windowingStrategy.getAllowedLateness()); + context.timers().deleteTimer(timer, TimeDomain.EVENT_TIME); } } - private Lateness getLateness(Instant timestamp) { - Instant latestAllowed = - timerInternals.currentWatermarkTime().minus(windowingStrategy.getAllowedLateness()); - if (timestamp.isBefore(latestAllowed)) { - return Lateness.PAST_ALLOWED_LATENESS; - } else if (timestamp.isBefore(timerInternals.currentWatermarkTime())) { - return Lateness.LATE; - } else { - return Lateness.NOT_LATE; - } - } + ////////////////////////////////////////////////////////////////////////////////////////////////// private RuntimeException wrapMaybeUserException(Throwable t) { if (reduceFn instanceof SystemReduceFn) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java index 522fbf64afd8a..ff7576cd88831 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java @@ -23,6 +23,8 @@ import java.util.Objects; +import javax.annotation.Nullable; + /** * Encapsulate interaction with time within the execution environment. * @@ -51,9 +53,68 @@ public interface TimerInternals { Instant currentProcessingTime(); /** - * Returns an estimate of the current timestamp in the {@link TimeDomain#EVENT_TIME} time domain. + * Return the current, local input watermark timestamp for this computation + * in the {@link TimeDomain#EVENT_TIME} time domain. Return {@code null} if unknown. + * + *

This value: + *

    + *
  1. Is monotonically increasing. + *
  2. May differ between workers due to network and other delays. + *
  3. Will never be ahead of the global input watermark for this computation. But it + * may be arbitrarily behind the global input watermark. + *
  4. Any element with a timestamp before the local input watermark can be considered + * 'locally late' and be subject to special processing or be dropped entirely. + *
+ * + *

Note that because the local input watermark can be behind the global input watermark, + * it is possible for an element to be considered locally on-time even though it is + * globally late. + */ + @Nullable + Instant currentInputWatermarkTime(); + + /** + * Return the current, local output watermark timestamp for this computation + * in the {@link TimeDomain#EVENT_TIME} time domain. Return {@code null} if unknown. + * + *

This value: + *

    + *
  1. Is monotonically increasing. + *
  2. Will never be ahead of {@link #currentInputWatermarkTime} as returned above. + *
  3. May differ between workers due to network and other delays. + *
  4. Will, however, never be behind the global input watermark for any following computation. + *
+ * + *

In pictures: + *

+   *  |              |       |       |       |
+   *  |              |   D   |   C   |   B   |   A
+   *  |              |       |       |       |
+   * GIWM     <=    GOWM <= LOWM <= LIWM <= GIWM
+   * (next stage)
+   * -------------------------------------------------> event time
+   * 
+ * where + *
    + *
  • LOWM = local output watermark. + *
  • GOWM = global output watermark. + *
  • GIWM = global input watermark. + *
  • LIWM = local input watermark. + *
  • A = A globally on-time element. + *
  • B = A globally late, but locally on-time element. + *
  • C = A locally late element which may still contribute to the timestamp of a pane. + *
  • D = A locally late element which cannot contribute to the timestamp of a pane. + *
+ * + *
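As an aside (not part of the patch itself), the ordering pictured above can be made concrete with a minimal sketch. The watermark values, the class name WatermarkOrderingSketch, and the printed flags are hypothetical; only standard Joda-Time calls are used, and the comparisons merely mirror the checks that ReduceFnRunner and WatermarkHold make against currentInputWatermarkTime() and currentOutputWatermarkTime().

import org.joda.time.Instant;

public class WatermarkOrderingSketch {
  public static void main(String[] args) {
    // Hypothetical local watermarks for one worker of a computation.
    Instant localInputWatermark = new Instant(100);  // LIWM
    Instant localOutputWatermark = new Instant(80);  // LOWM, never ahead of LIWM
    Instant elementTimestamp = new Instant(90);      // an element between the two

    // An element behind the local input watermark is 'locally late'.
    boolean locallyLate = elementTimestamp.isBefore(localInputWatermark);

    // A hold requested behind the local output watermark could no longer be honored
    // by the backend; WatermarkHold applies the same guard before adding an element hold.
    boolean holdTooLate = elementTimestamp.isBefore(localOutputWatermark);

    System.out.println("locally late: " + locallyLate + "; hold too late: " + holdTooLate);
  }
}

With these example values the element is locally late, yet a hold at its timestamp can still be honored, which corresponds to the 'unobservably late' situation described for WatermarkHold later in this patch.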

Note that if a computation emits an element which is not before the current output watermark + * then that element will always appear locally on-time in all following computations. However, + * it is possible for an element emitted before the current output watermark to appear locally + * on-time in a following computation. Thus we must be careful to never assume locally late data + * viewed on the output of a computation remains locally late on the input of a following + * computation. */ - Instant currentWatermarkTime(); + @Nullable + Instant currentOutputWatermarkTime(); /** * Data about a timer as represented within {@link TimerInternals}. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java index d480369b52e65..cee3eeb0b95ed 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java @@ -17,13 +17,14 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior; -import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.state.StateContents; import com.google.cloud.dataflow.sdk.util.state.StateTag; import com.google.cloud.dataflow.sdk.util.state.StateTags; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import org.joda.time.Duration; import org.joda.time.Instant; @@ -31,114 +32,383 @@ import java.io.Serializable; /** - * Implements the logic needed to hold the output watermark back to emit - * values with specific timestamps. + * Implements the logic needed to hold the output watermark for a computation back + * until it has seen all the elements it needs based on the input watermark for the + * computation. + * + *

The backend ensures the output watermark can never progress beyond the + * input watermark for a computation. GroupAlsoByWindows computations may add a 'hold' + * to the output watermark in order to prevent it progressing beyond a time within a window. + * The hold will be 'cleared' when the associated pane is emitted. * * @param The kind of {@link BoundedWindow} the hold is for. */ public class WatermarkHold implements Serializable { - - /** Watermark hold used for the actual data-based hold. */ - @VisibleForTesting static final String DATA_HOLD_ID = "hold"; - @VisibleForTesting static StateTag watermarkHoldTagForOutputTimeFn( + /** + * Return tag for state containing the output watermark hold + * used for elements. + */ + public static StateTag watermarkHoldTagForOutputTimeFn( OutputTimeFn outputTimeFn) { - return StateTags.makeSystemTagInternal( - StateTags.watermarkStateInternal(DATA_HOLD_ID, outputTimeFn)); + return StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal("hold", outputTimeFn)); } + /** + * Tag for state containing end-of-window and garbage collection output watermark holds. + * (We can't piggy-back on the data hold state since the outputTimeFn may be + * {@link OutputTimeFns#outputAtLatestInputTimestamp()}, in which case every pane will + * would take the end-of-window time as its element time. + */ + @VisibleForTesting + public static final StateTag EXTRA_HOLD_TAG = + StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal( + "extra", OutputTimeFns.outputAtEarliestInputTimestamp())); + + private final TimerInternals timerInternals; private final WindowingStrategy windowingStrategy; - private final StateTag watermarkHoldTag; + private final StateTag elementHoldTag; - public WatermarkHold(WindowingStrategy windowingStrategy) { + public WatermarkHold(TimerInternals timerInternals, WindowingStrategy windowingStrategy) { + this.timerInternals = timerInternals; this.windowingStrategy = windowingStrategy; - this.watermarkHoldTag = watermarkHoldTagForOutputTimeFn(windowingStrategy.getOutputTimeFn()); + this.elementHoldTag = watermarkHoldTagForOutputTimeFn(windowingStrategy.getOutputTimeFn()); } /** - * Update the watermark hold to include the appropriate output timestamp for the value in - * {@code c}. + * Add a hold to prevent the output watermark progressing beyond the (possibly adjusted) timestamp + * of the element in {@code context}. We allow the actual hold time to be shifted later by + * {@link OutputTimeFn#assignOutputTime}, but no further than the end of the window. The hold will + * remain until cleared by {@link #extractAndRelease}. + * + *

In the following we'll write {@code E} to represent an element, {@code IWM} for + * the local input watermark, {@code OWM} for the local output watermark, and {@code GCWM} for + * the garbage collection watermark (which is at {@code IWM - getAllowedLateness}). Time + * progresses from left to right, and we write {@code [ ... ]} to denote a bounded window with + * implied lower bound. + * + *

Note that the GCWM will be the same as the IWM if {@code getAllowedLateness} + * is {@code ZERO}. + * + *
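As an aside (not part of the patch itself), the quantities used in the cases below can be made concrete with a minimal sketch; the values and the class name GarbageCollectionWatermarkSketch are hypothetical, and only standard Joda-Time calls are used.

import org.joda.time.Duration;
import org.joda.time.Instant;

public class GarbageCollectionWatermarkSketch {
  public static void main(String[] args) {
    Duration allowedLateness = Duration.millis(30);
    Instant inputWatermark = new Instant(100);                    // IWM
    Instant gcWatermark = inputWatermark.minus(allowedLateness);  // GCWM; equals IWM if lateness is ZERO

    Instant element = new Instant(60);                            // an element E
    if (element.isBefore(gcWatermark)) {
      // E is beyond allowed lateness; the discard cases below apply.
      System.out.println("E at " + element + " is dropped");
    } else {
      // E may still be buffered, contribute to a pane, and hold back the output watermark.
      System.out.println("E at " + element + " can still be assigned a hold");
    }
  }
}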

Here are the cases we need to handle. They are conceptually considered in the + * sequence written since if getAllowedLateness is ZERO the GCWM is the same as the IWM. + *

    + *
  1. (Normal) + *
    +   *          |
    +   *      [   | E        ]
    +   *          |
    +   *         IWM
    +   * 
    + * This is, hopefully, the common and happy case. The element is locally on-time and can + * definitely make it to an {@code ON_TIME} pane which we can still set an end-of-window timer + * for. We place an element hold at E which will become the {@code ON_TIME} pane's timestamp + * if it is the earliest such hold. (Thus the OWM will not proceed past E until the next pane + * fires). We also place an end-of-window and (if required) garbage collection hold in case + * this is the first element seen for the window. + * + *
  2. (Discard - no target window) + *
    +   *                       |                            |
    +   *      [     E        ] |                            |
    +   *                       |                            |
    +   *                     GCWM  <-getAllowedLateness->  IWM
    +   * 
    + * The element is very locally late. The window has been garbage collected, thus there + * is no target pane E could be assigned to. We discard E. + * + *
  3. (Discard - beyond allowed lateness) + *
    +   *               |                            |
    +   *      [     E  |     ]                      |
    +   *               |                            |
    +   *             GCWM  <-getAllowedLateness->  IWM
    +   * 
    + * The element is very locally late, and the window is very close to being garbage collected, at + * which point a final {@code LATE} pane could be emitted. We *could* attempt to capture E within + * that pane, however that requires checking against all possible windows which may contain E. + * We instead discard E. + * + *
  4. (Unobservably late) + *
    +   *          |    |
    +   *      [   | E  |     ]
    +   *          |    |
    +   *         OWM  IWM
    +   * 
    + * The element is locally late, however we can still treat this case as for 'Normal' above + * since the IWM has not yet passed the end of the window and the element is ahead of the + * OWM. In effect, we get to 'launder' the locally late element and consider it as locally + * on-time because no downstream computation can observe the difference. + * + *
  5. (Input Late) + *
    +   *          |            |
    +   *      [   | E        ] |
    +   *          |            |
    +   *         OWM          IWM
    +   * 
    + * The end-of-window timer may have already fired for this window, and thus an {@code ON_TIME} + * pane may have already been emitted. We can still place an element hold, which will be + * cleared when the next pane fires (which could be {@code ON_TIME} or {@code LATE}). We + * should not place an end-of-window hold since we cannot guarantee it will be cleared until + * the garbage collection timer fires. We can still place a garbage collection hold if required. + * + *
  6. (Possibly unobservably late - 1) + *
    +   *               |   |
    +   *      [     E  |   | ]
    +   *               |   |
    +   *              OWM IWM
    +   * 
    + * The element is too late to contribute to the output watermark hold, and thus won't + * contribute to any pane's timestamp. We don't know if a hold has been placed at or later + * than the OWM for this window. Thus we can't be sure E will make an {@code ON_TIME} pane, + * even though we know the end-of-window timer is yet to fire. We can still place an + * end-of-window hold, and a garbage collection hold if required. + * + *
  7. (Possibly unobservably late - 2) + *
    +   *               |       |
    +   *      [     E  |     ] |
    +   *               |       |
    +   *              OWM     IWM
    +   * 
    + * As for the previous case, however we don't even know if the end-of-window timer has already + * fired, or it is about to fire. We can place only the garbage collection hold, if required. * - *

    If the value was not late, then the input watermark must be less than the timestamp, and we - * can use {@link WindowFn#getOutputTime} to determine the appropriate output time. + *

  8. (Definitely late) + *
    +   *                       |   |
    +   *      [     E        ] |   |
    +   *                       |   |
    +   *                      OWM IWM
    +   * 
    + * The element is definitely too late to make an {@code ON_TIME} pane. We are too late to + * place an end-of-window hold. We can still place a garbage collection hold if required. * - *

    If the value was late, we pessimistically assume the worst and attempt to hold the watermark - * to {@link BoundedWindow#maxTimestamp()} plus {@link WindowingStrategy#getAllowedLateness()}. - * That allows us to output the result at {@link BoundedWindow#maxTimestamp()} without being - * dropped. + *

*/ - public void addHold(ReduceFn.ProcessValueContext c, boolean isLate) { - Instant outputTime = isLate - ? c.window().maxTimestamp().plus(windowingStrategy.getAllowedLateness()) - : windowingStrategy.getOutputTimeFn().assignOutputTime(c.timestamp(), c.window()); - c.state().access(watermarkHoldTag).add(outputTime); + public void addHolds(ReduceFn.ProcessValueContext context) { + if (!addElementHold(context)) { + addEndOfWindowOrGarbageCollectionHolds(context); + } + } + + /** + * Return {@code timestamp}, possibly shifted forward in time according to the window + * strategy's output time function. + */ + private Instant shift(Instant timestamp, W window) { + Instant shifted = windowingStrategy.getOutputTimeFn().assignOutputTime(timestamp, window); + if (shifted.isBefore(timestamp)) { + throw new IllegalStateException( + String.format("OutputTimeFn moved element from %s to earlier time %s for window %s", + timestamp, shifted, window)); + } + if (!timestamp.isAfter(window.maxTimestamp()) && shifted.isAfter(window.maxTimestamp())) { + throw new IllegalStateException( + String.format("OutputTimeFn moved element from %s to %s which is beyond end of window %s", + timestamp, shifted, window)); + } + + return shifted; + } + + /** + * Add an element hold if possible. Return true if was added, false if too late to add. + */ + private boolean addElementHold(ReduceFn.ProcessValueContext context) { + // Give the window function a chance to move the hold timestamp forward to encourage progress. + // (A later hold implies less impediment to the output watermark making progress, which in + // turn encourages end-of-window triggers to fire earlier in following computations.) + Instant elementHold = shift(context.timestamp(), context.window()); + + Instant outputWM = timerInternals.currentOutputWatermarkTime(); + Instant inputWM = timerInternals.currentInputWatermarkTime(); + + Instant garbageWM = + inputWM == null ? null : inputWM.minus(windowingStrategy.getAllowedLateness()); + Preconditions.checkState(garbageWM == null || !elementHold.isBefore(garbageWM), + "Shifted timestamp %s cannot be beyond garbage collection watermark %s", elementHold, + garbageWM); + + // Only add the hold if we can be sure the backend will be able to respect it. + boolean tooLate; + if (outputWM != null && elementHold.isBefore(outputWM)) { + tooLate = true; + } else { + tooLate = false; + context.state().access(elementHoldTag).add(elementHold); + } + WindowTracing.trace( + "WatermarkHold.addHolds: element hold at {} is {} for " + + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + elementHold, tooLate ? "too late" : "on-time", context.key(), context.window(), inputWM, + outputWM); + + return !tooLate; + } + + /** + * Add an end-of-window hold or, if too late for that, a garbage collection hold (if required). + * + *

The end-of-window hold guarantees that an empty {@code ON_TIME} pane can be given + * a timestamp which will not be considered beyond allowed lateness by any downstream computation. + */ + private void addEndOfWindowOrGarbageCollectionHolds(ReduceFn.Context context) { + if (!addEndOfWindowHold(context)) { + addGarbageCollectionHold(context); + } + } + + /** + * Add an end-of-window hold. Return true if was added, false if too late to add. + * + *

The end-of-window hold guarantees that any empty {@code ON_TIME} pane can be given + * a timestamp which will not be considered beyond allowed lateness by any downstream computation. + */ + private boolean addEndOfWindowHold(ReduceFn.Context context) { + // Only add an end-of-window hold if we can be sure the end-of-window timer + // has not yet fired. Otherwise we risk holding up the output watermark until + // the garbage collection timer fires, which may be a very long time in the future. + Instant outputWM = timerInternals.currentOutputWatermarkTime(); + Instant inputWM = timerInternals.currentInputWatermarkTime(); + boolean tooLate; + Instant eowHold = context.window().maxTimestamp(); + if (inputWM != null && eowHold.isBefore(inputWM)) { + tooLate = true; + } else { + tooLate = false; + Preconditions.checkState(outputWM == null || !eowHold.isBefore(outputWM), + "End-of-window hold %s cannot be before output watermark %s", eowHold, outputWM); + context.state().access(EXTRA_HOLD_TAG).add(eowHold); + } + WindowTracing.trace( + "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is {} for " + + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + eowHold, tooLate ? "too late" : "on-time", context.key(), context.window(), inputWM, + outputWM); + return !tooLate; + } + + /** + * Add a garbage collection hold, if required. + * + *

The garbage collection hold gurantees that any empty final pane can be given + * a timestamp which will not be considered beyond allowed lateness by any downstream + * computation. If we are sure no empty final panes can be emitted then there's no need + * for an additional hold. + */ + private void addGarbageCollectionHold(ReduceFn.Context context) { + // Only add a garbage collection hold if we are sure we need an empty final pane and + // the window will be garbage collected after the end-of-window trigger. + if (context.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS + && windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO)) { + Instant gcHold = context.window().maxTimestamp().plus(windowingStrategy.getAllowedLateness()); + Instant outputWM = timerInternals.currentOutputWatermarkTime(); + Instant inputWM = timerInternals.currentInputWatermarkTime(); + WindowTracing.trace( + "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} for " + + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + gcHold, context.key(), context.window(), inputWM, outputWM); + Preconditions.checkState(inputWM == null || !gcHold.isBefore(inputWM), + "Garbage collection hold %s cannot be before input watermark %s", gcHold, inputWM); + context.state().access(EXTRA_HOLD_TAG).add(gcHold); + } } /** * Updates the watermark hold when windows merge. For example, if the new window implies * a later watermark hold, then earlier holds may be released. */ - public void mergeHolds(final ReduceFn.OnMergeContext c) { + public void mergeHolds(final ReduceFn.OnMergeContext context) { + WindowTracing.debug("mergeHolds: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + context.key(), context.window(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); // If the output hold depends only on the window, then there may not be a hold in place // for the new merged window, so add one. if (windowingStrategy.getOutputTimeFn().dependsOnlyOnWindow()) { Instant arbitraryTimestamp = new Instant(0); - c.state().access(watermarkHoldTag).add( - windowingStrategy.getOutputTimeFn().assignOutputTime( - arbitraryTimestamp, - c.window())); + context.state() + .access(elementHoldTag) + .add(windowingStrategy.getOutputTimeFn().assignOutputTime( + arbitraryTimestamp, context.window())); } - c.state().accessAcrossMergedWindows(watermarkHoldTag).releaseExtraneousHolds(); + context.state().accessAcrossMergedWindows(elementHoldTag).releaseExtraneousHolds(); + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).releaseExtraneousHolds(); } /** - * Returns the combined timestamp at which the output watermark was being held and releases - * the hold. + * Return (a future for) the earliest data hold for {@code context}. Clear the data hold after + * reading. If {@code isFinal}, also clear any end-of-window or garbage collection hold. * *

The returned timestamp is the output timestamp according to the {@link OutputTimeFn} * from the windowing strategy of this {@link WatermarkHold}, combined across all the non-late * elements in the current pane. */ - public StateContents extractAndRelease(final ReduceFn.Context c) { - final WatermarkStateInternal dataHold = c.state().accessAcrossMergedWindows(watermarkHoldTag); - final StateContents holdFuture = dataHold.get(); + public StateContents extractAndRelease( + final ReduceFn.Context context, final boolean isFinal) { + WindowTracing.debug( + "extractAndRelease: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + context.key(), context.window(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + final WatermarkStateInternal elementHoldState = + context.state().accessAcrossMergedWindows(elementHoldTag); + final StateContents elementHoldFuture = elementHoldState.get(); + final WatermarkStateInternal extraHoldState = + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG); + final StateContents extraHoldFuture = extraHoldState.get(); return new StateContents() { @Override public Instant read() { - Instant hold = holdFuture.read(); - if (hold == null || hold.isAfter(c.window().maxTimestamp())) { - hold = c.window().maxTimestamp(); + // Read both the element and extra holds. + Instant elementHold = elementHoldFuture.read(); + Instant extraHold = extraHoldFuture.read(); + Instant hold; + // Find the minimum, accounting for null. + if (elementHold == null) { + hold = extraHold; + } else if (extraHold == null) { + hold = elementHold; + } else if (elementHold.isBefore(extraHold)) { + hold = elementHold; + } else { + hold = extraHold; + } + if (hold == null || hold.isAfter(context.window().maxTimestamp())) { + // If no hold (eg because all elements came in behind the output watermark), or + // the hold was for garbage collection, take the end of window as the result. + WindowTracing.debug( + "WatermarkHold.extractAndRelease.read: clipping from {} to end of window " + + "for key:{}; window:{}", + hold, context.key(), context.window()); + hold = context.window().maxTimestamp(); } + WindowTracing.debug("WatermarkHold.extractAndRelease.read: clearing for key:{}; window:{}", + context.key(), context.window()); // Clear the underlying state to allow the output watermark to progress. - dataHold.clear(); + elementHoldState.clear(); + extraHoldState.clear(); + + // Reinstate the end-of-window and garbage collection holds if still required. + if (!isFinal) { + addEndOfWindowOrGarbageCollectionHolds(context); + } return hold; } }; } - public void holdForOnTime(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(watermarkHoldTag).add(c.window().maxTimestamp()); - } - - public void holdForFinal(final ReduceFn.Context c) { - if (c.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) { - c.state().accessAcrossMergedWindows(watermarkHoldTag) - .add(c.window().maxTimestamp().plus(c.windowingStrategy().getAllowedLateness())); - } - } - - public void releaseOnTime(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(watermarkHoldTag).clear(); - - if (c.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS - && c.windowingStrategy().getAllowedLateness().isLongerThan(Duration.ZERO)) { - holdForFinal(c); - } - } - - public void releaseFinal(final ReduceFn.Context c) { - c.state().accessAcrossMergedWindows(watermarkHoldTag).clear(); + /** Clear any remaining holds. 
*/ + public void clear(ReduceFn.Context context) { + WindowTracing.debug( + "WatermarkHold.clear: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + context.key(), context.window(), timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + context.state().accessAcrossMergedWindows(elementHoldTag).clear(); + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).clear(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java new file mode 100644 index 0000000000000..6ae2f4206c484 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Logging for window operations. Generally only feasible to enable on hand-picked pipelines. + */ +public final class WindowTracing { + private static final Logger LOG = LoggerFactory.getLogger(WindowTracing.class); + + public static void debug(String format, Object... args) { + LOG.debug(format, args); + } + + @SuppressWarnings("unused") + public static void trace(String format, Object... args) { + LOG.trace(format, args); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java index ec9c492417681..7a1e416aaacee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergedWatermarkStateInternal.java @@ -124,11 +124,17 @@ public Boolean read() { @Override public void releaseExtraneousHolds() { if (outputTimeFn.dependsOnlyOnEarliestInputTimestamp()) { - // No need to do anything; the merged watermark state will hold to the earliest - // due to semantics of watermark holds. + // The backend is implicitly already holding the output watermark to + // the minimum of all holds in all merged windows. Therefore, we don't need to + // explicitly change it. + // When the final (post merged) session window fires, we will collect all holds + // over all intermediate (pre merged) windows, take their min, and clear them. + // Therefore we also don't need to garbage collect any state here. } else { - // In all other cases, get() implements the necessary combining logic, and actually - // performs compaction that releases the watermark. + // The output time function may be able to move the hold forward. get() implements + // the necessary combining logic, and as a side effect will compact the hold + // in Windmill state. This ensures Windmill's output watermark can progress, and + // there is no stale hold left behind. 
get().read(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java index 8fe1681c3c970..4c6af1ae32d18 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java @@ -115,9 +115,9 @@ public static StateTag> bag(String id, Coder elemCoder) { /** * Create a state tag for holding the watermark. */ - public static StateTag watermarkStateInternal( - String id, OutputTimeFn outputTimeFn) { - return new WatermarkStateTagInternal(new StructuredId(id), outputTimeFn); + public static StateTag + watermarkStateInternal(String id, OutputTimeFn outputTimeFn) { + return new WatermarkStateTagInternal(new StructuredId(id), outputTimeFn); } /** @@ -357,7 +357,8 @@ private static class WatermarkStateTagInternal /** * When multiple output times are added to hold the watermark, this determines how they are - * combined, and also the behavior when merging windows. + * combined, and also the behavior when merging windows. Does not contribute to equality/hash + * since we have at most one watermark hold tag per computation. */ private final OutputTimeFn outputTimeFn; @@ -381,9 +382,8 @@ public boolean equals(Object obj) { return false; } - WatermarkStateTagInternal that = (WatermarkStateTagInternal) obj; - return Objects.equals(this.id, that.id) - && Objects.equals(this.outputTimeFn, that.outputTimeFn); + WatermarkStateTagInternal that = (WatermarkStateTagInternal) obj; + return Objects.equals(this.id, that.id); } @Override @@ -393,7 +393,7 @@ public int hashCode() { @Override protected StateTag asKind(StateKind kind) { - return new WatermarkStateTagInternal(id.asKind(kind), outputTimeFn); + return new WatermarkStateTagInternal(id.asKind(kind), outputTimeFn); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java index 29ed90b0b8141..9521088185670 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkStateInternal.java @@ -17,7 +17,7 @@ import com.google.cloud.dataflow.sdk.annotations.Experimental; import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind; -import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import org.joda.time.Instant; @@ -31,15 +31,8 @@ public interface WatermarkStateInternal extends MergeableState { /** - * Release any holds that have become extraneous so they do not prevent progress of the - * output watermark. - * - *

For example, when using {@link OutputTimeFns#outputAtEndOfWindow()}, there will be holds - * in place at the end of every initial window that merges into the result window. These holds - * need to be released. It is implementation-dependent how (or whether) this happens. - * - *

This method is permitted to be "best effort" but should always try to release holds - * as far as possible to allow the output watermark to make progress. + * Release all holds for windows which have been merged away and incorporate their + * combined values (according to {@link OutputTimeFn#merge}) into the result window hold. */ void releaseExtraneousHolds(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java index cb7a5c31401a9..e7541f2397622 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java @@ -636,7 +636,8 @@ public void testReadUnboundedReader() throws Exception { .setSourceState( Windmill.SourceState.newBuilder().setState(state).build()) // Source state. .build(), - new Instant(0), + new Instant(0), // input watermark + null, // output watermark null, // StateReader null, // StateFetcher Windmill.WorkItemCommitRequest.newBuilder()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/KeyedWorkItemTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/KeyedWorkItemTest.java index d4498cff80d8c..60a151c1dd51c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/KeyedWorkItemTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/KeyedWorkItemTest.java @@ -44,7 +44,6 @@ import java.io.IOException; import java.util.Collection; import java.util.Collections; -import java.util.concurrent.TimeUnit; /** Tests for {@link KeyedWorkItem}. */ @RunWith(JUnit4.class) @@ -104,7 +103,7 @@ private void addElement( ByteString encodedMetadata = WindmillSink.encodeMetadata(WINDOWS_CODER, Collections.singletonList(window), pane); chunk.addMessagesBuilder() - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(timestamp)) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(new Instant(timestamp))) .setData(ByteString.copyFromUtf8(value)) .setMetadata(encodedMetadata); } @@ -142,7 +141,7 @@ private static Windmill.Timer makeSerializedTimer( return Windmill.Timer.newBuilder() .setTag(ByteString.copyFromUtf8( ns.stringKey() + "+" + type + "-" + timestamp)) - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(timestamp)) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(new Instant(timestamp))) .setType(type) .setStateFamily(STATE_FAMILY) .build(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java index 938975cd3e405..0c56886d07dbd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorkerTest.java @@ -779,8 +779,10 @@ public void testMergeWindows() throws Exception { ByteString timerTag = ByteString.copyFromUtf8(window + "+0:999"); // GC timer just has window ByteString bufferTag = ByteString.copyFromUtf8(window + "+sbuf"); ByteString paneInfoTag = ByteString.copyFromUtf8(window + "+spane"); - ByteString watermarkHoldTag = + ByteString watermarkDataHoldTag = ByteString.copyFromUtf8(window + "+shold"); + ByteString watermarkExtraHoldTag = + ByteString.copyFromUtf8(window + "+sextra"); String stateFamily = "MergeWindows"; ByteString 
bufferData = ByteString.copyFromUtf8("\000data0"); // Encoded form for Iterable: -1, true, 'data0', false @@ -811,12 +813,14 @@ public void testMergeWindows() throws Exception { .build()) .build()))); - assertThat(actualOutput.getWatermarkHoldsList(), Matchers.contains( - Matchers.equalTo(Windmill.WatermarkHold.newBuilder() - .setTag(watermarkHoldTag) - .setStateFamily(stateFamily) - .addTimestamps(0) - .build()))); + assertThat( + actualOutput.getWatermarkHoldsList(), + Matchers.containsInAnyOrder( + Windmill.WatermarkHold.newBuilder() + .setTag(watermarkDataHoldTag) + .setStateFamily(stateFamily) + .addTimestamps(0) + .build())); List counters = actualOutput.getCounterUpdatesList(); // No state reads @@ -861,7 +865,11 @@ public void testMergeWindows() throws Exception { .setTimestamp(0) // is ignored .setData(bufferData); dataBuilder.addWatermarkHoldsBuilder() - .setTag(watermarkHoldTag) + .setTag(watermarkDataHoldTag) + .setStateFamily(stateFamily) + .addTimestamps(0); + dataBuilder.addWatermarkHoldsBuilder() + .setTag(watermarkExtraHoldTag) .setStateFamily(stateFamily) .addTimestamps(0); dataBuilder.addValuesBuilder() @@ -914,13 +922,17 @@ public void testMergeWindows() throws Exception { assertThat( actualOutput.getWatermarkHoldsList(), - Matchers.contains( - Matchers.equalTo( - Windmill.WatermarkHold.newBuilder() - .setTag(watermarkHoldTag) - .setStateFamily(stateFamily) - .setReset(true) - .build()))); + Matchers.containsInAnyOrder( + Windmill.WatermarkHold.newBuilder() + .setTag(watermarkDataHoldTag) + .setStateFamily(stateFamily) + .setReset(true) + .build(), + Windmill.WatermarkHold.newBuilder() + .setTag(watermarkExtraHoldTag) + .setStateFamily(stateFamily) + .setReset(true) + .build())); counters = actualOutput.getCounterUpdatesList(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java index 6dadab49b4d53..1c6edd4a74d50 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsDoFnTest.java @@ -19,7 +19,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.when; import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -77,7 +76,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.TimeUnit; /** Unit tests for {@link StreamingGroupAlsoByWindowsDoFn}. */ @RunWith(JUnit4.class) @@ -145,7 +143,7 @@ private void addTimer(WorkItem.Builder workItem, namespace, timestamp, type == Windmill.Timer.Type.WATERMARK ? 
TimeDomain.EVENT_TIME : TimeDomain.PROCESSING_TIME))) - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(timestamp.getMillis())) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp)) .setType(type) .setStateFamily(STATE_FAMILY); } @@ -162,7 +160,7 @@ private void addElement( messageBundle.addMessagesBuilder() .setMetadata(WindmillSink.encodeMetadata(windowsCoder, windows, PaneInfo.NO_FIRING)) .setData(dataOutput.toByteString()) - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(timestamp.getMillis())); + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp)); } private WindowedValue> createValue( @@ -181,7 +179,6 @@ private WindowedValue> createValue( outputTag, outputManager, WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))); runner.startBundle(); - when(mockTimerInternals.currentWatermarkTime()).thenReturn(new Instant(0)); WorkItem.Builder workItem1 = WorkItem.newBuilder(); workItem1.setKey(ByteString.copyFromUtf8(KEY)); @@ -238,7 +235,6 @@ private WindowedValue> createValue( SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))); runner.startBundle(); - when(mockTimerInternals.currentWatermarkTime()).thenReturn(new Instant(0)); WorkItem.Builder workItem1 = WorkItem.newBuilder(); workItem1.setKey(ByteString.copyFromUtf8(KEY)); @@ -304,7 +300,6 @@ private WindowedValue> createValue( WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))); runner.startBundle(); - when(mockTimerInternals.currentWatermarkTime()).thenReturn(new Instant(0)); WorkItem.Builder workItem1 = WorkItem.newBuilder(); workItem1.setKey(ByteString.copyFromUtf8(KEY)); @@ -326,7 +321,8 @@ private WindowedValue> createValue( WorkItem.Builder workItem2 = WorkItem.newBuilder(); workItem2.setKey(ByteString.copyFromUtf8(KEY)); workItem2.setWorkToken(WORK_TOKEN); - addTimer(workItem2, window(0, 10), new Instant(9), Timer.Type.WATERMARK); + // Note that the WATERMARK timer for Instant(9) will have been deleted by + // ReduceFnRunner when window(0, 10) was merged away. addTimer(workItem2, window(0, 15), new Instant(14), Timer.Type.WATERMARK); addTimer(workItem2, window(15, 25), new Instant(24), Timer.Type.WATERMARK); @@ -398,7 +394,6 @@ public Long extractOutput(Long accumulator) { appliedCombineFn); runner.startBundle(); - when(mockTimerInternals.currentWatermarkTime()).thenReturn(new Instant(0)); WorkItem.Builder workItem1 = WorkItem.newBuilder(); workItem1.setKey(ByteString.copyFromUtf8(KEY)); @@ -420,7 +415,8 @@ public Long extractOutput(Long accumulator) { WorkItem.Builder workItem2 = WorkItem.newBuilder(); workItem2.setKey(ByteString.copyFromUtf8(KEY)); workItem2.setWorkToken(WORK_TOKEN); - addTimer(workItem2, window(0, 10), new Instant(9), Timer.Type.WATERMARK); + // Note that the WATERMARK timer for Instant(9) will have been deleted by + // ReduceFnRunner when window(0, 10) was merged away. 
addTimer(workItem2, window(0, 15), new Instant(14), Timer.Type.WATERMARK); addTimer(workItem2, window(15, 25), new Instant(24), Timer.Type.WATERMARK); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java index a17aca014ae5a..6bb9e5b83d49b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingGroupAlsoByWindowsReshuffleDoFnTest.java @@ -57,7 +57,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.TimeUnit; /** Unit tests for {@link StreamingGroupAlsoByWindowsReshuffleDoFn}. */ @RunWith(JUnit4.class) @@ -116,7 +115,7 @@ private void addElement( messageBundle.addMessagesBuilder() .setMetadata(WindmillSink.encodeMetadata(windowsCoder, windows, PaneInfo.NO_FIRING)) .setData(dataOutput.toByteString()) - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(timestamp.getMillis())); + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp)); } private WindowedValue> createValue( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java index 2c4e11fb84bee..c0062ba9284bd 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java @@ -81,7 +81,10 @@ public void testTimerInternalsSetTimer() { Windmill.WorkItemCommitRequest.Builder outputBuilder = Windmill.WorkItemCommitRequest.newBuilder(); - executionContext.start(null, new Instant(1000), stateReader, stateFetcher, outputBuilder); + executionContext.start(null, + new Instant(1000), // input watermark + null, // output watermark + stateReader, stateFetcher, outputBuilder); ExecutionContext.StepContext step = executionContext.getOrCreateStepContext("step", "transform", null); @@ -141,7 +144,8 @@ private void startContext( .setState(ByteString.EMPTY) .build()) // Source state. .build(), - new Instant(0), + new Instant(0), // input watermark + null, // output watermark null, // StateReader null, // StateFetcher Windmill.WorkItemCommitRequest.newBuilder()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReaderTest.java index f6ad4190fd1ff..961c33025c3d9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReaderTest.java @@ -42,7 +42,6 @@ import java.io.IOException; import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; /** * Tests for {@link WindmillStateReader}. 
@@ -83,7 +82,8 @@ private Windmill.Value intValue(int value, boolean padded) throws IOException { return Windmill.Value.newBuilder() .setData(output.toByteString()) - .setTimestamp(TimeUnit.MILLISECONDS.toMicros(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())) + .setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp( + BoundedWindow.TIMESTAMP_MAX_VALUE)) .build(); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java index 87a00db9b55e9..e4855fb1f7084 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java @@ -117,7 +117,7 @@ public void testOnTimerFire() throws Exception { when(mockTrigger1.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.FIRE_AND_FINISH); tester.fireTimer(firstWindow, new Instant(11), TimeDomain.EVENT_TIME); - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1), 1, 0, 10))); @@ -139,7 +139,7 @@ public void testOnTimerFireAndFinish() throws Exception { .thenReturn(TriggerResult.FIRE_AND_FINISH); tester.fireTimer(firstWindow, new Instant(11), TimeDomain.EVENT_TIME); - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); assertFalse(tester.isMarkedFinished(firstWindow)); @@ -239,7 +239,7 @@ public void testAfterAllRealTriggersFixedWindow() throws Exception { TimestampedValue.of(5, new Instant(2))); assertThat(tester.extractOutput(), Matchers.emptyIterable()); - tester.advanceProcessingTime(new Instant(5)); + tester.advanceProcessingTime(new Instant(6)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(0, 1, 2, 3, 4, 5), 0, 0, 50))); @@ -251,7 +251,7 @@ public void testAfterAllRealTriggersFixedWindow() throws Exception { TimestampedValue.of(7, new Instant(3)), TimestampedValue.of(8, new Instant(4)), TimestampedValue.of(9, new Instant(5))); - tester.advanceProcessingTime(new Instant(20)); + tester.advanceProcessingTime(new Instant(21)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); tester.injectElements( TimestampedValue.of(10, new Instant(6))); @@ -279,7 +279,7 @@ public void testAfterAllMergingWindowSomeFinished() throws Exception { tester.advanceProcessingTime(new Instant(10)); tester.injectElements( TimestampedValue.of(1, new Instant(1))); // in [1, 11), timer for 15 - tester.advanceProcessingTime(new Instant(15)); + tester.advanceProcessingTime(new Instant(16)); tester.injectElements( TimestampedValue.of(2, new Instant(1)), // in [1, 11) count = 1 TimestampedValue.of(3, new Instant(2))); // in [2, 12), timer for 16 diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java index a119425f08d28..93084bfe0fde0 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java @@ -140,7 +140,7 @@ public void testOnTimerFinish() throws Exception { when(mockTrigger1.onTimer(Mockito.isA(OnTimerContext.class))) 
.thenReturn(TriggerResult.FIRE_AND_FINISH); - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1), 1, 0, 10))); @@ -261,7 +261,7 @@ public void testAfterEachMergingWindowSomeFinished() throws Exception { tester.advanceProcessingTime(new Instant(10)); tester.injectElements( TimestampedValue.of(1, new Instant(1))); // in [1, 11), timer for 15 - tester.advanceProcessingTime(new Instant(15)); + tester.advanceProcessingTime(new Instant(16)); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue(Matchers.contains(1), 1, 1, 11))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java index 23910f1a92cdb..63a240a466a27 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java @@ -236,7 +236,7 @@ public void testAfterFirstRealTriggersFixedWindow() throws Exception { TimestampedValue.of(8, new Instant(5))); tester.advanceProcessingTime(new Instant(5)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); - tester.advanceProcessingTime(new Instant(6)); + tester.advanceProcessingTime(new Instant(7)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(5, 6, 7, 8), 2, 0, 50))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java index 0096216d71f58..cada240cd0e70 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java @@ -122,7 +122,7 @@ public void testAfterProcessingTimeWithMergingWindow() throws Exception { tester.injectElements( TimestampedValue.of(2, new Instant(2))); // in [2, 12), timer for 16 - tester.advanceProcessingTime(new Instant(15)); + tester.advanceProcessingTime(new Instant(16)); // This fires, because the earliest element in [1, 12) arrived at time 10 assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 1, 12))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java index 7450752ec2346..9f34a9c920e14 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java @@ -73,12 +73,12 @@ public void testFirstInPaneWithFixedWindow() throws Exception { tester.injectElements( TimestampedValue.of(1, new Instant(1))); // first in window [0, 10), timer set for 6 - tester.advanceWatermark(new Instant(5)); + tester.advanceInputWatermark(new Instant(5)); tester.injectElements( TimestampedValue.of(2, new Instant(9)), TimestampedValue.of(3, new Instant(8))); - tester.advanceWatermark(new Instant(6)); + tester.advanceInputWatermark(new Instant(7)); assertThat(tester.extractOutput(), Matchers.contains( 
WindowMatchers.isSingleWindowedValue(containsInAnyOrder(1, 2, 3), 1, 0, 10))); @@ -114,12 +114,12 @@ public void testAlignAndDelay() throws Exception { TimestampedValue.of(2, zero.plus(Duration.standardSeconds(5))), TimestampedValue.of(3, zero.plus(Duration.standardSeconds(55)))); - // Advance almost to 6m, but not quite. No output should be produced. - tester.advanceWatermark(zero.plus(Duration.standardMinutes(6)).minus(1)); + // Advance to 6m. No output should be produced. + tester.advanceInputWatermark(zero.plus(Duration.standardMinutes(6))); assertThat(tester.extractOutput(), Matchers.emptyIterable()); - // Advance to 6m and see our output - tester.advanceWatermark(zero.plus(Duration.standardMinutes(6))); + // Advance to 6m+1ms and see our output + tester.advanceInputWatermark(zero.plus(Duration.standardMinutes(6).plus(1))); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue( containsInAnyOrder(1, 2, 3), @@ -136,12 +136,12 @@ public void testFirstInPaneWithMerging() throws Exception { AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); - tester.advanceWatermark(new Instant(1)); + tester.advanceInputWatermark(new Instant(1)); tester.injectElements( TimestampedValue.of(1, new Instant(1)), // in [1, 11), timer for 6 TimestampedValue.of(2, new Instant(2))); // in [2, 12), timer for 7 - tester.advanceWatermark(new Instant(6)); + tester.advanceInputWatermark(new Instant(7)); // We merged, and updated the watermark timer to the earliest timer, which was still 6. assertThat(tester.extractOutput(), Matchers.contains( @@ -164,13 +164,13 @@ public void testEndOfWindowFixedWindow() throws Exception { tester.injectElements( TimestampedValue.of(1, new Instant(1))); // first in window [0, 10), timer set for 9 - tester.advanceWatermark(new Instant(8)); + tester.advanceInputWatermark(new Instant(8)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); tester.injectElements( TimestampedValue.of(2, new Instant(9)), TimestampedValue.of(3, new Instant(8))); - tester.advanceWatermark(new Instant(9)); + tester.advanceInputWatermark(new Instant(10)); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue(containsInAnyOrder(1, 2, 3), 1, 0, 10))); @@ -196,19 +196,19 @@ public void testEndOfWindowWithMerging() throws Exception { AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); - tester.advanceWatermark(new Instant(1)); + tester.advanceInputWatermark(new Instant(1)); tester.injectElements( - TimestampedValue.of(1, new Instant(1)), // in [1, 11), timer for 10 - TimestampedValue.of(2, new Instant(2))); // in [2, 12), timer for 11 - tester.advanceWatermark(new Instant(10)); + TimestampedValue.of(1, new Instant(1)), // in [1, 11], timer for 11 + TimestampedValue.of(2, new Instant(2))); // in [2, 12], timer for 12 + tester.advanceInputWatermark(new Instant(10)); // We merged, and updated the watermark timer to the end of the new window. 
assertThat(tester.extractOutput(), Matchers.emptyIterable()); tester.injectElements( - TimestampedValue.of(3, new Instant(1))); // in [1, 11), timer for 10 - tester.advanceWatermark(new Instant(11)); + TimestampedValue.of(3, new Instant(1))); // in [1, 11], timer for 11 + tester.advanceInputWatermark(new Instant(12)); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue(containsInAnyOrder(1, 2, 3), 1, 1, 12))); @@ -286,7 +286,7 @@ public void testEarlyAndAtWatermarkProcessElement() throws Exception { TimestampedValue.of(2, new Instant(10L))); // Fires due to early trigger tester.injectElements(TimestampedValue.of(3, new Instant(15L))); - tester.advanceWatermark(new Instant(100L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(100L)); // Fires due to AtWatermark List>> output = tester.extractOutput(); assertThat(output, Matchers.contains( isSingleWindowedValue(containsInAnyOrder(1, 2), 5, 0, 100), @@ -308,7 +308,7 @@ public void testLateAndAtWatermarkProcessElement() throws Exception { TimestampedValue.of(2, new Instant(10L)), TimestampedValue.of(3, new Instant(15L))); - tester.advanceWatermark(new Instant(100L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(100L)); // Fires due to AtWatermark when(mockLate.onElement(Mockito..OnElementContext>any())) .thenReturn(TriggerResult.CONTINUE) @@ -354,7 +354,7 @@ public void testEarlyLateAndAtWatermarkProcessElement() throws Exception { tester.injectElements( TimestampedValue.of(3, new Instant(15L))); - tester.advanceWatermark(new Instant(100L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(100L)); // Fires due to AtWatermark when(mockLate.onElement(Mockito..OnElementContext>any())) .thenReturn(TriggerResult.CONTINUE) @@ -396,7 +396,7 @@ public void testEarlyAndAtWatermarkSessions() throws Exception { assertFalse(tester.isMarkedFinished(new IntervalWindow(new Instant(5), new Instant(40)))); tester.injectElements(TimestampedValue.of(3, new Instant(6L))); - tester.advanceWatermark(new Instant(100L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(100L)); // Fires due to AtWatermark List>> output = tester.extractOutput(); assertThat(output, Matchers.contains( isSingleWindowedValue(containsInAnyOrder(1, 2), 5, 5, 40), @@ -419,7 +419,7 @@ public void testLateAndAtWatermarkSessionsProcessingTime() throws Exception { TimestampedValue.of(1, new Instant(5L)), TimestampedValue.of(2, new Instant(20L))); - tester.advanceWatermark(new Instant(70L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(70L)); // Fires due to AtWatermark tester.injectElements(TimestampedValue.of(3, new Instant(6L))); @@ -449,7 +449,7 @@ public void testLateAndAtWatermarkSessions() throws Exception { TimestampedValue.of(1, new Instant(5L)), TimestampedValue.of(2, new Instant(20L))); - tester.advanceWatermark(new Instant(70L)); // Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(70L)); // Fires due to AtWatermark IntervalWindow window = new IntervalWindow(new Instant(5), new Instant(40)); assertFalse(tester.isMarkedFinished(window)); @@ -482,9 +482,9 @@ public void testEarlyLateAndAtWatermarkSessions() throws Exception { TimestampedValue.of(1, new Instant(5L)), TimestampedValue.of(2, new Instant(20L))); - tester.advanceProcessingTime(new Instant(55)); // Fires due to early trigger + tester.advanceProcessingTime(new Instant(56)); // Fires due to early trigger - tester.advanceWatermark(new Instant(70L)); // 
Fires due to AtWatermark + tester.advanceInputWatermark(new Instant(70L)); // Fires due to AtWatermark assertFalse(tester.isMarkedFinished(new IntervalWindow(new Instant(5), new Instant(40)))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java index 01788e00d9428..ab9c868a5e5a1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java @@ -58,18 +58,18 @@ public void testDefaultTriggerWithFixedWindow() throws Exception { // Advance the watermark almost to the end of the first window. tester.advanceProcessingTime(new Instant(500)); - tester.advanceWatermark(new Instant(8)); + tester.advanceInputWatermark(new Instant(8)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); - // Advance watermark to 9 (the exact end of the window), which causes the first fixed window to + // Advance watermark to 10 (past end of the window), which causes the first fixed window to // be emitted - tester.advanceWatermark(new Instant(9)); + tester.advanceInputWatermark(new Instant(10)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); // Advance watermark to 100, which causes the remaining two windows to be emitted. // Since their timers were at different timestamps, they should fire in order. - tester.advanceWatermark(new Instant(100)); + tester.advanceInputWatermark(new Instant(100)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(3, 4), 15, 10, 20), isSingleWindowedValue(Matchers.contains(5), 30, 30, 40))); @@ -93,14 +93,14 @@ public void testDefaultTriggerWithSessionWindow() throws Exception { TimestampedValue.of(2, new Instant(9))); // no output, because we merged into the [9-19) session - tester.advanceWatermark(new Instant(10)); + tester.advanceInputWatermark(new Instant(10)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); tester.injectElements( TimestampedValue.of(3, new Instant(15)), TimestampedValue.of(4, new Instant(30))); - tester.advanceWatermark(new Instant(100)); + tester.advanceInputWatermark(new Instant(100)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 1, 1, 25), isSingleWindowedValue(Matchers.contains(4), 30, 30, 40))); @@ -124,19 +124,18 @@ public void testDefaultTriggerWithSlidingWindow() throws Exception { TimestampedValue.of(2, new Instant(4)), TimestampedValue.of(3, new Instant(9))); - tester.advanceWatermark(new Instant(100)); + tester.advanceInputWatermark(new Instant(100)); assertThat(tester.extractOutput(), Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, -5, 5), isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 5, 0, 10), isSingleWindowedValue(Matchers.containsInAnyOrder(3), 10, 5, 15))); - // This data is late, so it will hold the watermark to 109 + // This data is too late to hold the output watermark, either to the element + // or the end of window. 
tester.injectElements( TimestampedValue.of(4, new Instant(8))); - tester.advanceWatermark(new Instant(101)); - assertThat(tester.getWatermarkHold(), Matchers.equalTo(new Instant(109))); - tester.advanceWatermark(new Instant(120)); + tester.advanceInputWatermark(new Instant(120)); List>> output = tester.extractOutput(); assertThat(output, Matchers.contains( isSingleWindowedValue(Matchers.contains(4), 9, 0, 10), @@ -160,7 +159,7 @@ public void testDefaultTriggerWithContainedSessionWindow() throws Exception { TimestampedValue.of(2, new Instant(9)), TimestampedValue.of(3, new Instant(7))); - tester.advanceWatermark(new Instant(20)); + tester.advanceInputWatermark(new Instant(20)); Iterable>> extractOutput = tester.extractOutput(); assertThat(extractOutput, Matchers.contains( isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 1, 1, 19))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java index b6b768947ff35..e5be23527fa63 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java @@ -252,7 +252,7 @@ public void testOrFinallyRealTriggersFixedWindow() throws Exception { TimestampedValue.of(1, new Instant(0)), TimestampedValue.of(2, new Instant(1)), TimestampedValue.of(3, new Instant(1))); - tester.advanceProcessingTime(new Instant(5)); + tester.advanceProcessingTime(new Instant(6)); assertThat(tester.extractOutput(), Matchers.emptyIterable()); tester.injectElements( diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternalsTest.java index b92f5a227f4f3..1accd76248516 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternalsTest.java @@ -86,7 +86,7 @@ public void testTimerOrdering() { underTest.setTimer(processingTime2); underTest.setTimer(watermarkTime2); - underTest.advanceWatermark(mockRunner, new Instant(30)); + underTest.advanceInputWatermark(mockRunner, new Instant(30)); Mockito.verify(mockRunner).onTimer(watermarkTime1); Mockito.verify(mockRunner).onTimer(watermarkTime2); Mockito.verifyNoMoreInteractions(mockRunner); @@ -107,7 +107,7 @@ public void testDeduplicate() { underTest.setTimer(processingTime); underTest.setTimer(processingTime); underTest.advanceProcessingTime(mockRunner, new Instant(20)); - underTest.advanceWatermark(mockRunner, new Instant(20)); + underTest.advanceInputWatermark(mockRunner, new Instant(20)); Mockito.verify(mockRunner).onTimer(processingTime); Mockito.verify(mockRunner).onTimer(watermarkTime); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java index 10af67d68b9b7..34edb2489e388 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsProperties.java @@ -423,62 +423,46 @@ public static void groupsElementsIntoFixedWindowsWithEndOfWindowTimestamp( */ public static void groupsElementsIntoFixedWindowsWithCustomTimestamp( GroupAlsoByWindowsDoFnFactory> gabwFactory) - throws Exception { 
- + throws Exception { WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) - .withOutputTimeFn(new OutputTimeFn.Defaults() { - - @Override - public Instant assignOutputTime(Instant inputTimestamp, IntervalWindow window) { - return window.start(); - } - - @Override - public Instant combine(Instant outputTime, Instant otherOutputTime) { - return outputTime; - } - - @Override - public boolean dependsOnlyOnEarliestInputTimestamp() { - return true; - } - }); - - List>>> result = - runGABW(gabwFactory, windowingStrategy, "key", - WindowedValue.of( - "v1", - new Instant(1), - Arrays.asList(window(0, 10)), - PaneInfo.NO_FIRING), - WindowedValue.of( - "v2", - new Instant(2), - Arrays.asList(window(0, 10)), - PaneInfo.NO_FIRING), - WindowedValue.of( - "v3", - new Instant(13), - Arrays.asList(window(10, 20)), - PaneInfo.NO_FIRING)); + .withOutputTimeFn(new OutputTimeFn.Defaults() { + @Override + public Instant assignOutputTime(Instant inputTimestamp, IntervalWindow window) { + return inputTimestamp.isBefore(window.maxTimestamp()) + ? inputTimestamp.plus(1) : window.maxTimestamp(); + } + + @Override + public Instant combine(Instant outputTime, Instant otherOutputTime) { + return outputTime.isBefore(otherOutputTime) ? outputTime : otherOutputTime; + } + + @Override + public boolean dependsOnlyOnEarliestInputTimestamp() { + return true; + } + }); + + List>>> result = runGABW(gabwFactory, + windowingStrategy, "key", + WindowedValue.of("v1", new Instant(1), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING), + WindowedValue.of("v2", new Instant(2), Arrays.asList(window(0, 10)), PaneInfo.NO_FIRING), + WindowedValue.of("v3", new Instant(13), Arrays.asList(window(10, 20)), PaneInfo.NO_FIRING)); assertThat(result.size(), equalTo(2)); WindowedValue>> item0 = result.get(0); assertThat(item0.getValue().getValue(), containsInAnyOrder("v1", "v2")); assertThat(item0.getWindows(), contains(window(0, 10))); - assertThat(item0.getTimestamp(), - equalTo(((IntervalWindow) Iterables.getOnlyElement(item0.getWindows())).start())); + assertThat(item0.getTimestamp(), equalTo(new Instant(2))); WindowedValue>> item1 = result.get(1); assertThat(item1.getValue().getValue(), contains("v3")); assertThat(item1.getWindows(), contains(window(10, 20))); - assertThat(item1.getTimestamp(), - equalTo(((IntervalWindow) Iterables.getOnlyElement(item1.getWindows())).start())); + assertThat(item1.getTimestamp(), equalTo(new Instant(14))); } - /** * Tests that for a simple sequence of elements on the same key, the given GABW implementation * correctly groups them according to fixed windows and also sets the output timestamp diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java index 104304fae5d47..c6d56bc15d887 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java @@ -194,81 +194,109 @@ public void testOnElementCombiningAccumulating() throws Exception { @Test public void testWatermarkHoldAndLateData() throws Exception { // Test handling of late data. Specifically, ensure the watermark hold is correct. 
- TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.ACCUMULATING_FIRED_PANES, - Duration.millis(10)); + TriggerTester, IntervalWindow> tester = + TriggerTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.ACCUMULATING_FIRED_PANES, Duration.millis(10)); + + // Input watermark -> null + assertEquals(null, tester.getWatermarkHold()); + assertEquals(null, tester.getOutputWatermark()); // All on time data, verify watermark hold. injectElement(tester, 1, TriggerResult.CONTINUE); injectElement(tester, 3, TriggerResult.CONTINUE); assertEquals(new Instant(1), tester.getWatermarkHold()); injectElement(tester, 2, TriggerResult.FIRE); + assertEquals(1, tester.getOutputSize()); // Holding for the end-of-window transition. assertEquals(new Instant(9), tester.getWatermarkHold()); - + // Nothing dropped. assertEquals(0, tester.getElementsDroppedDueToLateness()); assertEquals(0, tester.getElementsDroppedDueToClosedWindow()); + // Input watermark -> 4 + tester.advanceInputWatermark(new Instant(4)); + assertEquals(new Instant(4), tester.getOutputWatermark()); + // Some late, some on time. Verify that we only hold to the minimum of on-time. - tester.advanceWatermark(new Instant(4)); injectElement(tester, 2, TriggerResult.CONTINUE); injectElement(tester, 3, TriggerResult.CONTINUE); assertEquals(new Instant(9), tester.getWatermarkHold()); injectElement(tester, 5, TriggerResult.CONTINUE); assertEquals(new Instant(5), tester.getWatermarkHold()); injectElement(tester, 4, TriggerResult.FIRE); + assertEquals(2, tester.getOutputSize()); // All late -- output at end of window timestamp. when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) - .thenReturn(TriggerResult.CONTINUE); - tester.advanceWatermark(new Instant(8)); + .thenReturn(TriggerResult.CONTINUE); + // Input watermark -> 8 + tester.advanceInputWatermark(new Instant(8)); + assertEquals(new Instant(8), tester.getOutputWatermark()); injectElement(tester, 6, TriggerResult.CONTINUE); injectElement(tester, 5, TriggerResult.CONTINUE); assertEquals(new Instant(9), tester.getWatermarkHold()); - injectElement(tester, 4, TriggerResult.FIRE); + injectElement(tester, 4, TriggerResult.CONTINUE); - // This is "pending" at the time the watermark makes it way-late. - // Because we're about to expire the window, we output it. + // This is behind both the input and output watermarks, but will still make it + // into an ON_TIME pane. + when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) + .thenReturn(TriggerResult.FIRE); + // Input watermark -> 10 + tester.advanceInputWatermark(new Instant(10)); + assertEquals(3, tester.getOutputSize()); + assertEquals(new Instant(10), tester.getOutputWatermark()); injectElement(tester, 8, TriggerResult.CONTINUE); assertEquals(0, tester.getElementsDroppedDueToLateness()); assertEquals(0, tester.getElementsDroppedDueToClosedWindow()); // All very late -- gets dropped. 
- tester.advanceWatermark(new Instant(50)); + // Input watermark -> 50 + tester.advanceInputWatermark(new Instant(50)); + assertEquals(new Instant(50), tester.getOutputWatermark()); assertEquals(null, tester.getWatermarkHold()); - injectElement(tester, 2, TriggerResult.FIRE); + injectElement(tester, 22, TriggerResult.FIRE); + assertEquals(4, tester.getOutputSize()); assertEquals(null, tester.getWatermarkHold()); - // Late timers are ignored - tester.fireTimer(new IntervalWindow(new Instant(0), new Instant(10)), - new Instant(12), TimeDomain.EVENT_TIME); - assertEquals(1, tester.getElementsDroppedDueToLateness()); assertEquals(0, tester.getElementsDroppedDueToClosedWindow()); + // Late timers are ignored + tester.fireTimer(new IntervalWindow(new Instant(0), new Instant(10)), new Instant(12), + TimeDomain.EVENT_TIME); + List>> output = tester.extractOutput(); - assertThat(output, Matchers.contains( - isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 1, 0, 10), - isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3, 2, 3, 4, 5), 4, 0, 10), - // Output time is end of the window, because all the new data was late - isSingleWindowedValue(Matchers.containsInAnyOrder( - 1, 2, 3, 2, 3, 4, 5, 4, 5, 6), 9, 0, 10), - // Output time is not end of the window, because the new data (8) wasn't late - isSingleWindowedValue(Matchers.containsInAnyOrder( - 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 8), 8, 0, 10))); + assertThat( + output, Matchers.contains( + isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 1, 0, 10), + isSingleWindowedValue( + Matchers.containsInAnyOrder(1, 2, 3, 2, 3, 4, 5), 4, 0, 10), + // Output time is end of the window, because all the new data was late + isSingleWindowedValue( + Matchers.containsInAnyOrder(1, 2, 3, 2, 3, 4, 5, 4, 5, 6), 9, 0, 10), + // Output time is still end of the window, because the new data (8) was behind + // the output watermark. + isSingleWindowedValue( + Matchers.containsInAnyOrder(1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 8), 9, 0, 10))); + + assertThat( + output.get(0).getPane(), + Matchers.equalTo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1))); assertThat( - output.get(0).getPane(), - Matchers.equalTo(PaneInfo.createPane(true, false, Timing.EARLY))); + output.get(1).getPane(), + Matchers.equalTo(PaneInfo.createPane(false, false, Timing.EARLY, 1, -1))); + + assertThat( + output.get(2).getPane(), + Matchers.equalTo(PaneInfo.createPane(false, false, Timing.ON_TIME, 2, 0))); - // By the time this firing is produced, the input WM already passed the end of the window. assertThat( output.get(3).getPane(), - Matchers.equalTo(PaneInfo.createPane(false, true, Timing.LATE, 3, 0))); + Matchers.equalTo(PaneInfo.createPane(false, true, Timing.LATE, 3, 1))); // And because we're past the end of window + allowed lateness, everything should be cleaned up. 
assertFalse(tester.isMarkedFinished(firstWindow)); @@ -286,7 +314,7 @@ public void testPaneInfoAllStates() throws Exception { when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); injectElement(tester, 1, TriggerResult.FIRE); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY)))); @@ -295,19 +323,24 @@ public void testPaneInfoAllStates() throws Exception { assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.EARLY, 1, -1)))); - tester.advanceWatermark(new Instant(15)); + tester.advanceInputWatermark(new Instant(15)); injectElement(tester, 3, TriggerResult.FIRE); - assertThat(tester.extractOutput(), Matchers.contains( - // This is late, because the trigger wasn't waiting for AfterWatermark - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.LATE, 2, 0)))); + assertThat( + tester.extractOutput(), + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, false, Timing.EARLY, 2, -1)))); injectElement(tester, 4, TriggerResult.FIRE); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.LATE, 3, 1)))); + assertThat( + tester.extractOutput(), + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, false, Timing.EARLY, 3, -1)))); injectElement(tester, 5, TriggerResult.FIRE_AND_FINISH); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 4, 2)))); + assertThat( + tester.extractOutput(), + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, true, Timing.EARLY, 4, -1)))); } @Test @@ -322,7 +355,7 @@ public void testPaneInfoAllStatesAfterWatermark() throws Exception { .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); tester.injectElements( TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); @@ -333,7 +366,7 @@ public void testPaneInfoAllStatesAfterWatermark() throws Exception { assertThat(output, Matchers.contains( WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); - tester.advanceWatermark(new Instant(50)); + tester.advanceInputWatermark(new Instant(50)); // We should get the ON_TIME pane even though it is empty, // because we have an AfterWatermark.pastEndOfWindow() trigger. @@ -344,7 +377,7 @@ public void testPaneInfoAllStatesAfterWatermark() throws Exception { WindowMatchers.isSingleWindowedValue(Matchers.emptyIterable(), 9, 0, 10))); // We should get the final pane even though it is empty. 
- tester.advanceWatermark(new Instant(150)); + tester.advanceInputWatermark(new Instant(150)); output = tester.extractOutput(); assertThat(output, Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); @@ -364,7 +397,7 @@ public void testPaneInfoAllStatesAfterWatermarkAccumulating() throws Exception { .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); tester.injectElements( TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); @@ -375,7 +408,7 @@ public void testPaneInfoAllStatesAfterWatermarkAccumulating() throws Exception { assertThat(output, Matchers.contains( WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); - tester.advanceWatermark(new Instant(50)); + tester.advanceInputWatermark(new Instant(50)); // We should get the ON_TIME pane even though it is empty, // because we have an AfterWatermark.pastEndOfWindow() trigger. @@ -386,7 +419,7 @@ public void testPaneInfoAllStatesAfterWatermarkAccumulating() throws Exception { WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 9, 0, 10))); // We should get the final pane even though it is empty. - tester.advanceWatermark(new Instant(150)); + tester.advanceInputWatermark(new Instant(150)); output = tester.extractOutput(); assertThat(output, Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); @@ -405,7 +438,7 @@ public void testPaneInfoFinalAndOnTime() throws Exception { .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); tester.injectElements( TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); @@ -413,7 +446,7 @@ public void testPaneInfoFinalAndOnTime() throws Exception { assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); - tester.advanceWatermark(new Instant(150)); + tester.advanceInputWatermark(new Instant(150)); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.ON_TIME, 1, 0)))); } @@ -426,7 +459,7 @@ public void testPaneInfoSkipToFinish() throws Exception { AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); injectElement(tester, 1, TriggerResult.FIRE_AND_FINISH); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.EARLY)))); @@ -440,7 +473,7 @@ public void testPaneInfoSkipToNonSpeculativeAndFinish() throws Exception { AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); - tester.advanceWatermark(new Instant(15)); + tester.advanceInputWatermark(new Instant(15)); injectElement(tester, 1, TriggerResult.FIRE_AND_FINISH); assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.LATE)))); @@ -468,13 +501,14 @@ public void testMergeBeforeFinalizing() throws Exception { when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); - tester.advanceWatermark(new Instant(100)); + tester.advanceInputWatermark(new 
Instant(100)); List>> output = tester.extractOutput(); assertThat(output.size(), Matchers.equalTo(1)); assertThat(output.get(0), isSingleWindowedValue(Matchers.containsInAnyOrder(1, 10), 1, 1, 20)); - assertThat(output.get(0).getPane(), - Matchers.equalTo(PaneInfo.createPane(true, true, Timing.LATE, 0, 0))); + assertThat( + output.get(0).getPane(), + Matchers.equalTo(PaneInfo.createPane(true, true, Timing.EARLY, 0, 0))); } @Test @@ -494,7 +528,7 @@ public void testDropDataMultipleWindows() throws Exception { assertEquals(0, tester.getElementsDroppedDueToLateness()); assertEquals(0, tester.getElementsDroppedDueToClosedWindow()); - tester.advanceWatermark(new Instant(70)); + tester.advanceInputWatermark(new Instant(70)); tester.injectElements( TimestampedValue.of(14, new Instant(60))); // [-30, 70) = closed, [0, 100), [30, 130) @@ -523,16 +557,16 @@ public void testIdempotentEmptyPanes() throws Exception { injectElement(tester, 2, TriggerResult.CONTINUE); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.FIRE); - tester.fireTimer(firstWindow, new Instant(10), TimeDomain.EVENT_TIME); + tester.fireTimer(firstWindow, new Instant(9), TimeDomain.EVENT_TIME); // Fire another timer (with no data, so it's an uninteresting pane). when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) - .thenReturn(TriggerResult.FIRE); - tester.fireTimer(firstWindow, new Instant(10), TimeDomain.EVENT_TIME); + .thenReturn(TriggerResult.FIRE); + tester.fireTimer(firstWindow, new Instant(9), TimeDomain.EVENT_TIME); // Finish it off with another datum. injectElement(tester, 3, TriggerResult.FIRE_AND_FINISH); @@ -571,16 +605,16 @@ public void testIdempotentEmptyPanesAccumulating() throws Exception { injectElement(tester, 2, TriggerResult.CONTINUE); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.FIRE); - tester.fireTimer(firstWindow, new Instant(10), TimeDomain.EVENT_TIME); + tester.fireTimer(firstWindow, new Instant(9), TimeDomain.EVENT_TIME); // Fire another timer (with no data, so it's an uninteresting pane). when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) - .thenReturn(TriggerResult.FIRE); - tester.fireTimer(firstWindow, new Instant(10), TimeDomain.EVENT_TIME); + .thenReturn(TriggerResult.FIRE); + tester.fireTimer(firstWindow, new Instant(9), TimeDomain.EVENT_TIME); // Finish it off with another datum. injectElement(tester, 3, TriggerResult.FIRE_AND_FINISH); @@ -591,6 +625,8 @@ public void testIdempotentEmptyPanesAccumulating() throws Exception { // The on-time pane is as expected. assertThat(output.get(0), isSingleWindowedValue(containsInAnyOrder(1, 2), 1, 0, 10)); + assertThat( + output.get(0).getPane(), equalTo(PaneInfo.createPane(true, false, Timing.ON_TIME, 0, 0))); // The late pane has the correct indices. 
assertThat(output.get(1).getValue(), containsInAnyOrder(1, 2, 3)); @@ -639,7 +675,7 @@ public void testEmptyOnTimeFromOrFinally() throws Exception { new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), Duration.millis(100)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); tester.advanceProcessingTime(new Instant(0)); tester.injectElements( @@ -648,9 +684,9 @@ public void testEmptyOnTimeFromOrFinally() throws Exception { TimestampedValue.of(1, new Instant(7)), TimestampedValue.of(1, new Instant(5))); - tester.advanceProcessingTime(new Instant(5)); + tester.advanceProcessingTime(new Instant(6)); - tester.advanceWatermark(new Instant(11)); + tester.advanceInputWatermark(new Instant(11)); List> output = tester.extractOutput(); assertEquals(2, output.size()); @@ -684,7 +720,7 @@ public void testProcessingTime() throws Exception { new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), Duration.millis(100)); - tester.advanceWatermark(new Instant(0)); + tester.advanceInputWatermark(new Instant(0)); tester.advanceProcessingTime(new Instant(0)); tester.injectElements( @@ -692,30 +728,37 @@ public void testProcessingTime() throws Exception { TimestampedValue.of(1, new Instant(3)), TimestampedValue.of(1, new Instant(7)), TimestampedValue.of(1, new Instant(5))); + // 4 elements all at processing time 0 - tester.advanceProcessingTime(new Instant(5)); + tester.advanceProcessingTime(new Instant(6)); // fire [1,3,7,5] since 6 > 0 + 5 tester.injectElements( TimestampedValue.of(1, new Instant(8)), TimestampedValue.of(1, new Instant(4))); + // 6 elements - tester.advanceWatermark(new Instant(11)); + tester.advanceInputWatermark(new Instant(11)); // fire [1,3,7,5,8,4] since 11 > 9 tester.injectElements( TimestampedValue.of(1, new Instant(8)), TimestampedValue.of(1, new Instant(4)), TimestampedValue.of(1, new Instant(5))); + // 9 elements - tester.advanceWatermark(new Instant(12)); + tester.advanceInputWatermark(new Instant(12)); tester.injectElements( TimestampedValue.of(1, new Instant(3))); + // 10 elements tester.advanceProcessingTime(new Instant(15)); tester.injectElements( TimestampedValue.of(1, new Instant(5))); - tester.advanceProcessingTime(new Instant(30)); + // 11 elements + tester.advanceProcessingTime(new Instant(32)); // fire since 32 > 6 + 25 tester.injectElements( TimestampedValue.of(1, new Instant(3))); - tester.advanceWatermark(new Instant(125)); + // 12 elements + // fire [1,3,7,5,8,4,8,4,5,3,5,3] since 125 > 6 + 25 + tester.advanceInputWatermark(new Instant(125)); List> output = tester.extractOutput(); assertEquals(4, output.size()); @@ -755,7 +798,7 @@ public void testMultipleTimerTypes() throws Exception { doAnswer(result) .when(trigger) .onTimer(Mockito..OnTimerContext>any()); - tester.advanceWatermark(new Instant(1000)); + tester.advanceInputWatermark(new Instant(1000)); assertEquals(TriggerResult.FIRE, result.get()); tester.advanceProcessingTime(Instant.now().plus(Duration.millis(10))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java index f5bef0f3da40f..5d119196086b1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java @@ -49,6 +49,7 @@ import com.google.cloud.dataflow.sdk.values.TimestampedValue; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.common.base.Function; +import 
com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.FluentIterable; @@ -59,6 +60,8 @@ import org.joda.time.Duration; import org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; @@ -68,29 +71,28 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.PriorityQueue; import java.util.Set; +import javax.annotation.Nullable; + /** * Test utility that runs a {@link WindowFn}, {@link Trigger} using in-memory stub implementations * to provide the {@link TimerInternals} and {@link WindowingInternals} needed to run * {@code Trigger}s and {@code ReduceFn}s. * - *

To have all interactions between the trigger and underlying components logged, call - * {@link #logInteractions(boolean)}. - * * @param The element types. * @param The final type for elements in the window (for instance, * {@code Iterable}) * @param The type of windows being used. */ public class TriggerTester { + private static final Logger LOG = LoggerFactory.getLogger(TriggerTester.class); - private Instant watermark = BoundedWindow.TIMESTAMP_MIN_VALUE; - private Instant processingTime = BoundedWindow.TIMESTAMP_MIN_VALUE; - - private final BatchTimerInternals timerInternals = new BatchTimerInternals(processingTime); + private final TestInMemoryStateInternals stateInternals = new TestInMemoryStateInternals(); + private final TestTimerInternals timerInternals = new TestTimerInternals(); private final WindowFn windowFn; - private final StubContexts stubContexts; + private final TestWindowingInternals windowingInternals; private final Coder outputCoder; private final WindowingStrategy objectStrategy; private final ReduceFn reduceFn; @@ -114,25 +116,21 @@ public static TriggerTester public static TriggerTester, W> nonCombining( WindowFn windowFn, TriggerBuilder trigger, AccumulationMode mode, Duration allowedDataLateness) throws Exception { - - WindowingStrategy strategy = WindowingStrategy.of(windowFn) - .withTrigger(trigger.buildTrigger()) - .withMode(mode) - .withAllowedLateness(allowedDataLateness); + WindowingStrategy strategy = + WindowingStrategy.of(windowFn) + .withTrigger(trigger.buildTrigger()) + .withMode(mode) + .withAllowedLateness(allowedDataLateness); return nonCombining(strategy); } - public static - TriggerTester combining( - WindowFn windowFn, Trigger trigger, AccumulationMode mode, - KeyedCombineFn combineFn, - Coder outputCoder, + public static TriggerTester + combining(WindowFn windowFn, Trigger trigger, AccumulationMode mode, + KeyedCombineFn combineFn, Coder outputCoder, Duration allowedDataLateness) throws Exception { - - WindowingStrategy strategy = WindowingStrategy.of(windowFn) - .withTrigger(trigger) - .withMode(mode) - .withAllowedLateness(allowedDataLateness); + WindowingStrategy strategy = + WindowingStrategy.of(windowFn).withTrigger(trigger).withMode(mode).withAllowedLateness( + allowedDataLateness); CoderRegistry registry = new CoderRegistry(); registry.registerStandardCoders(); @@ -142,29 +140,26 @@ TriggerTester combining( return new TriggerTester( strategy, - SystemReduceFn.combining( - StringUtf8Coder.of(), fn).create(KEY), + SystemReduceFn.combining(StringUtf8Coder.of(), fn) + .create(KEY), outputCoder); } - private TriggerTester( - WindowingStrategy wildcardStrategy, - ReduceFn reduceFn, - Coder outputCoder) throws Exception { + private TriggerTester(WindowingStrategy wildcardStrategy, + ReduceFn reduceFn, Coder outputCoder) throws Exception { @SuppressWarnings("unchecked") WindowingStrategy objectStrategy = (WindowingStrategy) wildcardStrategy; this.objectStrategy = objectStrategy; this.reduceFn = reduceFn; this.windowFn = objectStrategy.getWindowFn(); - this.stubContexts = new StubContexts(); + this.windowingInternals = new TestWindowingInternals(); this.outputCoder = outputCoder; executableTrigger = wildcardStrategy.getTrigger(); } ReduceFnRunner createRunner() { - return new ReduceFnRunner<>( - KEY, objectStrategy, timerInternals, stubContexts, + return new ReduceFnRunner<>(KEY, objectStrategy, timerInternals, windowingInternals, droppedDueToClosedWindow, droppedDueToLateness, reduceFn); } @@ -187,11 +182,9 @@ public final void 
assertHasOnlyGlobalAndFinishedSetsFor(W... expectedWindows) { public final void assertHasOnlyGlobalAndFinishedSetsAndPaneInfoFor(W... expectedWindows) { assertHasOnlyGlobalAndAllowedTags( ImmutableSet.copyOf(expectedWindows), - ImmutableSet.>of( - TriggerRunner.FINISHED_BITS_TAG, - PaneInfoTracker.PANE_INFO_TAG, - WatermarkHold.watermarkHoldTagForOutputTimeFn( - objectStrategy.getOutputTimeFn()))); + ImmutableSet.>of(TriggerRunner.FINISHED_BITS_TAG, PaneInfoTracker.PANE_INFO_TAG, + WatermarkHold.watermarkHoldTagForOutputTimeFn(objectStrategy.getOutputTimeFn()), + WatermarkHold.EXTRA_HOLD_TAG)); } public final void assertHasOnlyGlobalState() { @@ -205,8 +198,8 @@ public final void assertHasOnlyGlobalAndPaneInfoFor(W... expectedWindows) { ImmutableSet.copyOf(expectedWindows), ImmutableSet.>of( PaneInfoTracker.PANE_INFO_TAG, - WatermarkHold.watermarkHoldTagForOutputTimeFn( - objectStrategy.getOutputTimeFn()))); + WatermarkHold.watermarkHoldTagForOutputTimeFn(objectStrategy.getOutputTimeFn()), + WatermarkHold.EXTRA_HOLD_TAG)); } /** @@ -221,11 +214,11 @@ private void assertHasOnlyGlobalAndAllowedTags( } Set actualWindows = new HashSet<>(); - for (StateNamespace namespace : stubContexts.state.getNamespacesInUse()) { + for (StateNamespace namespace : stateInternals.getNamespacesInUse()) { if (namespace instanceof StateNamespaces.GlobalNamespace) { continue; } else if (namespace instanceof StateNamespaces.WindowNamespace) { - Set> tagsInUse = stubContexts.state.getTagsInUse(namespace); + Set> tagsInUse = stateInternals.getTagsInUse(namespace); if (tagsInUse.isEmpty()) { continue; } @@ -237,7 +230,7 @@ private void assertHasOnlyGlobalAndAllowedTags( fail(namespace + " has unexpected states: " + tagsInUse); } } else if (namespace instanceof StateNamespaces.WindowAndTriggerNamespace) { - Set> tagsInUse = stubContexts.state.getTagsInUse(namespace); + Set> tagsInUse = stateInternals.getTagsInUse(namespace); assertTrue(namespace + " contains " + tagsInUse, tagsInUse.isEmpty()); } else { fail("Unrecognized namespace " + namespace); @@ -252,7 +245,11 @@ private StateNamespace windowNamespace(W window) { } public Instant getWatermarkHold() { - return stubContexts.state.minimumWatermarkHold(); + return stateInternals.earliestWatermarkHold(); + } + + public Instant getOutputWatermark() { + return timerInternals.currentOutputWatermarkTime(); } public long getElementsDroppedDueToClosedWindow() { @@ -263,40 +260,42 @@ public long getElementsDroppedDueToLateness() { return droppedDueToLateness.getSum(); } + /** + * How many panes do we have in the output? + */ + public int getOutputSize() { + return windowingInternals.outputs.size(); + } + /** * Retrieve the values that have been output to this time, and clear out the output accumulator. */ public List> extractOutput() { - ImmutableList> result = FluentIterable.from(stubContexts.outputs) - .transform(new Function>, WindowedValue>() { - @Override - public WindowedValue apply(WindowedValue> input) { - return input.withValue(input.getValue().getValue()); - } - }) - .toList(); - stubContexts.outputs.clear(); + ImmutableList> result = + FluentIterable.from(windowingInternals.outputs) + .transform(new Function>, WindowedValue>() { + @Override + public WindowedValue apply(WindowedValue> input) { + return input.withValue(input.getValue().getValue()); + } + }) + .toList(); + windowingInternals.outputs.clear(); return result; } - /** Advance the watermark to the specified time, firing any timers that should fire. 
*/ - public void advanceWatermark(Instant newWatermark) throws Exception { - Preconditions.checkState(!newWatermark.isBefore(watermark), - "Cannot move watermark time backwards from %s to %s", - watermark.getMillis(), newWatermark.getMillis()); - watermark = newWatermark; + /** + * Advance the input watermark to the specified time, firing any timers that should + * fire. Then advance the output watermark as far as possible. + */ + public void advanceInputWatermark(Instant newInputWatermark) throws Exception { ReduceFnRunner runner = createRunner(); - timerInternals.advanceWatermark(runner, newWatermark); + timerInternals.advanceInputWatermark(runner, newInputWatermark); runner.persist(); } /** Advance the processing time to the specified time, firing any timers that should fire. */ - public void advanceProcessingTime( - Instant newProcessingTime) throws Exception { - Preconditions.checkState(!newProcessingTime.isBefore(processingTime), - "Cannot move processing time backwards from %s to %s", - processingTime.getMillis(), newProcessingTime.getMillis()); - processingTime = newProcessingTime; + public void advanceProcessingTime(Instant newProcessingTime) throws Exception { ReduceFnRunner runner = createRunner(); timerInternals.advanceProcessingTime(runner, newProcessingTime); runner.persist(); @@ -308,6 +307,9 @@ public void advanceProcessingTime( */ @SafeVarargs public final void injectElements(TimestampedValue... values) throws Exception { + for (TimestampedValue value : values) { + WindowTracing.trace("TriggerTester.injectElements: {}", value); + } ReduceFnRunner runner = createRunner(); runner.processElements(Iterables.transform( Arrays.asList(values), new Function, WindowedValue>() { @@ -316,7 +318,7 @@ public WindowedValue apply(TimestampedValue input) { try { InputT value = input.getValue(); Instant timestamp = input.getTimestamp(); - Collection windows = windowFn.assignWindows(new TriggerTester.StubAssignContext( + Collection windows = windowFn.assignWindows(new TriggerTester.TestAssignContext( windowFn, value, timestamp, Arrays.asList(GlobalWindow.INSTANCE))); return WindowedValue.of(value, timestamp, windows, PaneInfo.NO_FIRING); } catch (Exception e) { @@ -331,14 +333,16 @@ public WindowedValue apply(TimestampedValue input) { public void fireTimer(W window, Instant timestamp, TimeDomain domain) { ReduceFnRunner runner = createRunner(); - runner.onTimer(TimerData.of( - StateNamespaces.window(windowFn.windowCoder(), window), timestamp, domain)); + runner.onTimer( + TimerData.of(StateNamespaces.window(windowFn.windowCoder(), window), timestamp, domain)); runner.persist(); } - private static class TestingInMemoryStateInternals extends InMemoryStateInternals { - - protected Set> getTagsInUse(StateNamespace namespace) { + /** + * Simulate state. + */ + private static class TestInMemoryStateInternals extends InMemoryStateInternals { + public Set> getTagsInUse(StateNamespace namespace) { Set> inUse = new HashSet<>(); for (Map.Entry, State> entry : inMemoryState.getTagsInUse(namespace).entrySet()) { if (!isEmptyForTesting(entry.getValue())) { @@ -352,7 +356,8 @@ public Set getNamespacesInUse() { return inMemoryState.getNamespacesInUse(); } - public Instant minimumWatermarkHold() { + /** Return the earliest output watermark hold in state, or null if none. 
*/ + public Instant earliestWatermarkHold() { Instant minimum = null; for (State storage : inMemoryState.values()) { if (storage instanceof WatermarkStateInternal) { @@ -366,10 +371,11 @@ public Instant minimumWatermarkHold() { } } - private class StubContexts implements WindowingInternals> { - - private TestingInMemoryStateInternals state = new TestingInMemoryStateInternals(); - + /** + * Convey the simulated state and implement {@link #outputWindowedValue} to capture all output + * elements. + */ + private class TestWindowingInternals implements WindowingInternals> { private List>> outputs = new ArrayList<>(); @Override @@ -385,7 +391,7 @@ public void outputWindowedValue(KV output, Instant timestamp, @Override public TimerInternals timerInternals() { throw new UnsupportedOperationException( - "getTimerInternals() should not be called on StubContexts."); + "Testing triggers should not use timers from WindowingInternals."); } @Override @@ -399,27 +405,28 @@ public PaneInfo pane() { throw new UnsupportedOperationException( "Testing triggers should not use pane from WindowingInternals."); } + @Override - public void writePCollectionViewData(TupleTag tag, Iterable> data, - Coder elemCoder) throws IOException { + public void writePCollectionViewData( + TupleTag tag, Iterable> data, Coder elemCoder) throws IOException { throw new UnsupportedOperationException( "Testing triggers should not use writePCollectionViewData from WindowingInternals."); } @Override public StateInternals stateInternals() { - return state; + return stateInternals; } } - private static class StubAssignContext + private static class TestAssignContext extends WindowFn.AssignContext { private Object element; private Instant timestamp; private Collection windows; - public StubAssignContext(WindowFn windowFn, - Object element, Instant timestamp, Collection windows) { + public TestAssignContext(WindowFn windowFn, Object element, Instant timestamp, + Collection windows) { windowFn.super(); this.element = element; this.timestamp = timestamp; @@ -443,7 +450,6 @@ public Collection windows() { } private static class InMemoryLongSumAggregator implements Aggregator { - private final String name; private long sum = 0; @@ -470,4 +476,148 @@ public long getSum() { return sum; } } + + /** + * Simulate the firing of timers and progression of input and output watermarks for a + * single computation and key in a Windmill-like streaming environment. Similar to + * {@link BatchTimerInternals}, but also tracks the output watermark. + */ + private class TestTimerInternals implements TimerInternals { + /** At most one timer per timestamp is kept. */ + private Set existingTimers = new HashSet<>(); + + /** Pending input watermark timers, in timestamp order. */ + private PriorityQueue watermarkTimers = new PriorityQueue<>(11); + + /** Pending processing time timers, in timestamp order. */ + private PriorityQueue processingTimers = new PriorityQueue<>(11); + + /** Current input watermark. */ + @Nullable + private Instant inputWatermarkTime = null; + + /** Current output watermark. */ + @Nullable + private Instant outputWatermarkTime = null; + + /** Current processing time. */ + private Instant processingTime = BoundedWindow.TIMESTAMP_MIN_VALUE; + + private PriorityQueue queue(TimeDomain domain) { + return TimeDomain.EVENT_TIME.equals(domain) ? 
watermarkTimers : processingTimers; + } + + @Override + public void setTimer(TimerData timer) { + WindowTracing.trace("TestTimerInternals.setTimer: {}", timer); + if (existingTimers.add(timer)) { + queue(timer.getDomain()).add(timer); + } + } + + @Override + public void deleteTimer(TimerData timer) { + WindowTracing.trace("TestTimerInternals.deleteTimer: {}", timer); + existingTimers.remove(timer); + queue(timer.getDomain()).remove(timer); + } + + @Override + public Instant currentProcessingTime() { + return processingTime; + } + + @Override + @Nullable + public Instant currentInputWatermarkTime() { + return inputWatermarkTime; + } + + @Override + @Nullable + public Instant currentOutputWatermarkTime() { + return outputWatermarkTime; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(getClass()) + .add("watermarkTimers", watermarkTimers) + .add("processingTimers", processingTimers) + .add("inputWatermarkTime", inputWatermarkTime) + .add("outputWatermarkTime", outputWatermarkTime) + .add("processingTime", processingTime) + .toString(); + } + + public void advanceInputWatermark( + ReduceFnRunner runner, Instant newInputWatermark) { + Preconditions.checkNotNull(newInputWatermark); + Preconditions.checkState( + inputWatermarkTime == null || !newInputWatermark.isBefore(inputWatermarkTime), + "Cannot move input watermark time backwards from %s to %s", inputWatermarkTime, + newInputWatermark); + WindowTracing.trace("TestTimerInternals.advanceInputWatermark: from {} to {}", + inputWatermarkTime, newInputWatermark); + inputWatermarkTime = newInputWatermark; + advanceAndFire(runner, newInputWatermark, TimeDomain.EVENT_TIME); + + Instant hold = stateInternals.earliestWatermarkHold(); + if (hold == null) { + WindowTracing.trace("TestTimerInternals.advanceInputWatermark: no holds, " + + "so output watermark = input watermark"); + hold = inputWatermarkTime; + } + advanceOutputWatermark(hold); + } + + private void advanceOutputWatermark(Instant newOutputWatermark) { + Preconditions.checkNotNull(newOutputWatermark); + Preconditions.checkNotNull(inputWatermarkTime); + if (newOutputWatermark.isAfter(inputWatermarkTime)) { + WindowTracing.trace( + "TestTimerInternals.advanceOutputWatermark: clipping output watermark from {} to {}", + newOutputWatermark, inputWatermarkTime); + newOutputWatermark = inputWatermarkTime; + } + Preconditions.checkState( + outputWatermarkTime == null || !newOutputWatermark.isBefore(outputWatermarkTime), + "Cannot move output watermark time backwards from %s to %s", outputWatermarkTime, + newOutputWatermark); + WindowTracing.trace("TestTimerInternals.advanceOutputWatermark: from {} to {}", + outputWatermarkTime, newOutputWatermark); + outputWatermarkTime = newOutputWatermark; + } + + public void advanceProcessingTime( + ReduceFnRunner runner, Instant newProcessingTime) { + Preconditions.checkState(!newProcessingTime.isBefore(processingTime), + "Cannot move processing time backwards from %s to %s", processingTime, newProcessingTime); + WindowTracing.trace("TestTimerInternals.advanceProcessingTime: from {} to {}", processingTime, + newProcessingTime); + processingTime = newProcessingTime; + advanceAndFire(runner, newProcessingTime, TimeDomain.PROCESSING_TIME); + } + + private void advanceAndFire( + ReduceFnRunner runner, Instant currentTime, TimeDomain domain) { + PriorityQueue queue = queue(domain); + boolean shouldFire = false; + + do { + TimerData timer = queue.peek(); + // Timers fire when the current time progresses past the timer time. 
+ shouldFire = timer != null && currentTime.isAfter(timer.getTimestamp()); + if (shouldFire) { + WindowTracing.trace( + "TestTimerInternals.advanceAndFire: firing {} at {}", timer, currentTime); + // Remove before firing, so that if the trigger adds another identical + // timer we don't remove it. + queue.remove(); + + runner.onTimer(timer); + } + } while (shouldFire); + } + } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java index 9b0fed7d11b51..fa3c82634a943 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/StateTagTest.java @@ -72,9 +72,12 @@ public void testWatermarkBagEquality() { StateTag bar2 = StateTags.watermarkStateInternal( "bar", OutputTimeFns.outputAtLatestInputTimestamp()); + // Same id, same fn. assertEquals(foo1, foo2); + // Different id, same fn. assertNotEquals(foo1, bar); - assertNotEquals(bar, bar2); + // Same id, different fn. + assertEquals(bar, bar2); } @Test From 79b427495958a6ea435f818a1c568bce26e2443e Mon Sep 17 00:00:00 2001 From: markshields Date: Wed, 9 Dec 2015 10:50:41 -0800 Subject: [PATCH 1219/1541] Use 8MB page sizes to match minimums used elsewhere E.g.: GCS recommends an 8MB minimum buffer, so if you're streaming data from a GBK Iterable into GCS it seems best to have our buffer at least in the same ballpark. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109809534 --- .../dataflow/sdk/runners/worker/WindmillStateReader.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java index 88390e2b37664..fb0421a703757 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java @@ -30,8 +30,6 @@ import com.google.protobuf.ByteString; import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; @@ -60,7 +58,7 @@ class WindmillStateReader { * Ideal maximum bytes in a TagList response. However, Windmill will always return * at least one value if possible irrespective of this limit.
*/ - public static final long MAX_LIST_BYTES = 1L << 20; // 1MB + public static final long MAX_LIST_BYTES = 8L << 20; // 8MB /** * When combined with a key and computationId, represents the unique address for @@ -144,8 +142,6 @@ public ValuesAndContToken(List values, @Nullable ByteString continuationToken } } - private static final Logger LOG = LoggerFactory.getLogger(WindmillStateReader.class); - private final String computation; private final ByteString key; private final long workToken; @@ -594,7 +590,6 @@ private TagListPagingIterable(List firstPage, StateTag secondPageCont, Coder< @Override public Iterator iterator() { return new AbstractIterator() { - private int numPagesRead = 1; private Iterator currentPage = firstPage.iterator(); private StateTag nextPageCont = secondPageCont; private Future> pendingNextPage = @@ -613,7 +608,6 @@ protected T computeNext() { ValuesAndContToken valuesAndContToken; try { valuesAndContToken = pendingNextPage.get(); - numPagesRead++; } catch (InterruptedException | ExecutionException e) { throw new RuntimeException("Unable to read value from state", e); } From 40e53d701722285d0e702291995f5842d252cfa5 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Wed, 9 Dec 2015 15:17:51 -0800 Subject: [PATCH 1220/1541] Updates google-api-services-dataflow dependency ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109838205 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 099352d7cf0b9..734eaf8775291 100644 --- a/pom.xml +++ b/pom.xml @@ -69,7 +69,7 @@ 1.7.7 v2-rev248-1.21.0 - v1b3-rev12-1.21.0 + v1b3-rev13-1.21.0 v1beta2-rev1-3.0.2 1.21.0 18.0 From 1afaf46c93b5d2e654a4d44ee94260e53ad374a1 Mon Sep 17 00:00:00 2001 From: klk Date: Wed, 9 Dec 2015 19:23:26 -0800 Subject: [PATCH 1221/1541] Return a future for WindmillState.persist() Previously, WindmillState.persist() was a purely local operation. Recent additions made it necessary to make remote calls, which could be serialized in unfortunate ways. This change makes the need for those calls to be asynchronous explicit, by making them into Futures. 
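In outline, persist() now returns a future per state cell so any remote reads can run in parallel, and the caller merges the resulting partial commit requests once they complete. A minimal, self-contained sketch of that two-phase shape, not SDK code (Commit and AsyncPersistable below are made-up stand-ins for Windmill's WorkItemCommitRequest and the WindmillState interface), could look like this:

import com.google.common.util.concurrent.Futures;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

public class AsyncPersistSketch {

  /** Hypothetical stand-in for Windmill's WorkItemCommitRequest, used only for illustration. */
  static class Commit {
    final List<String> updates = new ArrayList<>();
  }

  /** Each piece of state builds its commit asynchronously instead of mutating a shared builder. */
  interface AsyncPersistable {
    Future<Commit> persist();
  }

  public static void main(String[] args) throws ExecutionException, InterruptedException {
    List<AsyncPersistable> state = new ArrayList<>();

    // A cell whose changes are purely local can hand back an already-completed future.
    state.add(new AsyncPersistable() {
      @Override
      public Future<Commit> persist() {
        Commit c = new Commit();
        c.updates.add("valueUpdate: tag=foo");
        return Futures.immediateFuture(c);
      }
    });

    // Phase 1: let every cell schedule its (possibly remote) reads and return a future.
    List<Future<Commit>> pending = new ArrayList<>();
    for (AsyncPersistable s : state) {
      pending.add(s.persist());
    }

    // Phase 2: merge the partial commits only after all reads are in flight.
    Commit merged = new Commit();
    for (Future<Commit> f : pending) {
      merged.updates.addAll(f.get().updates);
    }
    System.out.println(merged.updates);
  }
}

In the change below, SimpleWindmillState covers the already-completed case via Futures.immediateFuture, while the watermark-hold state defers its read-modify-write with Futures.lazyTransform.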
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109857899 --- .../worker/WindmillStateInternals.java | 111 +++++++++++++----- 1 file changed, 81 insertions(+), 30 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java index 9f79ad6463839..63a0256e072da 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java @@ -17,6 +17,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; +import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill.WorkItemCommitRequest; import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; @@ -34,6 +35,7 @@ import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Function; import com.google.common.base.Supplier; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Futures; @@ -146,6 +148,8 @@ public WindmillStateInternals(String prefix, boolean useStateFamilies, } public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) { + List> commitsToMerge = new ArrayList<>(); + // Call persist on each first, which may schedule some futures for reading. for (State location : inMemoryState.values()) { if (!(location instanceof WindmillState)) { @@ -156,7 +160,7 @@ public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) } try { - ((WindmillState) location).persist(commitBuilder); + commitsToMerge.add(((WindmillState) location).persist()); } catch (IOException e) { throw new RuntimeException("Unable to persist state", e); } @@ -168,6 +172,14 @@ public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) // Clear out the map of already retrieved state instances. inMemoryState.clear(); + + try { + for (Future commitFuture : commitsToMerge) { + commitBuilder.mergeFrom(commitFuture.get()); + } + } catch (ExecutionException | InterruptedException exc) { + throw new RuntimeException("Failed to retrieve Windmill state during persist()", exc); + } } private ByteString encodeKey(StateNamespace namespace, StateTag address) { @@ -185,8 +197,34 @@ private ByteString encodeKey(StateNamespace namespace, StateTag address) { } } + /** + * Anything that can provide a {@link WorkItemCommitRequest} to persist its state; it may need + * to read some state in order to build the commit request. + */ private interface WindmillState { - void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws IOException; + /** + * Return an asynchronously computed {@link WorkItemCommitRequest}. The request should + * be of a form that can be merged with others (only add to repeated fields). + */ + Future persist() + throws IOException; + } + + /** + * Base class for implementations of {@link WindmillState} where the {@link #persist} call does + * not require any asynchronous reading. 
+ */ + private abstract static class SimpleWindmillState implements WindmillState { + @Override + public final Future persist() throws IOException{ + return Futures.immediateFuture(persistDirectly()); + } + + /** + * Returns a {@link WorkItemCommitRequest} that can be used to persist this state to + * Windmill. + */ + protected abstract WorkItemCommitRequest persistDirectly() throws IOException; } @Override @@ -194,7 +232,8 @@ public T state(StateNamespace namespace, StateTag address) return inMemoryState.get(namespace, address); } - private static class WindmillValue implements ValueState, WindmillState { + private static class WindmillValue extends SimpleWindmillState + implements ValueState, WindmillState { private final ByteString stateKey; private final String stateFamily; @@ -244,11 +283,10 @@ public void set(T value) { } @Override - public void persist( - Windmill.WorkItemCommitRequest.Builder commitBuilder) throws IOException { + protected WorkItemCommitRequest persistDirectly() throws IOException { if (!modified) { // No in-memory changes. - return; + return WorkItemCommitRequest.newBuilder().buildPartial(); } // We can't write without doing a read, so we need to kick off a read if we get here. @@ -261,6 +299,7 @@ public void persist( coder.encode(modifiedValue, stream, Coder.Context.OUTER); } + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); commitBuilder .addValueUpdatesBuilder() .setTag(stateKey) @@ -268,10 +307,12 @@ public void persist( .getValueBuilder() .setData(stream.toByteString()) .setTimestamp(Long.MAX_VALUE); + return commitBuilder.buildPartial(); } } - private static class WindmillBag implements BagState, WindmillState { + private static class WindmillBag extends SimpleWindmillState + implements BagState, WindmillState { private final ByteString stateKey; private final String stateFamily; @@ -351,7 +392,9 @@ public void add(T input) { } @Override - public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws IOException { + public WorkItemCommitRequest persistDirectly() throws IOException { + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); + if (cleared) { // If we do a delete, we need to have done a read to prevent Windmill complaining about // blind deletes. 
We use the underlying reader, because we normally skip the actual read @@ -363,7 +406,6 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws .setEndTimestamp(Long.MAX_VALUE); } - if (!localAdditions.isEmpty()) { byte[] zero = {0x0}; Windmill.TagList.Builder listUpdatesBuilder = @@ -382,6 +424,7 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws .setTimestamp(Long.MAX_VALUE); } } + return commitBuilder.buildPartial(); } } @@ -478,27 +521,30 @@ public void add(Instant outputTime) { } @Override - public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) { + public Future persist() { if (!cleared && localAdditions == null) { // Nothing to do - return; + return Futures.immediateFuture(WorkItemCommitRequest.newBuilder().buildPartial()); } else if (cleared && localAdditions == null) { // Just clearing the persisted state; blind delete + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true); - + return Futures.immediateFuture(commitBuilder.buildPartial()); } else if (cleared && localAdditions != null) { // Since we cleared before adding, we can do a blind overwrite of persisted state + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true) .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); + return Futures.immediateFuture(commitBuilder.buildPartial()); } else if (!cleared && localAdditions != null){ // Otherwise, we need to combine the local additions with the already persisted data - combineWithPersisted(commitBuilder); + return combineWithPersisted(); } else { throw new IllegalStateException("Unreachable condition"); } @@ -508,7 +554,7 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) { * Combines local additions with persisted data and mutates the {@code commitBuilder} * to write the result. */ - private void combineWithPersisted(Windmill.WorkItemCommitRequest.Builder commitBuilder) { + private Future combineWithPersisted() { boolean windmillCanCombine = false; // If the combined output time depends only on the window, then we are just blindly adding @@ -525,29 +571,34 @@ private void combineWithPersisted(Windmill.WorkItemCommitRequest.Builder commitB if (windmillCanCombine) { // We do a blind write and let Windmill take the MIN + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); commitBuilder.addWatermarkHoldsBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .addTimestamps( WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); + return Futures.immediateFuture(commitBuilder.buildPartial()); } else { // The non-fast path does a read-modify-write - Instant priorHold; - try { - priorHold = reader.watermarkFuture(stateKey, stateFamily).get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Unable to read state", e); - } + return Futures.lazyTransform(reader.watermarkFuture(stateKey, stateFamily), + new Function() { - Instant combinedHold = (priorHold == null) ? 
localAdditions - : outputTimeFn.combine(priorHold, localAdditions); + @Override + public WorkItemCommitRequest apply(Instant priorHold) { - commitBuilder.addWatermarkHoldsBuilder() - .setTag(stateKey) - .setStateFamily(stateFamily) - .setReset(true) - .addTimestamps( - WindmillTimeUtils.harnessToWindmillTimestamp(combinedHold)); + Instant combinedHold = (priorHold == null) ? localAdditions + : outputTimeFn.combine(priorHold, localAdditions); + + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); + commitBuilder.addWatermarkHoldsBuilder() + .setTag(stateKey) + .setStateFamily(stateFamily) + .setReset(true) + .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(combinedHold)); + + return commitBuilder.buildPartial(); + } + }); } } } @@ -599,7 +650,7 @@ public void clear() { } @Override - public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws IOException { + public Future persist() throws IOException { if (hasLocalAdditions) { // TODO: Take into account whether it's in the cache. if (COMPACT_NOW.get().get()) { @@ -610,7 +661,7 @@ public void persist(Windmill.WorkItemCommitRequest.Builder commitBuilder) throws localAdditionsAccum = combineFn.createAccumulator(); hasLocalAdditions = false; } - bag.persist(commitBuilder); + return bag.persist(); } @Override From 8dff8fb53c1ea14a1e4458946fee8772c20c9cdb Mon Sep 17 00:00:00 2001 From: robertwb Date: Thu, 10 Dec 2015 10:43:22 -0800 Subject: [PATCH 1222/1541] Minor javadoc updates. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109913802 --- .../cloud/dataflow/sdk/transforms/Create.java | 40 +++++++++++-------- .../dataflow/sdk/transforms/Flatten.java | 4 +- .../dataflow/sdk/transforms/GroupByKey.java | 19 +++++---- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java index 556f3010d13ef..a74e5bff7f65a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java @@ -94,8 +94,9 @@ public class Create { * elements. * *

By default, {@code Create.Values} can automatically determine the {@code Coder} to use - * if all elements have the same run-time class, and a default coder is registered for that - * class. See {@link CoderRegistry} for details on how defaults are determined. + * if all elements have the same non-parameterized run-time class, and a default coder is + * registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly. */ public static Values of(Iterable elems) { @@ -110,11 +111,12 @@ public static Values of(Iterable elems) { * {@link Create#timestamped} for a way of creating a {@code PCollection} * with timestamped elements. * - *

The argument should not be modified after this is called. + *

The arguments should not be modified after this is called. * *

By default, {@code Create.Values} can automatically determine the {@code Coder} to use - * if all elements have the same run-time class, and a default coder is registered for that - * class. See {@link CoderRegistry} for details on how defaults are determined. + * if all elements have the same non-parameterized run-time class, and a default coder is + * registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly. */ @SafeVarargs @@ -132,8 +134,9 @@ public static Values of(T... elems) { * with timestamped elements. * *

By default, {@code Create.Values} can automatically determine the {@code Coder} to use - * if all elements have the same run-time class, and a default coder is registered for that - * class. See {@link CoderRegistry} for details on how defaults are determined. + * if all elements have the same non-parameterized run-time class, and a default coder is + * registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly. */ public static Values> of(Map elems) { @@ -152,8 +155,9 @@ public static Values> of(Map elems) { *

The argument should not be modified after this is called. * *

By default, {@code Create.TimestampedValues} can automatically determine the {@code Coder} - * to use if all elements have the same run-time class, and a default coder is registered for - * that class. See {@link CoderRegistry} for details on how defaults are determined. + * to use if all elements have the same non-parameterized run-time class, and a default coder is + * registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * Otherwise, use {@link Create.TimestampedValues#withCoder} to set the coder explicitly. */ public static TimestampedValues timestamped(Iterable> elems) { @@ -164,7 +168,7 @@ public static TimestampedValues timestamped(Iterable> * Returns a new {@link Create.TimestampedValues} transform that produces a {@link PCollection} * containing the specified elements with the specified timestamps. * - *

The argument should not be modified after this is called. + *

The arguments should not be modified after this is called. */ @SafeVarargs public static TimestampedValues timestamped( @@ -179,8 +183,9 @@ public static TimestampedValues timestamped( *

The arguments should not be modified after this is called. * *

By default, {@code Create.TimestampedValues} can automatically determine the {@code Coder} - * to use if all elements have the same run-time class, and a default coder is registered for - * that class. See {@link CoderRegistry} for details on how defaults are determined. + * to use if all elements have the same non-parameterized run-time class, and a default coder + * is registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * Otherwise, use {@link Create.TimestampedValues#withCoder} to set the coder explicitly. * @throws IllegalArgumentException if there are a different number of values @@ -212,8 +217,9 @@ public static class Values extends PTransform> { * value of type {@code T}. * *

By default, {@code Create.Values} can automatically determine the {@code Coder} to use - * if all elements have the same run-time class, and a default coder is registered for that - * class. See {@link CoderRegistry} for details on how defaults are determined. + * if all elements have the same non-parameterized run-time class, and a default coder is + * registered for that class. See {@link CoderRegistry} for details on how defaults are + * determined. * *

Note that for {@link Create.Values} with no elements, the {@link VoidCoder} is used. */ @@ -327,9 +333,9 @@ public static class TimestampedValues extends Values { * value of type {@code T}. * *

By default, {@code Create.TimestampedValues} can automatically determine the - * {@code Coder} to use if all elements have the same run-time class, and a default coder is - * registered for that class. See {@link CoderRegistry} for details on how defaults are - * determined. + * {@code Coder} to use if all elements have the same non-parameterized run-time class, + * and a default coder is registered for that class. See {@link CoderRegistry} for details + * on how defaults are determined. * *

Note that for {@link Create.TimestampedValues with no elements}, the {@link VoidCoder} * is used. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java index 2e1a3ab04f85e..de6add0ea3c64 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java @@ -58,8 +58,7 @@ public class Flatten { * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * - *

If any of the inputs to {@code Flatten} require window merging, - * all inputs must have equal {@link WindowFn}s. + *

All inputs must have equal {@link WindowFn}s. * The output elements of {@code Flatten} are in the same windows and * have the same timestamps as their corresponding input elements. The output * {@code PCollection} will have the same @@ -98,6 +97,7 @@ public static FlattenIterables iterables() { * A {@link PTransform} that flattens a {@link PCollectionList} * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. + * Implements {@link #pCollections}. * * @param the type of the elements in the input and output * {@code PCollection}s. diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java index ca125545d90d9..6c9defadfe55b 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java @@ -28,6 +28,7 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn; import com.google.cloud.dataflow.sdk.util.ReifyTimestampAndWindowsDoFn; @@ -53,8 +54,9 @@ * {@code PCollection>>} representing a map from * each distinct key and window of the input {@code PCollection} to an * {@code Iterable} over all the values associated with that key in - * the input. Each key in the output {@code PCollection} is unique within - * each window. + * the input per window. Absent repeatedly-firing + * {@link Window#triggering triggering}, each key in the output + * {@code PCollection} is unique within each window. * *

{@code GroupByKey} is analogous to converting a multi-map into * a uni-map, and related to {@code GROUP BY} in SQL. It corresponds @@ -68,7 +70,7 @@ * encoded bytes. This admits efficient parallel evaluation. Note that * this requires that the {@code Coder} of the keys be deterministic (see * {@link Coder#verifyDeterministic()}). If the key {@code Coder} is not - * deterministic, an exception is thrown at runtime. + * deterministic, an exception is thrown at pipeline construction time. * *

By default, the {@code Coder} of the keys of the output * {@code PCollection} is the same as that of the keys of the input, @@ -109,18 +111,21 @@ * {@link com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark} * for details on the estimation. * - *

The timestamp for each emitted pane is the earliest event time among all elements in - * the pane. The output {@code PCollection} will have the same {@link WindowFn} + *

The timestamp for each emitted pane is determined by the + * {@link Window.Bound#withOutputTimeFn windowing operation}. + * The output {@code PCollection} will have the same {@link WindowFn} * as the input. * *

If the input {@code PCollection} contains late data (see * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read.Bound#timestampLabel} - * for an example of how this can occur), then there may be multiple elements + * for an example of how this can occur) or the + * {@link Window#triggering requested TriggerFn} can fire before + * the watermark, then there may be multiple elements * output by a {@code GroupByKey} that correspond to the same key and window. * *

If the {@link WindowFn} of the input requires merging, it is not * valid to apply another {@code GroupByKey} without first applying a new - * {@link WindowFn}. + * {@link WindowFn} or applying {@link Window#remerge()}. * * @param the type of the keys of the input and output * {@code PCollection}s From bceb7d60204b60736b18ebcd456a9cdd60176a06 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Thu, 10 Dec 2015 16:42:16 -0800 Subject: [PATCH 1223/1541] CombineTest: remove unused import, suppress a Java warning ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109948609 --- .../com/google/cloud/dataflow/sdk/transforms/CombineTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java index 987b97e9ae248..99193a9ac6100 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineTest.java @@ -46,7 +46,6 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior; import com.google.cloud.dataflow.sdk.util.PropertyNames; -import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -562,6 +561,7 @@ public static SetCoder of( return of((Coder) components.get(0)); } + @SuppressWarnings("unused") // required for coder instantiation public static List getInstanceComponents(Set exampleValue) { return IterableCoder.getInstanceComponents(exampleValue); } From 8746470d941e8f8f3ee81fbaba338451ca8f871f Mon Sep 17 00:00:00 2001 From: klk Date: Thu, 10 Dec 2015 16:50:07 -0800 Subject: [PATCH 1224/1541] Rename TriggerTester to ReduceFnTester This name was a legacy holdover. The TriggerTester actually tests the whole stack from ReduceFnRunner down. 
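For orientation, a typical use of the renamed utility runs the full stack with a real WindowFn and Trigger and then inspects the fired panes. The sketch below is modeled on the tests touched in this change; the type parameters of ReduceFnTester are not spelled out in this patch, so the <Integer, Iterable<Integer>, IntervalWindow> signature here is a reconstruction rather than a verbatim copy:

import com.google.cloud.dataflow.sdk.transforms.windowing.AfterPane;
import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
import com.google.cloud.dataflow.sdk.util.ReduceFnTester;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
import com.google.cloud.dataflow.sdk.values.TimestampedValue;

import org.joda.time.Duration;
import org.joda.time.Instant;

public class ReduceFnTesterSketch {
  public static void main(String[] args) throws Exception {
    // Drives window assignment, trigger evaluation, and ReduceFn buffering end to end,
    // backed by the in-memory state and timer stubs described earlier in this file.
    ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
        ReduceFnTester.nonCombining(
            FixedWindows.of(Duration.millis(10)),   // WindowFn under test
            AfterPane.elementCountAtLeast(2),        // Trigger under test
            AccumulationMode.DISCARDING_FIRED_PANES,
            Duration.millis(100));                   // allowed lateness

    // Feed elements through the full ReduceFnRunner stack.
    tester.injectElements(TimestampedValue.of(1, new Instant(1)));
    tester.injectElements(TimestampedValue.of(2, new Instant(2)));

    // Each fired pane comes back as a windowed Iterable<Integer> for assertions.
    System.out.println(tester.extractOutput());
  }
}

The TriggerExecutorTest changes below exercise the same entry points with a Mockito-mocked Trigger, so individual TriggerResult transitions can be scripted per element.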
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109949189 --- .../sdk/util/TriggerContextFactory.java | 9 +- .../transforms/windowing/AfterAllTest.java | 10 +- .../transforms/windowing/AfterEachTest.java | 10 +- .../transforms/windowing/AfterFirstTest.java | 10 +- .../transforms/windowing/AfterPaneTest.java | 8 +- .../windowing/AfterProcessingTimeTest.java | 8 +- .../AfterSynchronizedProcessingTimeTest.java | 10 +- .../windowing/AfterWatermarkTest.java | 28 +- .../windowing/DefaultTriggerTest.java | 10 +- .../windowing/OrFinallyTriggerTest.java | 10 +- .../transforms/windowing/RepeatedlyTest.java | 6 +- ...TriggerTester.java => ReduceFnTester.java} | 33 +- .../sdk/util/TriggerExecutorTest.java | 409 +++++++++--------- 13 files changed, 284 insertions(+), 277 deletions(-) rename sdk/src/test/java/com/google/cloud/dataflow/sdk/util/{TriggerTester.java => ReduceFnTester.java} (95%) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java index 9e574c27e9151..7627c5c3cf505 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java @@ -40,16 +40,17 @@ import java.util.Map; /** - * Factory for creating instances of the various {@link Trigger} contexts. + * Factory for creating instances of the various {@link Trigger} contexts from the corresponding + * {@link ReduceFn} contexts. */ -class TriggerContextFactory { +public class TriggerContextFactory { private final WindowingStrategy windowingStrategy; private StateInternals stateInternals; private ActiveWindowSet activeWindows; - TriggerContextFactory(WindowingStrategy windowingStrategy, StateInternals stateInternals, - ActiveWindowSet activeWindows) { + public TriggerContextFactory(WindowingStrategy windowingStrategy, + StateInternals stateInternals, ActiveWindowSet activeWindows) { this.windowingStrategy = windowingStrategy; this.stateInternals = stateInternals; this.activeWindows = activeWindows; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java index e4855fb1f7084..d34e788c336c9 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAllTest.java @@ -27,8 +27,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.MergeResult; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -50,12 +50,12 @@ public class AfterAllTest { @Mock private OnceTrigger mockTrigger1; @Mock private OnceTrigger mockTrigger2; - private TriggerTester, IntervalWindow> tester; + private ReduceFnTester, IntervalWindow> tester; private IntervalWindow firstWindow; public void setUp(WindowFn windowFn) throws Exception { MockitoAnnotations.initMocks(this); - tester = TriggerTester.nonCombining( + 
tester = ReduceFnTester.nonCombining( windowFn, AfterAll.of(mockTrigger1, mockTrigger2), AccumulationMode.DISCARDING_FIRED_PANES, @@ -219,7 +219,7 @@ public void testFireDeadline() throws Exception { @Test public void testAfterAllRealTriggersFixedWindow() throws Exception { - tester = TriggerTester.nonCombining(FixedWindows.of(Duration.millis(50)), + tester = ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(50)), Repeatedly.forever( AfterAll.of( AfterPane.elementCountAtLeast(5), @@ -267,7 +267,7 @@ public void testAfterAllRealTriggersFixedWindow() throws Exception { @Test public void testAfterAllMergingWindowSomeFinished() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterAll.of( AfterProcessingTime.pastFirstElementInPane() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java index 93084bfe0fde0..d28515999cbf8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEachTest.java @@ -28,8 +28,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnTimerContext; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -52,12 +52,12 @@ public class AfterEachTest { @Mock private Trigger mockTrigger1; @Mock private Trigger mockTrigger2; - private TriggerTester, IntervalWindow> tester; + private ReduceFnTester, IntervalWindow> tester; private IntervalWindow firstWindow; public void setUp(WindowFn windowFn) throws Exception { MockitoAnnotations.initMocks(this); - tester = TriggerTester.nonCombining( + tester = ReduceFnTester.nonCombining( windowFn, AfterEach.inOrder(mockTrigger1, mockTrigger2), AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); @@ -219,7 +219,7 @@ public void testFireDeadline() throws Exception { @Test public void testSequenceRealTriggersFixedWindow() throws Exception { - tester = TriggerTester.nonCombining(FixedWindows.of(Duration.millis(50)), + tester = ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(50)), AfterEach.inOrder( AfterPane.elementCountAtLeast(5), AfterPane.elementCountAtLeast(5), @@ -249,7 +249,7 @@ public void testSequenceRealTriggersFixedWindow() throws Exception { @Test public void testAfterEachMergingWindowSomeFinished() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterEach.inOrder( AfterProcessingTime.pastFirstElementInPane() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java index 
63a240a466a27..11c34771b4a85 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirstTest.java @@ -27,8 +27,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.MergeResult; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -51,12 +51,12 @@ public class AfterFirstTest { @Mock private OnceTrigger mockTrigger1; @Mock private OnceTrigger mockTrigger2; - private TriggerTester, IntervalWindow> tester; + private ReduceFnTester, IntervalWindow> tester; private IntervalWindow firstWindow; public void setUp(WindowFn windowFn) throws Exception { MockitoAnnotations.initMocks(this); - tester = TriggerTester.nonCombining( + tester = ReduceFnTester.nonCombining( windowFn, AfterFirst.of(mockTrigger1, mockTrigger2), AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); @@ -206,7 +206,7 @@ public void testFireDeadline() throws Exception { @Test public void testAfterFirstRealTriggersFixedWindow() throws Exception { - tester = TriggerTester.nonCombining(FixedWindows.of(Duration.millis(50)), + tester = ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(50)), Repeatedly.forever( AfterFirst.of( AfterPane.elementCountAtLeast(5), @@ -261,7 +261,7 @@ public void testAfterFirstRealTriggersFixedWindow() throws Exception { @Test public void testAfterFirstMergingWindowSomeFinished() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterFirst.of( AfterProcessingTime.pastFirstElementInPane() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPaneTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPaneTest.java index 9da3b4bde8826..9f094d916b91e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPaneTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPaneTest.java @@ -24,7 +24,7 @@ import com.google.cloud.dataflow.sdk.WindowMatchers; import com.google.cloud.dataflow.sdk.coders.VarIntCoder; import com.google.cloud.dataflow.sdk.transforms.Sum.SumIntegerFn; -import com.google.cloud.dataflow.sdk.util.TriggerTester; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -43,7 +43,7 @@ public class AfterPaneTest { @Test public void testAfterPaneWithGlobalWindowsAndCombining() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester tester = TriggerTester.combining( + ReduceFnTester tester = ReduceFnTester.combining( FixedWindows.of(windowDuration), AfterPane.elementCountAtLeast(2), AccumulationMode.DISCARDING_FIRED_PANES, @@ -74,7 +74,7 @@ public void testAfterPaneWithGlobalWindowsAndCombining() throws Exception { @Test public void 
testAfterPaneWithFixedWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), AfterPane.elementCountAtLeast(2), AccumulationMode.DISCARDING_FIRED_PANES, @@ -103,7 +103,7 @@ public void testAfterPaneWithFixedWindow() throws Exception { @Test public void testAfterPaneWithMerging() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterPane.elementCountAtLeast(2), AccumulationMode.DISCARDING_FIRED_PANES, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java index cada240cd0e70..db041290b9e37 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java @@ -21,8 +21,8 @@ import static org.junit.Assert.assertTrue; import com.google.cloud.dataflow.sdk.WindowMatchers; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -41,7 +41,7 @@ public class AfterProcessingTimeTest { @Test public void testAfterProcessingTimeIgnoresTimer() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), AfterProcessingTime .pastFirstElementInPane() @@ -59,7 +59,7 @@ public void testAfterProcessingTimeIgnoresTimer() throws Exception { @Test public void testAfterProcessingTimeWithFixedWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), AfterProcessingTime .pastFirstElementInPane() @@ -107,7 +107,7 @@ public void testAfterProcessingTimeWithFixedWindow() throws Exception { @Test public void testAfterProcessingTimeWithMergingWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterProcessingTime .pastFirstElementInPane() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTimeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTimeTest.java index 3270d073e4abf..074c8dbfec7d2 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTimeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTimeTest.java @@ -20,8 +20,8 @@ import static org.junit.Assert.assertTrue; import 
com.google.cloud.dataflow.sdk.WindowMatchers; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -44,7 +44,7 @@ public class AfterSynchronizedProcessingTimeTest { @Test public void testAfterProcessingTimeWithFixedWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), underTest, AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); @@ -73,7 +73,7 @@ public void testAfterProcessingTimeWithFixedWindow() throws Exception { @Test public void testAfterProcessingTimeWithMergingWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), underTest, AccumulationMode.DISCARDING_FIRED_PANES, @@ -96,7 +96,7 @@ public void testAfterProcessingTimeWithMergingWindow() throws Exception { @Test public void testAfterProcessingTimeWithMergingWindowAlreadyFired() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), underTest, AccumulationMode.DISCARDING_FIRED_PANES, @@ -129,7 +129,7 @@ public void testAfterProcessingTimeWithMergingWindowAlreadyFired() throws Except @Test public void testAfterSynchronizedProcessingTimeIgnoresTimer() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), new AfterSynchronizedProcessingTime(), AccumulationMode.DISCARDING_FIRED_PANES, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java index 9f34a9c920e14..06974323e13f5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermarkTest.java @@ -27,8 +27,8 @@ import com.google.cloud.dataflow.sdk.WindowMatchers; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -65,7 +65,7 @@ public void setUp() { @Test public void testFirstInPaneWithFixedWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( 
FixedWindows.of(windowDuration), AfterWatermark.pastFirstElementInPane().plusDelayOf(Duration.millis(5)), AccumulationMode.DISCARDING_FIRED_PANES, @@ -96,7 +96,7 @@ public void testFirstInPaneWithFixedWindow() throws Exception { @Test public void testAlignAndDelay() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(Duration.standardMinutes(1)), AfterWatermark.pastFirstElementInPane() .alignedTo(Duration.standardMinutes(1)) @@ -130,7 +130,7 @@ public void testAlignAndDelay() throws Exception { @Test public void testFirstInPaneWithMerging() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterWatermark.pastFirstElementInPane().plusDelayOf(Duration.millis(5)), AccumulationMode.DISCARDING_FIRED_PANES, @@ -156,7 +156,7 @@ public void testFirstInPaneWithMerging() throws Exception { @Test public void testEndOfWindowFixedWindow() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), AfterWatermark.pastEndOfWindow(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -190,7 +190,7 @@ public void testEndOfWindowFixedWindow() throws Exception { @Test public void testEndOfWindowWithMerging() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterWatermark.pastEndOfWindow(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -222,7 +222,7 @@ public void testEndOfWindowWithMerging() throws Exception { @Test public void testEndOfWindowIgnoresTimer() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(windowDuration), AfterWatermark.pastEndOfWindow(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -270,7 +270,7 @@ public void testContinuation() throws Exception { @Test public void testEarlyAndAtWatermarkProcessElement() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(Duration.millis(100)), AfterWatermark.pastEndOfWindow() .withEarlyFirings(mockEarly), @@ -296,7 +296,7 @@ public void testEarlyAndAtWatermarkProcessElement() throws Exception { @Test public void testLateAndAtWatermarkProcessElement() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(Duration.millis(100)), AfterWatermark.pastEndOfWindow() .withLateFirings(mockLate), @@ -336,7 +336,7 @@ public void testLateAndAtWatermarkProcessElement() throws Exception { @Test public void testEarlyLateAndAtWatermarkProcessElement() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(Duration.millis(100)), 
AfterWatermark.pastEndOfWindow() .withEarlyFirings(mockEarly) @@ -382,7 +382,7 @@ public void testEarlyLateAndAtWatermarkProcessElement() throws Exception { @Test public void testEarlyAndAtWatermarkSessions() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(20)), AfterWatermark.pastEndOfWindow() .withEarlyFirings(AfterPane.elementCountAtLeast(2)), @@ -406,7 +406,7 @@ public void testEarlyAndAtWatermarkSessions() throws Exception { @Test public void testLateAndAtWatermarkSessionsProcessingTime() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(20)), AfterWatermark.pastEndOfWindow() .withLateFirings(AfterProcessingTime @@ -438,7 +438,7 @@ public void testLateAndAtWatermarkSessionsProcessingTime() throws Exception { @Test public void testLateAndAtWatermarkSessions() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(20)), AfterWatermark.pastEndOfWindow() .withLateFirings(AfterPane.elementCountAtLeast(2)), @@ -468,7 +468,7 @@ public void testLateAndAtWatermarkSessions() throws Exception { @Test public void testEarlyLateAndAtWatermarkSessions() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(20)), AfterWatermark.pastEndOfWindow() .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane() diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java index ab9c868a5e5a1..5c068c75d8e16 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTriggerTest.java @@ -21,7 +21,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; -import com.google.cloud.dataflow.sdk.util.TriggerTester; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -43,7 +43,7 @@ public class DefaultTriggerTest { @Test public void testDefaultTriggerWithFixedWindow() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( FixedWindows.of(Duration.millis(10)), DefaultTrigger.of(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -82,7 +82,7 @@ public void testDefaultTriggerWithFixedWindow() throws Exception { @Test public void testDefaultTriggerWithSessionWindow() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(10)), DefaultTrigger.of(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -113,7 +113,7 @@ public void testDefaultTriggerWithSessionWindow() throws Exception { @Test 
public void testDefaultTriggerWithSlidingWindow() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( SlidingWindows.of(Duration.millis(10)).every(Duration.millis(5)), DefaultTrigger.of(), AccumulationMode.DISCARDING_FIRED_PANES, @@ -148,7 +148,7 @@ public void testDefaultTriggerWithSlidingWindow() throws Exception { @Test public void testDefaultTriggerWithContainedSessionWindow() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(Duration.millis(10)), DefaultTrigger.of(), AccumulationMode.DISCARDING_FIRED_PANES, diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java index e5be23527fa63..60d15473cb8cb 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTriggerTest.java @@ -26,8 +26,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.MergeResult; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -49,7 +49,7 @@ public class OrFinallyTriggerTest { @Mock private Trigger mockActual; @Mock private OnceTrigger mockUntil; - private TriggerTester, IntervalWindow> tester; + private ReduceFnTester, IntervalWindow> tester; private IntervalWindow firstWindow; public void setUp(WindowFn windowFn) throws Exception { @@ -58,7 +58,7 @@ public void setUp(WindowFn windowFn) throws Exception { Trigger underTest = new OrFinallyTrigger(mockActual, mockUntil); - tester = TriggerTester.nonCombining( + tester = ReduceFnTester.nonCombining( windowFn, underTest, AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); firstWindow = new IntervalWindow(new Instant(0), new Instant(10)); @@ -233,7 +233,7 @@ public void testFireDeadline() throws Exception { @Test public void testOrFinallyRealTriggersFixedWindow() throws Exception { // Test an orFinally with a composite trigger, and make sure it properly resets state, etc. 
- tester = TriggerTester.nonCombining(FixedWindows.of(Duration.millis(50)), + tester = ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(50)), Repeatedly.forever( // This element count should never fire because the orFinally fires sooner, every time AfterPane.elementCountAtLeast(12) @@ -286,7 +286,7 @@ public void testOrFinallyRealTriggersFixedWindow() throws Exception { @Test public void testOrFinallyMergingWindowSomeFinished() throws Exception { Duration windowDuration = Duration.millis(10); - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( Sessions.withGapDuration(windowDuration), AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(Duration.millis(5)) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/RepeatedlyTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/RepeatedlyTest.java index 413d1bccd6625..cd25b2f82009c 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/RepeatedlyTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/RepeatedlyTest.java @@ -24,8 +24,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.MergeResult; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; +import com.google.cloud.dataflow.sdk.util.ReduceFnTester; import com.google.cloud.dataflow.sdk.util.TimeDomain; -import com.google.cloud.dataflow.sdk.util.TriggerTester; import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode; import com.google.cloud.dataflow.sdk.values.TimestampedValue; @@ -46,13 +46,13 @@ public class RepeatedlyTest { @Mock private Trigger mockRepeated; - private TriggerTester, IntervalWindow> tester; + private ReduceFnTester, IntervalWindow> tester; private IntervalWindow firstWindow; public void setUp(WindowFn windowFn) throws Exception { MockitoAnnotations.initMocks(this); Trigger underTest = Repeatedly.forever(mockRepeated); - tester = TriggerTester.nonCombining( + tester = ReduceFnTester.nonCombining( windowFn, underTest, AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java similarity index 95% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java index 5d119196086b1..4413d0630890a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerTester.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java @@ -60,8 +60,6 @@ import org.joda.time.Duration; import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; @@ -77,20 +75,19 @@ import javax.annotation.Nullable; /** - * Test utility that runs a {@link WindowFn}, {@link Trigger} using in-memory stub implementations - * to provide the {@link TimerInternals} and {@link WindowingInternals} needed to run - * {@code Trigger}s and {@code ReduceFn}s. + * Test utility that runs a {@link ReduceFn}, {@link WindowFn}, {@link Trigger} using in-memory stub + * implementations to provide the {@link TimerInternals} and {@link WindowingInternals} needed to + * run {@code Trigger}s and {@code ReduceFn}s. * * @param The element types. 
* @param The final type for elements in the window (for instance, * {@code Iterable}) * @param The type of windows being used. */ -public class TriggerTester { - private static final Logger LOG = LoggerFactory.getLogger(TriggerTester.class); - +public class ReduceFnTester { private final TestInMemoryStateInternals stateInternals = new TestInMemoryStateInternals(); private final TestTimerInternals timerInternals = new TestTimerInternals(); + private final WindowFn windowFn; private final TestWindowingInternals windowingInternals; private final Coder outputCoder; @@ -105,17 +102,17 @@ public class TriggerTester { private final InMemoryLongSumAggregator droppedDueToLateness = new InMemoryLongSumAggregator(ReduceFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER); - public static TriggerTester, W> nonCombining( - WindowingStrategy windowingStrategy) throws Exception { - return new TriggerTester, W>( + public static ReduceFnTester, W> + nonCombining(WindowingStrategy windowingStrategy) throws Exception { + return new ReduceFnTester, W>( windowingStrategy, SystemReduceFn.buffering(VarIntCoder.of()).create(KEY), IterableCoder.of(VarIntCoder.of())); } - public static TriggerTester, W> nonCombining( - WindowFn windowFn, TriggerBuilder trigger, AccumulationMode mode, - Duration allowedDataLateness) throws Exception { + public static ReduceFnTester, W> + nonCombining(WindowFn windowFn, TriggerBuilder trigger, AccumulationMode mode, + Duration allowedDataLateness) throws Exception { WindowingStrategy strategy = WindowingStrategy.of(windowFn) .withTrigger(trigger.buildTrigger()) @@ -124,7 +121,7 @@ public static TriggerTester return nonCombining(strategy); } - public static TriggerTester + public static ReduceFnTester combining(WindowFn windowFn, Trigger trigger, AccumulationMode mode, KeyedCombineFn combineFn, Coder outputCoder, Duration allowedDataLateness) throws Exception { @@ -138,14 +135,14 @@ public static TriggerTester AppliedCombineFn.withInputCoder( combineFn, registry, KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())); - return new TriggerTester( + return new ReduceFnTester( strategy, SystemReduceFn.combining(StringUtf8Coder.of(), fn) .create(KEY), outputCoder); } - private TriggerTester(WindowingStrategy wildcardStrategy, + private ReduceFnTester(WindowingStrategy wildcardStrategy, ReduceFn reduceFn, Coder outputCoder) throws Exception { @SuppressWarnings("unchecked") WindowingStrategy objectStrategy = (WindowingStrategy) wildcardStrategy; @@ -318,7 +315,7 @@ public WindowedValue apply(TimestampedValue input) { try { InputT value = input.getValue(); Instant timestamp = input.getTimestamp(); - Collection windows = windowFn.assignWindows(new TriggerTester.TestAssignContext( + Collection windows = windowFn.assignWindows(new TestAssignContext( windowFn, value, timestamp, Arrays.asList(GlobalWindow.INSTANCE))); return WindowedValue.of(value, timestamp, windows, PaneInfo.NO_FIRING); } catch (Exception e) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java index c6d56bc15d887..164ab3f65183a 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java @@ -71,8 +71,8 @@ */ @RunWith(JUnit4.class) public class TriggerExecutorTest { - - @Mock private Trigger mockTrigger; + @Mock + private Trigger mockTrigger; private IntervalWindow firstWindow; @Before @@ -82,11 +82,9 @@ public 
void setUp() { firstWindow = new IntervalWindow(new Instant(0), new Instant(10)); } - private void injectElement(TriggerTester tester, - int element, TriggerResult result) - throws Exception { - when(mockTrigger.onElement( - Mockito..OnElementContext>any())) + private void injectElement(ReduceFnTester tester, int element, + TriggerResult result) throws Exception { + when(mockTrigger.onElement(Mockito..OnElementContext>any())) .thenReturn(result); tester.injectElements(TimestampedValue.of(element, new Instant(element))); } @@ -94,11 +92,9 @@ private void injectElement(TriggerTester tester, @Test public void testOnElementBufferingDiscarding() throws Exception { // Test basic execution of a trigger using a non-combining window set and discarding mode. - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); injectElement(tester, 1, TriggerResult.CONTINUE); injectElement(tester, 2, TriggerResult.FIRE); @@ -108,9 +104,11 @@ public void testOnElementBufferingDiscarding() throws Exception { // This element shouldn't be seen, because the trigger has finished injectElement(tester, 4, null); - assertThat(tester.extractOutput(), Matchers.contains( - isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10), - isSingleWindowedValue(Matchers.containsInAnyOrder(3), 3, 0, 10))); + assertThat( + tester.extractOutput(), + Matchers.contains( + isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10), + isSingleWindowedValue(Matchers.containsInAnyOrder(3), 3, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); @@ -121,11 +119,9 @@ public void testOnElementBufferingDiscarding() throws Exception { @Test public void testOnElementBufferingAccumulating() throws Exception { // Test basic execution of a trigger using a non-combining window set and accumulating mode. 
- TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.ACCUMULATING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.ACCUMULATING_FIRED_PANES, Duration.millis(100)); injectElement(tester, 1, TriggerResult.CONTINUE); injectElement(tester, 2, TriggerResult.FIRE); @@ -134,9 +130,11 @@ public void testOnElementBufferingAccumulating() throws Exception { // This element shouldn't be seen, because the trigger has finished injectElement(tester, 4, null); - assertThat(tester.extractOutput(), Matchers.contains( - isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10), - isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 3, 0, 10))); + assertThat( + tester.extractOutput(), + Matchers.contains( + isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10), + isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2, 3), 3, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); } @@ -144,13 +142,9 @@ public void testOnElementBufferingAccumulating() throws Exception { @Test public void testOnElementCombiningDiscarding() throws Exception { // Test basic execution of a trigger using a non-combining window set and discarding mode. - TriggerTester tester = TriggerTester.combining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - new Sum.SumIntegerFn().asKeyedFn(), - VarIntCoder.of(), - Duration.millis(100)); + ReduceFnTester tester = ReduceFnTester.combining( + FixedWindows.of(Duration.millis(10)), mockTrigger, AccumulationMode.DISCARDING_FIRED_PANES, + new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), Duration.millis(100)); injectElement(tester, 2, TriggerResult.CONTINUE); injectElement(tester, 3, TriggerResult.FIRE); @@ -159,9 +153,11 @@ public void testOnElementCombiningDiscarding() throws Exception { // This element shouldn't be seen, because the trigger has finished injectElement(tester, 6, null); - assertThat(tester.extractOutput(), Matchers.contains( - isSingleWindowedValue(Matchers.equalTo(5), 2, 0, 10), - isSingleWindowedValue(Matchers.equalTo(4), 4, 0, 10))); + assertThat( + tester.extractOutput(), + Matchers.contains( + isSingleWindowedValue(Matchers.equalTo(5), 2, 0, 10), + isSingleWindowedValue(Matchers.equalTo(4), 4, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); } @@ -169,13 +165,10 @@ public void testOnElementCombiningDiscarding() throws Exception { @Test public void testOnElementCombiningAccumulating() throws Exception { // Test basic execution of a trigger using a non-combining window set and accumulating mode. 
- TriggerTester tester = TriggerTester.combining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.ACCUMULATING_FIRED_PANES, - new Sum.SumIntegerFn().asKeyedFn(), - VarIntCoder.of(), - Duration.millis(100)); + ReduceFnTester tester = + ReduceFnTester.combining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.ACCUMULATING_FIRED_PANES, new Sum.SumIntegerFn().asKeyedFn(), + VarIntCoder.of(), Duration.millis(100)); injectElement(tester, 1, TriggerResult.CONTINUE); injectElement(tester, 2, TriggerResult.FIRE); @@ -184,9 +177,11 @@ public void testOnElementCombiningAccumulating() throws Exception { // This element shouldn't be seen, because the trigger has finished injectElement(tester, 4, null); - assertThat(tester.extractOutput(), Matchers.contains( - isSingleWindowedValue(Matchers.equalTo(3), 1, 0, 10), - isSingleWindowedValue(Matchers.equalTo(6), 3, 0, 10))); + assertThat( + tester.extractOutput(), + Matchers.contains( + isSingleWindowedValue(Matchers.equalTo(3), 1, 0, 10), + isSingleWindowedValue(Matchers.equalTo(6), 3, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); } @@ -194,8 +189,8 @@ public void testOnElementCombiningAccumulating() throws Exception { @Test public void testWatermarkHoldAndLateData() throws Exception { // Test handling of late data. Specifically, ensure the watermark hold is correct. - TriggerTester, IntervalWindow> tester = - TriggerTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, AccumulationMode.ACCUMULATING_FIRED_PANES, Duration.millis(10)); // Input watermark -> null @@ -305,23 +300,25 @@ public void testWatermarkHoldAndLateData() throws Exception { @Test public void testPaneInfoAllStates() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); tester.advanceInputWatermark(new Instant(0)); injectElement(tester, 1, TriggerResult.FIRE); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY)))); + assertThat( + tester.extractOutput(), + Matchers.contains( + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY)))); injectElement(tester, 2, TriggerResult.FIRE); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.EARLY, 1, -1)))); + assertThat( + tester.extractOutput(), + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, false, Timing.EARLY, 1, -1)))); tester.advanceInputWatermark(new Instant(15)); injectElement(tester, 3, TriggerResult.FIRE); @@ -345,106 +342,127 @@ public void testPaneInfoAllStates() throws Exception { @Test public void testPaneInfoAllStatesAfterWatermark() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( 
WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) - .withTrigger(Repeatedly.forever( - AfterFirst.of( - AfterPane.elementCountAtLeast(2), - AfterWatermark.pastEndOfWindow()))) + .withTrigger(Repeatedly.forever(AfterFirst.of( + AfterPane.elementCountAtLeast(2), + AfterWatermark.pastEndOfWindow()))) .withMode(AccumulationMode.DISCARDING_FIRED_PANES) .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); tester.advanceInputWatermark(new Instant(0)); tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(2, new Instant(2))); + TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); List>> output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); + assertThat( + output, + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); tester.advanceInputWatermark(new Instant(50)); // We should get the ON_TIME pane even though it is empty, // because we have an AfterWatermark.pastEndOfWindow() trigger. output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.emptyIterable(), 9, 0, 10))); + assertThat( + output, + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.emptyIterable(), 9, 0, 10))); // We should get the final pane even though it is empty. 
tester.advanceInputWatermark(new Instant(150)); output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.emptyIterable(), 9, 0, 10))); + assertThat( + output, + Matchers.contains( + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.emptyIterable(), 9, 0, 10))); } @Test public void testPaneInfoAllStatesAfterWatermarkAccumulating() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) - .withTrigger(Repeatedly.forever( - AfterFirst.of( - AfterPane.elementCountAtLeast(2), - AfterWatermark.pastEndOfWindow()))) + .withTrigger(Repeatedly.forever(AfterFirst.of( + AfterPane.elementCountAtLeast(2), + AfterWatermark.pastEndOfWindow()))) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); tester.advanceInputWatermark(new Instant(0)); tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(2, new Instant(2))); + TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); List>> output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); + assertThat( + output, + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 0, 10))); tester.advanceInputWatermark(new Instant(50)); // We should get the ON_TIME pane even though it is empty, // because we have an AfterWatermark.pastEndOfWindow() trigger. output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 9, 0, 10))); + assertThat( + output, + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 9, 0, 10))); // We should get the final pane even though it is empty. 
tester.advanceInputWatermark(new Instant(150)); output = tester.extractOutput(); - assertThat(output, Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); - assertThat(output, Matchers.contains( - WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 9, 0, 10))); + assertThat( + output, + Matchers.contains( + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 2, 1)))); + assertThat( + output, + Matchers.contains( + WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 9, 0, 10))); } @Test public void testPaneInfoFinalAndOnTime() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( + ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining( WindowingStrategy.of(FixedWindows.of(Duration.millis(10))) .withTrigger( Repeatedly.forever(AfterPane.elementCountAtLeast(2)) - .orFinally(AfterWatermark.pastEndOfWindow())) + .orFinally(AfterWatermark.pastEndOfWindow())) .withMode(AccumulationMode.DISCARDING_FIRED_PANES) .withAllowedLateness(Duration.millis(100)) .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS)); tester.advanceInputWatermark(new Instant(0)); tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(2, new Instant(2))); + TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2))); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); + assertThat( + tester.extractOutput(), + Matchers.contains(WindowMatchers.valueWithPaneInfo( + PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)))); tester.advanceInputWatermark(new Instant(150)); assertThat(tester.extractOutput(), Matchers.contains( @@ -453,41 +471,39 @@ public void testPaneInfoFinalAndOnTime() throws Exception { @Test public void testPaneInfoSkipToFinish() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); tester.advanceInputWatermark(new Instant(0)); injectElement(tester, 1, TriggerResult.FIRE_AND_FINISH); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.EARLY)))); + assertThat( + tester.extractOutput(), + Matchers.contains( + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.EARLY)))); } @Test public void testPaneInfoSkipToNonSpeculativeAndFinish() throws Exception { - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); tester.advanceInputWatermark(new Instant(15)); injectElement(tester, 1, TriggerResult.FIRE_AND_FINISH); - assertThat(tester.extractOutput(), Matchers.contains( - WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.LATE)))); + assertThat( + tester.extractOutput(), + Matchers.contains( + 
WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, true, Timing.LATE)))); } @Test public void testMergeBeforeFinalizing() throws Exception { // Verify that we merge windows before producing output so users don't see undesired // unmerged windows. - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - Sessions.withGapDuration(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(0)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(Sessions.withGapDuration(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(0)); // All on time data, verify watermark hold. when(mockTrigger.onMerge(Mockito..OnMergeContext>any())) @@ -495,8 +511,7 @@ public void testMergeBeforeFinalizing() throws Exception { when(mockTrigger.onElement(Mockito..OnElementContext>any())) .thenReturn(TriggerResult.CONTINUE, TriggerResult.CONTINUE); tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(10, new Instant(10))); + TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(10, new Instant(10))); when(mockTrigger.onTimer(Mockito..OnTimerContext>any())) .thenReturn(TriggerResult.CONTINUE); @@ -513,17 +528,15 @@ public void testMergeBeforeFinalizing() throws Exception { @Test public void testDropDataMultipleWindows() throws Exception { - TriggerTester tester = TriggerTester.combining( + ReduceFnTester tester = ReduceFnTester.combining( SlidingWindows.of(Duration.millis(100)).every(Duration.millis(30)), - AfterWatermark.pastEndOfWindow(), - AccumulationMode.ACCUMULATING_FIRED_PANES, - new Sum.SumIntegerFn().asKeyedFn(), - VarIntCoder.of(), - Duration.millis(20)); + AfterWatermark.pastEndOfWindow(), AccumulationMode.ACCUMULATING_FIRED_PANES, + new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), Duration.millis(20)); tester.injectElements( - TimestampedValue.of(10, new Instant(23)), // [-60, 40), [-30, 70), [0, 100) - TimestampedValue.of(12, new Instant(40))); // [-30, 70), [0, 100), [30, 130) + TimestampedValue.of(10, new Instant(23)), // [-60, 40), [-30, 70), [0, 100) + TimestampedValue.of(12, new Instant(40))); + // [-30, 70), [0, 100), [30, 130) assertEquals(0, tester.getElementsDroppedDueToLateness()); assertEquals(0, tester.getElementsDroppedDueToClosedWindow()); @@ -535,8 +548,8 @@ public void testDropDataMultipleWindows() throws Exception { assertEquals(0, tester.getElementsDroppedDueToLateness()); assertEquals(1, tester.getElementsDroppedDueToClosedWindow()); - tester.injectElements( - TimestampedValue.of(16, new Instant(40))); // dropped b/c lateness, assigned to 3 windows + tester.injectElements(TimestampedValue.of(16, new Instant(40))); + // dropped b/c lateness, assigned to 3 windows assertEquals(3, tester.getElementsDroppedDueToLateness()); assertEquals(1, tester.getElementsDroppedDueToClosedWindow()); @@ -546,11 +559,9 @@ public void testDropDataMultipleWindows() throws Exception { public void testIdempotentEmptyPanes() throws Exception { // Test uninteresting (empty) panes don't increment the index or otherwise // modify PaneInfo. 
- TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.millis(100)); // Inject a couple of on-time elements and fire at the window end. injectElement(tester, 1, TriggerResult.CONTINUE); @@ -580,8 +591,8 @@ public void testIdempotentEmptyPanes() throws Exception { // The late pane has the correct indices. assertThat(output.get(1).getValue(), contains(3)); - assertThat(output.get(1).getPane(), - equalTo(PaneInfo.createPane(false, true, Timing.LATE, 1, 1))); + assertThat( + output.get(1).getPane(), equalTo(PaneInfo.createPane(false, true, Timing.LATE, 1, 1))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); @@ -594,11 +605,9 @@ public void testIdempotentEmptyPanes() throws Exception { public void testIdempotentEmptyPanesAccumulating() throws Exception { // Test uninteresting (empty) panes don't increment the index or otherwise // modify PaneInfo. - TriggerTester, IntervalWindow> tester = TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - mockTrigger, - AccumulationMode.ACCUMULATING_FIRED_PANES, - Duration.millis(100)); + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), mockTrigger, + AccumulationMode.ACCUMULATING_FIRED_PANES, Duration.millis(100)); // Inject a couple of on-time elements and fire at the window end. injectElement(tester, 1, TriggerResult.CONTINUE); @@ -630,8 +639,8 @@ public void testIdempotentEmptyPanesAccumulating() throws Exception { // The late pane has the correct indices. 
assertThat(output.get(1).getValue(), containsInAnyOrder(1, 2, 3)); - assertThat(output.get(1).getPane(), - equalTo(PaneInfo.createPane(false, true, Timing.LATE, 1, 1))); + assertThat( + output.get(1).getPane(), equalTo(PaneInfo.createPane(false, true, Timing.LATE, 1, 1))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); @@ -642,6 +651,7 @@ public void testIdempotentEmptyPanesAccumulating() throws Exception { private class ResultCaptor implements Answer { private T result = null; + public T get() { return result; } @@ -661,27 +671,25 @@ public T answer(InvocationOnMock invocationOnMock) throws Throwable { */ @Test public void testEmptyOnTimeFromOrFinally() throws Exception { - TriggerTester tester = TriggerTester.combining( - FixedWindows.of(Duration.millis(10)), - AfterEach.inOrder( - Repeatedly.forever( - AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(new Duration(5))) - .orFinally(AfterWatermark.pastEndOfWindow()), - Repeatedly.forever( - AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(new Duration(25)))), - AccumulationMode.ACCUMULATING_FIRED_PANES, - new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), - Duration.millis(100)); + ReduceFnTester tester = + ReduceFnTester.combining(FixedWindows.of(Duration.millis(10)), + AfterEach.inOrder( + Repeatedly + .forever( + AfterProcessingTime.pastFirstElementInPane().plusDelayOf( + new Duration(5))) + .orFinally(AfterWatermark.pastEndOfWindow()), + Repeatedly.forever( + AfterProcessingTime.pastFirstElementInPane().plusDelayOf( + new Duration(25)))), + AccumulationMode.ACCUMULATING_FIRED_PANES, new Sum.SumIntegerFn().asKeyedFn(), + VarIntCoder.of(), Duration.millis(100)); tester.advanceInputWatermark(new Instant(0)); tester.advanceProcessingTime(new Instant(0)); - tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(1, new Instant(3)), - TimestampedValue.of(1, new Instant(7)), + tester.injectElements(TimestampedValue.of(1, new Instant(1)), + TimestampedValue.of(1, new Instant(3)), TimestampedValue.of(1, new Instant(7)), TimestampedValue.of(1, new Instant(5))); tester.advanceProcessingTime(new Instant(6)); @@ -693,10 +701,12 @@ public void testEmptyOnTimeFromOrFinally() throws Exception { assertThat(output.get(0), WindowMatchers.isSingleWindowedValue(4, 1, 0, 10)); assertThat(output.get(1), WindowMatchers.isSingleWindowedValue(4, 9, 0, 10)); - assertThat(output.get(0), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - true, false, Timing.EARLY, 0, -1))); - assertThat(output.get(1), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - false, false, Timing.ON_TIME, 1, 0))); + assertThat( + output.get(0), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1))); + assertThat( + output.get(1), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0))); } /** @@ -706,27 +716,25 @@ public void testEmptyOnTimeFromOrFinally() throws Exception { */ @Test public void testProcessingTime() throws Exception { - TriggerTester tester = TriggerTester.combining( - FixedWindows.of(Duration.millis(10)), - AfterEach.inOrder( - Repeatedly.forever( - AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(new Duration(5))) - .orFinally(AfterWatermark.pastEndOfWindow()), - Repeatedly.forever( - AfterProcessingTime.pastFirstElementInPane() - .plusDelayOf(new Duration(25)))), - AccumulationMode.ACCUMULATING_FIRED_PANES, - new Sum.SumIntegerFn().asKeyedFn(), 
VarIntCoder.of(), - Duration.millis(100)); + ReduceFnTester tester = + ReduceFnTester.combining(FixedWindows.of(Duration.millis(10)), + AfterEach.inOrder( + Repeatedly + .forever( + AfterProcessingTime.pastFirstElementInPane().plusDelayOf( + new Duration(5))) + .orFinally(AfterWatermark.pastEndOfWindow()), + Repeatedly.forever( + AfterProcessingTime.pastFirstElementInPane().plusDelayOf( + new Duration(25)))), + AccumulationMode.ACCUMULATING_FIRED_PANES, new Sum.SumIntegerFn().asKeyedFn(), + VarIntCoder.of(), Duration.millis(100)); tester.advanceInputWatermark(new Instant(0)); tester.advanceProcessingTime(new Instant(0)); - tester.injectElements( - TimestampedValue.of(1, new Instant(1)), - TimestampedValue.of(1, new Instant(3)), - TimestampedValue.of(1, new Instant(7)), + tester.injectElements(TimestampedValue.of(1, new Instant(1)), + TimestampedValue.of(1, new Instant(3)), TimestampedValue.of(1, new Instant(7)), TimestampedValue.of(1, new Instant(5))); // 4 elements all at processing time 0 @@ -768,29 +776,30 @@ public void testProcessingTime() throws Exception { assertThat(output.get(2), WindowMatchers.isSingleWindowedValue(11, 9, 0, 10)); assertThat(output.get(3), WindowMatchers.isSingleWindowedValue(12, 9, 0, 10)); - assertThat(output.get(0), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - true, false, Timing.EARLY, 0, -1))); - assertThat(output.get(1), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - false, false, Timing.ON_TIME, 1, 0))); - assertThat(output.get(2), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - false, false, Timing.LATE, 2, 1))); - assertThat(output.get(3), WindowMatchers.valueWithPaneInfo(PaneInfo.createPane( - false, true, Timing.LATE, 3, 2))); + assertThat( + output.get(0), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1))); + assertThat( + output.get(1), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.ON_TIME, 1, 0))); + assertThat( + output.get(2), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.LATE, 2, 1))); + assertThat( + output.get(3), + WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.LATE, 3, 2))); } @Test public void testMultipleTimerTypes() throws Exception { - Trigger trigger = spy(Repeatedly.forever( - AfterFirst.of(AfterProcessingTime.pastFirstElementInPane().plusDelayOf( - Duration.millis(10)), - AfterWatermark.pastEndOfWindow()))); - - TriggerTester, IntervalWindow> tester = - TriggerTester.nonCombining( - FixedWindows.of(Duration.millis(10)), - trigger, - AccumulationMode.DISCARDING_FIRED_PANES, - Duration.standardDays(1)); + Trigger trigger = spy(Repeatedly.forever(AfterFirst.of( + AfterProcessingTime.pastFirstElementInPane().plusDelayOf( + Duration.millis(10)), + AfterWatermark.pastEndOfWindow()))); + + ReduceFnTester, IntervalWindow> tester = + ReduceFnTester.nonCombining(FixedWindows.of(Duration.millis(10)), trigger, + AccumulationMode.DISCARDING_FIRED_PANES, Duration.standardDays(1)); tester.injectElements(TimestampedValue.of(1, new Instant(1))); From a82abf46e52da2e7941171bafaef968c358508f1 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Thu, 10 Dec 2015 16:56:53 -0800 Subject: [PATCH 1225/1541] Fixes a data race in OffsetBasedSource This race is harmless in practice, because we only read properties of the returned source that don't change across splitAtFraction(), however it is better to be safe. Also clarifies documentation related to Reader.getCurrentSource() and dynamic work rebalancing. 
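
For illustration only, a minimal self-contained sketch of the locking pattern this change applies (the Range/RangeReader names below are hypothetical, not SDK classes): both getCurrentSource() and splitAtFraction() synchronize on the reader, so a concurrent caller of getCurrentSource() never observes the source mid-update.

    // Hypothetical sketch, not SDK code: a reader whose current "source"
    // (here just an immutable offset range) can be split from another thread.
    class RangeReader {
      static final class Range {
        final long start;
        final long end;
        Range(long start, long end) { this.start = start; this.end = end; }
      }

      private Range range;  // guarded by 'this'

      RangeReader(Range range) { this.range = range; }

      // May be called from a thread other than the reading thread.
      synchronized Range getCurrentSource() {
        return range;
      }

      // Called during dynamic work rebalancing. Swaps the current source to the
      // primary and returns the residual, atomically w.r.t. getCurrentSource().
      synchronized Range splitAtFraction(double fraction) {
        long split = range.start + (long) ((range.end - range.start) * fraction);
        if (split <= range.start || split >= range.end) {
          return null;  // refuse the split; the current source is unchanged
        }
        Range residual = new Range(split, range.end);
        range = new Range(range.start, split);  // the primary
        return residual;
      }
    }

The actual change to OffsetBasedReader below follows the same shape: getCurrentSource() and splitAtFraction(double) are both declared synchronized, so the source field is swapped and read under the same lock.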
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109949629 --- .../cloud/dataflow/sdk/io/BoundedSource.java | 88 ++++++++++++++++++- .../dataflow/sdk/io/OffsetBasedSource.java | 5 +- .../google/cloud/dataflow/sdk/io/Source.java | 57 +++++++----- 3 files changed, 122 insertions(+), 28 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java index 5f91ef39b863a..be3a415cff93e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java @@ -80,14 +80,22 @@ public abstract List> splitIntoBundles( * again on this object. * *

<h3>Thread safety</h3>

- * All methods will be run from the same thread except {@link #splitAtFraction} and - * {@link #getFractionConsumed}, which can be called concurrently from a different thread. There - * will not be multiple concurrent calls to {@link #splitAtFraction} but there can be for - * {@link #getFractionConsumed} if {@link #splitAtFraction} is implemented. + * All methods will be run from the same thread except {@link #splitAtFraction}, + * {@link #getFractionConsumed} and {@link #getCurrentSource}, which can be called concurrently + * from a different thread. There will not be multiple concurrent calls to + * {@link #splitAtFraction} but there can be for {@link #getFractionConsumed} if + * {@link #splitAtFraction} is implemented. + * *

If the source does not implement {@link #splitAtFraction}, you do not need to worry about * thread safety. If implemented, it must be safe to call {@link #splitAtFraction} and * {@link #getFractionConsumed} concurrently with other methods. * + *

Additionally, a successful {@link #splitAtFraction} call must, by definition, cause + * {@link #getCurrentSource} to start returning a different value. + * Callers of {@link #getCurrentSource} need to be aware of the possibility that the returned + * value can change at any time, and must only access the properties of the source returned by + * {@link #getCurrentSource} which do not change between {@link #splitAtFraction} calls. + * *

<h3>Implementing {@link #splitAtFraction}</h3>

* In the course of dynamic work rebalancing, the method {@link #splitAtFraction} * may be called concurrently with {@link #advance} or {@link #start}. It is critical that @@ -124,6 +132,78 @@ public Double getFractionConsumed() { return null; } + /** + * Returns a {@code Source} describing the same input that this {@code Reader} currently reads + * (including items already read). + * + *

<h3>Usage</h3>

+ *

Reader subclasses can use this method for convenience to access unchanging properties of + * the source being read. Alternatively, they can cache these properties in the constructor. + *

The framework will call this method in the course of dynamic work rebalancing, e.g. after + * a successful {@link BoundedSource.BoundedReader#splitAtFraction} call. + * + *

<h3>Mutability and thread safety</h3>

+ * Remember that {@link Source} objects must always be immutable. However, the return value of + * this function may be affected by dynamic work rebalancing, happening asynchronously via + * {@link BoundedSource.BoundedReader#splitAtFraction}, meaning it can return a different + * {@link Source} object. However, the returned object itself will still be immutable. + * Callers must take care not to rely on properties of the returned source that may be + * asynchronously changed as a result of this process (e.g. do not cache an end offset when + * reading a file). + * + *

<h3>Implementation</h3>

+ * For convenience, subclasses should usually return the most concrete subclass of + * {@link Source} possible. + * In practice, the implementation of this method should nearly always be one of the following: + *
    + *
  • Source that inherits from a base class that already implements + * {@link #getCurrentSource}: delegate to base class. In this case, it is almost always + * an error for the subclass to maintain its own copy of the source. + *
    {@code
    +     *   public FooReader(FooSource source) {
    +     *     super(source);
    +     *   }
    +     *
    +     *   public FooSource getCurrentSource() {
    +     *     return (FooSource)super.getCurrentSource();
    +     *   }
    +     * }
    + *
  • Source that does not support dynamic work rebalancing: return a private final variable. + *
    {@code
    +     *   private final FooSource source;
    +     *
    +     *   public FooReader(FooSource source) {
    +     *     this.source = source;
    +     *   }
    +     *
    +     *   public FooSource getCurrentSource() {
    +     *     return source;
    +     *   }
    +     * }
    + *
  • {@link BoundedSource.BoundedReader} that explicitly supports dynamic work rebalancing: + * maintain a variable pointing to an immutable source object, and protect it with + * synchronization. + *
    {@code
    +     *   private FooSource source;
    +     *
    +     *   public FooReader(FooSource source) {
    +     *     this.source = source;
    +     *   }
    +     *
    +     *   public synchronized FooSource getCurrentSource() {
    +     *     return source;
    +     *   }
    +     *
    +     *   public synchronized FooSource splitAtFraction(double fraction) {
    +     *     ...
    +     *     FooSource primary = ...;
    +     *     FooSource residual = ...;
    +     *     this.source = primary;
    +     *     return residual;
    +     *   }
    +     * }
    + *
+ */ @Override public abstract BoundedSource getCurrentSource(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java index ea33d3143ff9a..4527b85f222be 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java @@ -189,6 +189,7 @@ public abstract static class OffsetBasedReader extends BoundedReader { private static final Logger LOG = LoggerFactory.getLogger(OffsetBasedReader.class); private OffsetBasedSource source; + /** * The {@link OffsetRangeTracker} managing the range and current position of the source. * Subclasses MUST use it before returning records from {@link #start} or {@link #advance}: @@ -247,7 +248,7 @@ public final boolean advance() throws IOException { protected abstract boolean advanceImpl() throws IOException; @Override - public OffsetBasedSource getCurrentSource() { + public synchronized OffsetBasedSource getCurrentSource() { return source; } @@ -257,7 +258,7 @@ public Double getFractionConsumed() { } @Override - public final OffsetBasedSource splitAtFraction(double fraction) { + public final synchronized OffsetBasedSource splitAtFraction(double fraction) { if (rangeTracker.getStopPosition() == Long.MAX_VALUE) { LOG.debug( "Refusing to split unbounded OffsetBasedReader {} at fraction {}", diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java index 22cb63a6ed6e2..6aa8ac5f54863 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java @@ -72,10 +72,12 @@ public abstract class Source implements Serializable { * the current model tends to be easier to program and more efficient in practice * for iterating over sources such as files, databases etc. (rather than pure collections). * - *

{@code Reader} implementations do not need to be thread-safe; they may only be accessed - * by a single thread at once. + *

All {@code Reader} functions except {@link #getCurrentSource} do not need to be thread-safe; + * they may only be accessed by a single thread at once. However, {@link #getCurrentSource} needs + * to be thread-safe, and other functions should assume that its returned value can change + * asynchronously. * - *

Callers of {@code Readers} must obey the following access pattern: + *

Reading data from the {@link Reader} must obey the following access pattern: *

    *
  • One call to {@link Reader#start} *
    • If {@link Reader#start} returned true, any number of calls to {@code getCurrent}* @@ -88,27 +90,35 @@ public abstract class Source implements Serializable { * *

      For example, if the reader is reading a fixed set of data: *

      -   * for (boolean available = reader.start(); available; available = reader.advance()) {
      -   *   T item = reader.getCurrent();
      -   *   Instant timestamp = reader.getCurrentTimestamp();
      -   *   ...
      -   * }
      +   *   try {
      +   *     for (boolean available = reader.start(); available; available = reader.advance()) {
      +   *       T item = reader.getCurrent();
      +   *       Instant timestamp = reader.getCurrentTimestamp();
      +   *       ...
      +   *     }
      +   *   } finally {
      +   *     reader.close();
      +   *   }
          * 
      * *

      If the set of data being read is continually growing: *

      -   * boolean available = reader.start();
      -   * while (true) {
      -   *   if (available) {
      -   *     T item = reader.getCurrent();
      -   *     Instant timestamp = reader.getCurrentTimestamp();
      -   *     ...
      -   *     resetExponentialBackoff();
      -   *   } else {
      -   *     exponentialBackoff();
      +   *   try {
      +   *     boolean available = reader.start();
      +   *     while (true) {
      +   *       if (available) {
      +   *         T item = reader.getCurrent();
      +   *         Instant timestamp = reader.getCurrentTimestamp();
      +   *         ...
      +   *         resetExponentialBackoff();
      +   *       } else {
      +   *         exponentialBackoff();
      +   *       }
      +   *       available = reader.advance();
      +   *     }
      +   *   } finally {
      +   *     reader.close();
          *   }
      -   *   available = reader.advance();
      -   * }
          * 
      * *

      Note: this interface is a work-in-progress and may change. @@ -168,11 +178,14 @@ public abstract static class Reader implements AutoCloseable { public abstract void close() throws IOException; /** - * Returns a {@code Source} describing the same input that this {@code Reader} reads + * Returns a {@code Source} describing the same input that this {@code Reader} currently reads * (including items already read). * - *

      A reader created from the result of {@code getCurrentSource}, if consumed, MUST - * return the same data items as the current reader. + *

      Usually, an implementation will simply return the immutable {@link Source} object from + * which the current {@link Reader} was constructed, or delegate to the base class. + * However, when using or implementing this method on a {@link BoundedSource.BoundedReader}, + * special considerations apply, see documentation for + * {@link BoundedSource.BoundedReader#getCurrentSource}. */ public abstract Source getCurrentSource(); } From 7539d5e81e18590881bb5300ab0dba51cdc8be85 Mon Sep 17 00:00:00 2001 From: amyu Date: Mon, 31 Aug 2015 08:30:25 -0700 Subject: [PATCH 1226/1541] A series of batch and streaming pipeline examples A series of batch and streaming pipelines in a 'mobile gaming' domain that illustrate some advanced topics, including windowing and triggers ----Release Notes---- A series of batch and streaming pipelines in a 'mobile gaming' domain that illustrate some advanced topics, including windowing and triggers. [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=109997483 --- examples/README.md | 8 + examples/pom.xml | 4 + .../examples/complete/game/GameStats.java | 360 +++++++++++++++ .../complete/game/HourlyTeamScore.java | 225 ++++++++++ .../examples/complete/game/LeaderBoard.java | 274 ++++++++++++ .../examples/complete/game/UserScore.java | 286 ++++++++++++ .../complete/game/injector/Injector.java | 419 ++++++++++++++++++ .../complete/game/injector/InjectorUtils.java | 101 +++++ .../injector/RetryHttpInitializerWrapper.java | 127 ++++++ .../examples/complete/game/GameStatsTest.java | 99 +++++ .../complete/game/HourlyTeamScoreTest.java | 121 +++++ .../examples/complete/game/UserScoreTest.java | 156 +++++++ 12 files changed, 2180 insertions(+) create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/GameStats.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/UserScore.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java create mode 100644 examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java create mode 100644 examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java create mode 100644 examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java create mode 100644 examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java diff --git a/examples/README.md b/examples/README.md index d334200e858ca..cbcd01fc0f1c5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -85,3 +85,11 @@ directory for some common and useful patterns like joining, filtering, and combi The [`complete`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete) directory contains a few realistic end-to-end pipelines. + +See the +[Java 8](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master/examples/src/main/java8/com/google/cloud/dataflow/examples) +examples as well. 
This directory includes a Java 8 version of the +MinimalWordCount example, as well as series of examples in a simple 'mobile +gaming' domain. This series introduces some advanced concepts and provides +additional examples of using Java 8 syntax. Other than usage of Java 8 lambda +expressions, the concepts that are used apply equally well in Java 7. diff --git a/examples/pom.xml b/examples/pom.xml index d33c610a1a32c..56b76dc54ff90 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -106,6 +106,7 @@ **/*Java8Test.java + **/game/*.java @@ -124,6 +125,7 @@ **/*Java8Test.java + **/game/*.java @@ -142,6 +144,7 @@ **/*Java8*.java + **/game/*.java @@ -160,6 +163,7 @@ **/*Java8*.java + **/game/*.java diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/GameStats.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/GameStats.java new file mode 100644 index 0000000000000..9dc430f6d912f --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/GameStats.java @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.examples.common.DataflowExampleUtils; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.Mean; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SerializableComparator; +import com.google.cloud.dataflow.sdk.transforms.SimpleFunction; +import com.google.cloud.dataflow.sdk.transforms.Values; +import com.google.cloud.dataflow.sdk.transforms.View; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterEach; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime; +import 
com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns; +import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionView; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.apache.avro.reflect.Nullable; +import org.joda.time.DateTimeZone; +import org.joda.time.Duration; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +/** + * This class is the fourth in a series of four pipelines that tell a story in a 'gaming' + * domain, following {@link UserScore}, {@link HourlyTeamScore}, and {@link LeaderBoard}. + * New concepts: session windows and finding session duration; use of both + * singleton and non-singleton side inputs. + * + *

      This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business + * intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user + * score sum for a window, and uses that information to identify likely spammers/robots. (The robots + * have a higher click rate than the human users). The 'robot' users are then filtered out when + * calculating the team scores. + * + *

      Additionally, user sessions are tracked: that is, we find bursts of user activity using + * session windows. Then, the mean session duration information is recorded in the context of + * subsequent fixed windowing. (This could be used to tell us what games are giving us greater + * user retention). + * + *

      Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector + * documentation provides more detail. + * + *

      To execute this pipeline using the Dataflow service, specify the pipeline configuration + * like this: + *

      {@code
      + *   --project=YOUR_PROJECT_ID
      + *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
      + *   --runner=BlockingDataflowPipelineRunner
      + *   --dataset=YOUR-DATASET
      + *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
      + * }
      + * 
      + * where the BigQuery dataset you specify must already exist. The PubSub topic you specify should + * be the same topic to which the Injector is publishing. + */ +public class GameStats extends LeaderBoard { + + private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; + private static final Logger LOG = LoggerFactory.getLogger(GameStats.class); + + private static DateTimeFormatter fmt = + DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + static final Duration FIVE_MINUTES = Duration.standardMinutes(5); + static final Duration TEN_MINUTES = Duration.standardMinutes(10); + + + /** + * Filter out all but those users with a high clickrate, which we will consider as 'spammy' uesrs. + * We do this by finding the mean total score per user, then using that information as a side + * input to filter out all but those user scores that are > (mean * SCORE_WEIGHT) + */ + public static class CalculateSpammyUsers + extends PTransform>, PCollection>> { + private static final Logger LOG = LoggerFactory.getLogger(CalculateSpammyUsers.class); + private static final double SCORE_WEIGHT = 2.5; + + @Override + public PCollection> apply(PCollection> userScores) { + + // Get the sum of scores for each user. + PCollection> sumScores = userScores + .apply("UserSum", Sum.integersPerKey()); + + // Extract the score from each element, and use it to find the global mean. + final PCollectionView globalMeanScore = sumScores.apply(Values.create()) + .apply(Mean.globally().asSingletonView()); + + // Filter the user sums using the global mean. + PCollection> filtered = sumScores + .apply(ParDo + .named("ProcessAndFilter") + // use the derived mean total score as a side input + .withSideInputs(globalMeanScore) + .of(new DoFn, KV>() { + private final Aggregator numSpammerUsers = + createAggregator("SpammerUsers", new Sum.SumLongFn()); + @Override + public void processElement(ProcessContext c) { + Integer score = c.element().getValue(); + Double gmc = c.sideInput(globalMeanScore); + if (score > (gmc * SCORE_WEIGHT)) { + LOG.info("user " + c.element().getKey() + " spammer score " + score + + " with mean " + gmc); + numSpammerUsers.addValue(1L); + c.output(c.element()); + } + } + })); + return filtered; + } + } + + /** + * Calculate and output an element's session duration. + */ + private static class UserSessionInfoFn extends DoFn, Integer> + implements RequiresWindowAccess { + + @Override + public void processElement(ProcessContext c) { + IntervalWindow w = (IntervalWindow) c.window(); + int duration = new Duration( + w.start(), w.end()).toPeriod().toStandardMinutes().getMinutes(); + c.output(duration); + } + } + + + /** + * Options supported by {@link GameStats}. 
+ */ + static interface Options extends LeaderBoard.Options { + @Description("Pub/Sub topic to read from") + @Validation.Required + String getTopic(); + void setTopic(String value); + + @Description("Numeric value of fixed window duration for user analysis, in minutes") + @Default.Integer(60) + Integer getFixedWindowDuration(); + void setFixedWindowDuration(Integer value); + + @Description("Numeric value of gap between user sessions, in minutes") + @Default.Integer(5) + Integer getSessionGap(); + void setSessionGap(Integer value); + + @Description("Numeric value of fixed window for finding mean of user session duration, " + + "in minutes") + @Default.Integer(30) + Integer getUserActivityWindowDuration(); + void setUserActivityWindowDuration(Integer value); + + @Description("Prefix used for the BigQuery table names") + @Default.String("game_stats") + String getTableName(); + void setTableName(String value); + } + + /** + * Format user activity information (objects of type UserActivityInfo) and write to BigQuery. + * The constructor argument indicates the table prefix to use. + */ + public static class WriteAverageSessionLengthToBigQuery + extends PTransform, PDone> { + + private final String tablePrefix; + + public WriteAverageSessionLengthToBigQuery(String tablePrefix) { + this.tablePrefix = tablePrefix; + } + + /** + * Convert the user activity info into a BigQuery TableRow. + */ + private class BuildSessionActivityRowFn extends DoFn + implements RequiresWindowAccess { + + @Override + public void processElement(ProcessContext c) { + + IntervalWindow w = (IntervalWindow) c.window(); + + TableRow row = new TableRow() + .set("window_start", fmt.print(w.start())) + .set("mean_duration", c.element()); + c.output(row); + } + } + + /** Build the output table schema. */ + private TableSchema getMeanSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("window_start").setType("STRING")); + fields.add(new TableFieldSchema().setName("mean_duration").setType("FLOAT")); + return new TableSchema().setFields(fields); + } + + @Override + public PDone apply(PCollection userInfo) { + return userInfo + .apply(ParDo.named("ConvertToUserInfoRow").of(new BuildSessionActivityRowFn())) + .apply(BigQueryIO.Write + .to(getTable(userInfo.getPipeline(), + tablePrefix + "_mean_sessions")) + .withSchema(getMeanSchema()) + .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)); + } + } + + + public static void main(String[] args) throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + // Enforce that this pipeline is always run in streaming mode. + options.setStreaming(true); + // Allow the pipeline to be cancelled automatically. + options.setRunner(DataflowPipelineRunner.class); + DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options); + Pipeline pipeline = Pipeline.create(options); + + // Read Events from Pub/Sub using custom timestamps + PCollection rawEvents = pipeline + .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic())) + .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn())); + + // Extract username/score pairs from the event stream + PCollection> userEvents = + rawEvents.apply("ExtractUserScore", + MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())) + .withOutputType(new TypeDescriptor>() {})); + + // Calculate the total score per user over fixed windows, and + // cumulative updates for late data. 
+ final PCollectionView> spammersView = userEvents + .apply(Window.named("FixedWindowsUser") + .>into(FixedWindows.of( + Duration.standardMinutes(options.getFixedWindowDuration()))) + ) + + // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate. + // These might be robots/spammers. + .apply("CalculateSpammyUsers", new CalculateSpammyUsers()) + // Derive a view from the collection of spammer users. It will be used as a side input + // in calculating the team score sums, below. + .apply("CreateSpammersView", View.asMap()); + + // Calculate the total score per team over fixed windows, + // and emit cumulative updates for late data. Uses the side input derived above-- the set of + // suspected robots-- to filter out scores from those users from the sum. + // Write the results to BigQuery. + rawEvents + .apply(Window.named("WindowIntoFixedWindows") + .into(FixedWindows.of( + Duration.standardMinutes(options.getFixedWindowDuration()))) + ) + // Filter out the detected spammer users, using the side input derived above. + .apply(ParDo.named("FilterOutSpammers") + .withSideInputs(spammersView) + .of(new DoFn() { + @Override + public void processElement(ProcessContext c) { + // If the user is not in the spammers Map, output the data element. + if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) { + c.output(c.element()); + }}})) + // Extract and sum teamname/score pairs from the event data. + .apply("ExtractTeamScore", new ExtractAndSumScore("team")) + // Write the result to BigQuery + .apply("WriteTeamSums", + new WriteScoresToBigQuery(options.getTableName(), "team", true, false)); + + // Calculate the total score for the users per session-- that is, a burst of activity + // separated by a gap from further activity. Find and record the mean session lengths. + // This information could help the game designers track the changing user engagement + // as their set of games changes. + userEvents + .apply(Window.named("WindowIntoSessions") + .>into( + Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))) + .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()) + .withAllowedLateness(Duration.ZERO) + .discardingFiredPanes()) + .apply("UserSessionSum", Sum.integersPerKey()) + // Get the duration per session. + .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())) + + // Re-window to process groups of session sums according to when the sessions complete. + .apply(Window.named("WindowToExtractSessionMean") + .into( + FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))) + .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())) + .accumulatingFiredPanes()) + // Find the mean session duration in each window. + .apply(Mean.globally().withoutDefaults()) + // Write this info to a BigQuery table. + .apply("WriteAvgSessionLength", + new WriteAverageSessionLengthToBigQuery(options.getTableName())); + + + // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the + // command line. 
+ PipelineResult result = pipeline.run(); + dataflowUtils.waitToFinish(result); + } +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java new file mode 100644 index 0000000000000..01d500341d37b --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess; +import com.google.cloud.dataflow.sdk.transforms.Filter; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SimpleFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.WithTimestamps; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.joda.time.DateTimeZone; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +import java.util.ArrayList; +import java.util.List; +import java.util.TimeZone; + +/** + * This class is the second in a series of four pipelines that tell a story in a 'gaming' + * domain, following {@link UserScore}. In addition to the concepts introduced in {@ UserScore}, new + * concepts include: windowing and element timestamps; use of {@code Filter.byPredicate()}. + * + *

      This pipeline processes data collected from gaming events in batch, building on {@link + * UserScore} but using fixed windows. It calculates the sum of scores per team, for each window, + * optionally allowing specification of two timestamps before and after which data is filtered out. + * This allows a model where late data collected after the intended analysis window can be included, + * and any late-arriving data prior to the beginning of the analysis window can be removed as well. + * By using windowing and adding element timestamps, we can do finer-grained analysis than with the + * {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get + * results from plays at the beginning of the batch's time period until the batch is processed. + * + *
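+ * As an illustrative sketch only (the authoritative version is in main() below), the heart of
+ * this pipeline filters elements on their embedded event timestamps, assigns those timestamps to
+ * the elements, and then applies fixed windows. Here gameEvents stands for the parsed
+ * PCollection of GameActionInfo, and 60 minutes is simply the default window duration used below:
+ * {@code
+ *   gameEvents
+ *     .apply("FilterStartTime", Filter.byPredicate(
+ *         (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
+ *     .apply("FilterEndTime", Filter.byPredicate(
+ *         (GameActionInfo gInfo) -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
+ *     .apply("AddEventTimestamps",
+ *         WithTimestamps.of((GameActionInfo gInfo) -> new Instant(gInfo.getTimestamp())))
+ *     .apply(Window.named("FixedWindowsTeam")
+ *         .into(FixedWindows.of(Duration.standardMinutes(60))));
+ * }
+ *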

      To execute this pipeline using the Dataflow service, specify the pipeline configuration + * like this: + *

      {@code
      + *   --project=YOUR_PROJECT_ID
      + *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
      + *   --runner=BlockingDataflowPipelineRunner
      + *   --dataset=YOUR-DATASET
      + * }
      + * 
      + * where the BigQuery dataset you specify must already exist. + * + *

      Optionally include {@code --input} to specify the batch input file path. + * To indicate a time after which the data should be filtered out, include the + * {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data + * timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis. + * To indicate a time before which data should be filtered out, include the {@code --startMin} arg. + * If you're using the default input specified in {@link UserScore}, + * "gs://dataflow-samples/game/gaming_data*.csv", then + * {@code --startMin=2015-11-16-16-10 --stopMin=2015-11-17-16-10} are good values. + */ +public class HourlyTeamScore extends UserScore { + + private static DateTimeFormatter fmt = + DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + private static DateTimeFormatter minFmt = + DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + + + /** Format fixed window information for scores, and write that info to BigQuery. */ + public static class WriteWindowedToBigQuery + extends PTransform>, PDone> { + + private final String tableName; + + public WriteWindowedToBigQuery(String tableName) { + this.tableName = tableName; + } + + /** Convert each key/score pair into a BigQuery TableRow. */ + private class BuildFixedRowFn extends DoFn, TableRow> + implements RequiresWindowAccess { + + @Override + public void processElement(ProcessContext c) { + + IntervalWindow w = (IntervalWindow) c.window(); + + TableRow row = new TableRow() + .set("team", c.element().getKey()) + .set("total_score", c.element().getValue().longValue()) + // Add windowing info to the output. + .set("window_start", fmt.print(w.start())); + c.output(row); + } + } + + /** Build the output table schema. */ + private TableSchema getFixedSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("team").setType("STRING")); + fields.add(new TableFieldSchema().setName("total_score").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("window_start").setType("STRING")); + return new TableSchema().setFields(fields); + } + + @Override + public PDone apply(PCollection> teamAndScore) { + return teamAndScore + .apply(ParDo.named("ConvertToFixedRow").of(new BuildFixedRowFn())) + .apply(BigQueryIO.Write + .to(getTable(teamAndScore.getPipeline(), + tableName)) + .withSchema(getFixedSchema()) + .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)); + } + } + + + /** + * Options supported by {@link HourlyTeamScore}. + */ + static interface Options extends UserScore.Options { + + @Description("Numeric value of fixed window duration, in minutes") + @Default.Integer(60) + Integer getWindowDuration(); + void setWindowDuration(Integer value); + + @Description("String representation of the first minute after which to generate results," + + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." + + "Any input data timestamped prior to that minute won't be included in the sums.") + @Default.String("1970-01-01-00-00") + String getStartMin(); + void setStartMin(String value); + + @Description("String representation of the first minute for which to not generate results," + + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST." 
+ + "Any input data timestamped after that minute won't be included in the sums.") + @Default.String("2100-01-01-00-00") + String getStopMin(); + void setStopMin(String value); + + @Description("The BigQuery table name. Should not already exist.") + @Default.String("hourly_team_score") + String getTableName(); + void setTableName(String value); + } + + + /** + * Run a batch pipeline to do windowed analysis of the data. + */ + public static void main(String[] args) throws Exception { + // Begin constructing a pipeline configured by commandline flags. + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline pipeline = Pipeline.create(options); + + final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin())); + final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin())); + + // Read 'gaming' events from a text file. + pipeline.apply(TextIO.Read.from(options.getInput())) + // Parse the incoming data. + .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn())) + + // Filter out data before and after the given times so that it is not included + // in the calculations. As we collect data in batches (say, by day), the batch for the day + // that we want to analyze could potentially include some late-arriving data from the previous + // day. If so, we want to weed it out. Similarly, if we include data from the following day + // (to scoop up late-arriving events from the day we're analyzing), we need to weed out events + // that fall after the time period we want to analyze. + .apply("FilterStartTime", Filter.byPredicate( + (GameActionInfo gInfo) + -> gInfo.getTimestamp() > startMinTimestamp.getMillis())) + .apply("FilterEndTime", Filter.byPredicate( + (GameActionInfo gInfo) + -> gInfo.getTimestamp() < stopMinTimestamp.getMillis())) + + // Add an element timestamp based on the event log, and apply fixed windowing. + .apply("AddEventTimestamps", + WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp()))) + .apply(Window.named("FixedWindowsTeam") + .into(FixedWindows.of( + Duration.standardMinutes(options.getWindowDuration())))) + + // Extract and sum teamname/score pairs from the event data. + .apply("ExtractTeamScore", new ExtractAndSumScore("team")) + .apply("WriteTeamScoreSums", new WriteWindowedToBigQuery(options.getTableName())); + + pipeline.run(); + } + +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java new file mode 100644 index 0000000000000..6bb236668b9b1 --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.examples.common.DataflowExampleOptions; +import com.google.cloud.dataflow.examples.common.DataflowExampleUtils; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.PipelineResult; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.StreamingOptions; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SimpleFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterEach; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime; +import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + + +import org.joda.time.DateTimeZone; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +import java.util.ArrayList; +import java.util.List; +import java.util.TimeZone; + +/** + * This class is the third in a series of four pipelines that tell a story in a 'gaming' domain, + * following {@link UserScore} and {@link HourlyTeamScore}. Concepts include: processing unbounded + * data using fixed windows; use of custom timestamps and event-time processing; generation of + * early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late- + * arriving data. + * + *

      This pipeline processes an unbounded stream of 'game events'. The calculation of the team + * scores uses fixed windowing based on event time (the time of the game play event), not + * processing time (the time that an event is processed by the pipeline). The pipeline calculates + * the sum of scores per team, for each window. By default, the team scores are calculated using + * one-hour windows. + * + *

In contrast, to demonstrate another windowing option, the user scores are calculated using a + * global window, which periodically (every ten minutes) emits cumulative user score sums. + * + *

In contrast to the previous pipelines in the series, which used static, finite input data, + * here we're using an unbounded data source, which lets us provide speculative results and handle + * late data, at much lower latency. We can use the early/speculative results to keep a + * 'leaderboard' updated in near real-time. Our handling of late data lets us generate correct + * results, e.g. for 'team prizes'. We're now outputting window results as they're + * calculated, giving us much lower latency than with the previous batch examples. + * + *
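+ * As a minimal, illustrative sketch of the triggering just described (the authoritative
+ * configuration lives in main() below), the team-score branch windows on event time and asks for
+ * both early (speculative) and late firings, accumulating panes so that late data refines earlier
+ * results; the 60 and 120 minute values are simply the option defaults used below:
+ * {@code
+ *   Window.into(FixedWindows.of(Duration.standardMinutes(60)))
+ *       .triggering(AfterWatermark.pastEndOfWindow()
+ *           .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
+ *               .plusDelayOf(FIVE_MINUTES))
+ *           .withLateFirings(AfterProcessingTime.pastFirstElementInPane()
+ *               .plusDelayOf(TEN_MINUTES)))
+ *       .withAllowedLateness(Duration.standardMinutes(120))
+ *       .accumulatingFiredPanes()
+ * }
+ * The per-user branch instead uses a global window with
+ * {@code Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES))}
+ * as its trigger, emitting periodic running sums.
+ *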

      Run {@link injector.Injector} to generate pubsub data for this pipeline. The Injector + * documentation provides more detail on how to do this. + * + *

      To execute this pipeline using the Dataflow service, specify the pipeline configuration + * like this: + *

      {@code
      + *   --project=YOUR_PROJECT_ID
      + *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
      + *   --runner=BlockingDataflowPipelineRunner
      + *   --dataset=YOUR-DATASET
      + *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
      + * }
      + * 
      + * where the BigQuery dataset you specify must already exist. + * The PubSub topic you specify should be the same topic to which the Injector is publishing. + */ +public class LeaderBoard extends HourlyTeamScore { + + private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; + + private static DateTimeFormatter fmt = + DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + static final Duration FIVE_MINUTES = Duration.standardMinutes(5); + static final Duration TEN_MINUTES = Duration.standardMinutes(10); + + + /** + * Format information for scores, and write that info to BigQuery. + * Optionally include fixed windowing information and timing in the result. + */ + public static class WriteScoresToBigQuery + extends PTransform>, PDone> { + + private final String fieldName; + private final String tablePrefix; + private final boolean writeTiming; // Whether to write timing info to the resultant table. + private final boolean writeWindowStart; // whether to include window start info. + + public WriteScoresToBigQuery(String tablePrefix, String fieldName, + boolean writeWindowStart, boolean writeTiming) { + this.fieldName = fieldName; + this.tablePrefix = tablePrefix; + this.writeWindowStart = writeWindowStart; + this.writeTiming = writeTiming; + } + + /** Convert each key/score pair into a BigQuery TableRow. */ + private class BuildFixedRowFn extends DoFn, TableRow> + implements RequiresWindowAccess { + + @Override + public void processElement(ProcessContext c) { + + // IntervalWindow w = (IntervalWindow) c.window(); + + TableRow row = new TableRow() + .set(fieldName, c.element().getKey()) + .set("total_score", c.element().getValue().longValue()) + .set("processing_time", fmt.print(Instant.now())); + if (writeWindowStart) { + IntervalWindow w = (IntervalWindow) c.window(); + row.set("window_start", fmt.print(w.start())); + } + if (writeTiming) { + row.set("timing", c.pane().getTiming().toString()); + } + c.output(row); + } + } + + /** Build the output table schema. */ + private TableSchema getFixedSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName(fieldName).setType("STRING")); + fields.add(new TableFieldSchema().setName("total_score").setType("INTEGER")); + fields.add(new TableFieldSchema().setName("processing_time").setType("STRING")); + if (writeWindowStart) { + fields.add(new TableFieldSchema().setName("window_start").setType("STRING")); + } + if (writeTiming) { + fields.add(new TableFieldSchema().setName("timing").setType("STRING")); + } + return new TableSchema().setFields(fields); + } + + @Override + public PDone apply(PCollection> teamAndScore) { + return teamAndScore + .apply(ParDo.named("ConvertToFixedTriggersRow").of(new BuildFixedRowFn())) + .apply(BigQueryIO.Write + .to(getTable(teamAndScore.getPipeline(), + tablePrefix + "_" + fieldName)) + .withSchema(getFixedSchema()) + .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)); + } + } + + + /** + * Options supported by {@link LeaderBoard}. 
+ */ + static interface Options extends HourlyTeamScore.Options, DataflowExampleOptions { + + @Description("Pub/Sub topic to read from") + @Validation.Required + String getTopic(); + void setTopic(String value); + + @Description("Numeric value of fixed window duration for team analysis, in minutes") + @Default.Integer(60) + Integer getTeamWindowDuration(); + void setTeamWindowDuration(Integer value); + + @Description("Numeric value of allowed data lateness, in minutes") + @Default.Integer(120) + Integer getAllowedLateness(); + void setAllowedLateness(Integer value); + + @Description("Prefix used for the BigQuery table names") + @Default.String("leaderboard") + String getTableName(); + void setTableName(String value); + } + + + public static void main(String[] args) throws Exception { + + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + // Enforce that this pipeline is always run in streaming mode. + options.setStreaming(true); + // For example purposes, allow the pipeline to be easily cancelled instead of running + // continuously. + options.setRunner(DataflowPipelineRunner.class); + DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options); + Pipeline pipeline = Pipeline.create(options); + + // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub + // data elements, and parse the data. + PCollection gameEvents = pipeline + .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic())) + .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn())); + + // Extract team/score pairs from the event stream, using hour-long windows by default. + gameEvents + .apply(Window.named("LeaderboardTeamFixedWindows") + .into(FixedWindows.of( + Duration.standardMinutes(options.getTeamWindowDuration()))) + // We will get early (speculative) results as well as cumulative + // processing of late data. + .triggering( + AfterWatermark.pastEndOfWindow() + .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane() + .plusDelayOf(FIVE_MINUTES)) + .withLateFirings(AfterProcessingTime.pastFirstElementInPane() + .plusDelayOf(TEN_MINUTES))) + .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())) + .accumulatingFiredPanes()) + // Extract and sum teamname/score pairs from the event data. + .apply("ExtractTeamScore", new ExtractAndSumScore("team")) + // Write the results to BigQuery. + .apply("WriteTeamScoreSums", + new WriteScoresToBigQuery(options.getTableName(), "team", true, true)); + + // Extract user/score pairs from the event stream using processing time, via global windowing. + // Get periodic updates on all users' running scores. + gameEvents + .apply(Window.named("LeaderboardUserGlobalWindow") + .into(new GlobalWindows()) + // Get periodic results every ten minutes. + .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane() + .plusDelayOf(TEN_MINUTES))) + .accumulatingFiredPanes() + .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness()))) + // Extract and sum username/score pairs from the event data. + .apply("ExtractUserScore", new ExtractAndSumScore("user")) + // Write the results to BigQuery. + .apply("WriteUserScoreSums", + new WriteScoresToBigQuery(options.getTableName(), "user", false, false)); + + // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the + // command line. 
+ PipelineResult result = pipeline.run(); + dataflowUtils.waitToFinish(result); + } +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/UserScore.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/UserScore.java new file mode 100644 index 0000000000000..78e7cb8c63490 --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/UserScore.java @@ -0,0 +1,286 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableFieldSchema; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; +import com.google.api.services.bigquery.model.TableSchema; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; +import com.google.cloud.dataflow.sdk.options.GcpOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.options.Validation; +import com.google.cloud.dataflow.sdk.transforms.Aggregator; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.SimpleFunction; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.apache.avro.reflect.Nullable; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.TimeZone; + +/** + * This class is the first in a series of four pipelines that tell a story in a 'gaming' domain. + * Concepts: batch processing; reading input from Google Cloud Storage and writing output to + * BigQuery; using standalone DoFns; use of the sum by key transform; examples of + * Java 8 lambda syntax. + * + *

      In this gaming scenario, many users play, as members of different teams, over the course of a + * day, and their actions are logged for processing. Some of the logged game events may be late- + * arriving, if users play on mobile devices and go transiently offline for a period. + * + *

      This pipeline does batch processing of data collected from gaming events. It calculates the + * sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The + * batch processing will not include any late data that arrives after the day's cutoff point. + * + *
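+ * At its core, and as an illustrative sketch only (ExtractAndSumScore and main() below are the
+ * authoritative code), the pipeline maps each parsed event to a user/score pair and sums per key
+ * over the whole batch; gameEvents here stands for the parsed PCollection of GameActionInfo:
+ * {@code
+ *   gameEvents
+ *       .apply(MapElements
+ *           .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
+ *           .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}))
+ *       .apply(Sum.integersPerKey());
+ * }
+ *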

      To execute this pipeline using the Dataflow service and static example input data, specify + * the pipeline configuration like this: + *

      {@code
      + *   --project=YOUR_PROJECT_ID
      + *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
      + *   --runner=BlockingDataflowPipelineRunner
      + *   --dataset=YOUR-DATASET
      + * }
      + * 
      + * where the BigQuery dataset you specify must already exist. + * + *

      Optionally include the --input argument to specify a batch input file. + * See the --input default value for example batch data file, or use {@link injector.Injector} to + * generate your own batch data. + */ +public class UserScore { + + /** + * Class to hold info about a game event. + */ + @DefaultCoder(AvroCoder.class) + static class GameActionInfo { + @Nullable String user; + @Nullable String team; + @Nullable Integer score; + @Nullable Long timestamp; + + public GameActionInfo() {} + + public GameActionInfo(String user, String team, Integer score, Long timestamp) { + this.user = user; + this.team = team; + this.score = score; + this.timestamp = timestamp; + } + + public String getUser() { + return this.user; + } + public String getTeam() { + return this.team; + } + public Integer getScore() { + return this.score; + } + public String getKey(String keyname) { + if (keyname.equals("team")) { + return this.team; + } else { // return username as default + return this.user; + } + } + public Long getTimestamp() { + return this.timestamp; + } + } + + + /** + * Parses the raw game event info into GameActionInfo objects. Each event line has the following + * format: username,teamname,score,timestamp_in_ms,readable_time + * e.g.: + * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 + * The human-readable time string is not used here. + */ + static class ParseEventFn extends DoFn { + + // Log and count parse errors. + private static final Logger LOG = LoggerFactory.getLogger(ParseEventFn.class); + private final Aggregator numParseErrors = + createAggregator("ParseErrors", new Sum.SumLongFn()); + + @Override + public void processElement(ProcessContext c) { + String[] components = c.element().split(","); + try { + String user = components[0].trim(); + String team = components[1].trim(); + Integer score = Integer.parseInt(components[2].trim()); + Long timestamp = Long.parseLong(components[3].trim()); + GameActionInfo gInfo = new GameActionInfo(user, team, score, timestamp); + c.output(gInfo); + } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) { + numParseErrors.addValue(1L); + LOG.info("Parse error on " + c.element() + ", " + e.getMessage()); + } + } + } + + /** + * A transform to extract key/score information from GameActionInfo, and sum the scores. The + * constructor arg determines whether 'team' or 'user' info is extracted. + */ + public static class ExtractAndSumScore + extends PTransform, PCollection>> { + + private final String field; + + ExtractAndSumScore(String field) { + this.field = field; + } + + @Override + public PCollection> apply( + PCollection gameInfo) { + + return gameInfo + .apply(MapElements + .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore())) + .withOutputType(new TypeDescriptor>() {})) + .apply(Sum.integersPerKey()); + } + } + + + /** + * Format information for key/total_score pairs, and write that info to BigQuery. + */ + public static class WriteToBigQuery + extends PTransform>, PDone> { + + private final String tableName; + + public WriteToBigQuery(String tableName) { + this.tableName = tableName; + } + + /** Convert each key/score pair into a BigQuery TableRow. */ + private class BuildRowFn extends DoFn, TableRow> { + + @Override + public void processElement(ProcessContext c) { + + TableRow row = new TableRow() + .set("user", c.element().getKey()) + .set("total_score", c.element().getValue().longValue()); + c.output(row); + } + } + + /** Build the output table schema. 
*/ + private TableSchema getSchema() { + List fields = new ArrayList<>(); + fields.add(new TableFieldSchema().setName("user").setType("STRING")); + fields.add(new TableFieldSchema().setName("total_score").setType("INTEGER")); + return new TableSchema().setFields(fields); + } + + @Override + public PDone apply(PCollection> teamAndScore) { + return teamAndScore + .apply(ParDo.named("ConvertToRow").of(new BuildRowFn())) + .apply(BigQueryIO.Write + .to(getTable(teamAndScore.getPipeline(), + tableName)) + .withSchema(getSchema()) + .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)); + } + } + + /** Utility to construct an output table reference. */ + static TableReference getTable(Pipeline pipeline, String tableName) { + PipelineOptions options = pipeline.getOptions(); + TableReference table = new TableReference(); + table.setDatasetId(options.as(Options.class).getDataset()); + table.setProjectId(options.as(GcpOptions.class).getProject()); + table.setTableId(tableName); + return table; + } + + /** + * Options supported by {@link UserScore}. + */ + static interface Options extends PipelineOptions { + + @Description("Path to the data file(s) containing game data.") + // The default maps to two large Google Cloud Storage files (each ~12GB) holding two subsequent + // day's worth (roughly) of data. + @Default.String("gs://dataflow-samples/game/gaming_data*.csv") + String getInput(); + void setInput(String value); + + @Description("BigQuery Dataset to write tables to. Must already exist.") + @Validation.Required + String getDataset(); + void setDataset(String value); + + @Description("The BigQuery table name. Should not already exist.") + @Default.String("user_score") + String getTableName(); + void setTableName(String value); + } + + + + /** + * Run a batch pipeline. + */ + public static void main(String[] args) throws Exception { + // Begin constructing a pipeline configured by commandline flags. + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline pipeline = Pipeline.create(options); + + // Read events from a text file and parse them. + pipeline.apply(TextIO.Read.from(options.getInput())) + .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn())) + // Extract and sum username/score pairs from the event data. + .apply("ExtractUserScore", new ExtractAndSumScore("user")) + .apply("WriteUserScoreSums", new WriteToBigQuery(options.getTableName())); + + // Run the batch pipeline. + pipeline.run(); + } + +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java new file mode 100644 index 0000000000000..12bd5b63dfe0d --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java @@ -0,0 +1,419 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples.complete.game.injector; + +import com.google.api.services.pubsub.Pubsub; +import com.google.api.services.pubsub.model.PublishRequest; +import com.google.api.services.pubsub.model.PubsubMessage; + +import com.google.common.collect.ImmutableMap; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.security.GeneralSecurityException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.TimeZone; +import java.util.UUID; + + +/** + * This is a generator that simulates usage data from a mobile game, and either publishes the data + * to a pubsub topic or writes it to a file. + * + *

      The general model used by the generator is the following. There is a set of teams with team + * members. Each member is scoring points for their team. After some period, a team will dissolve + * and a new one will be created in its place. There is also a set of 'Robots', or spammer users. + * They hop from team to team. The robots are set to have a higher 'click rate' (generate more + * events) than the regular team members. + * + *

      Each generated line of data has the following form: + * username,teamname,score,timestamp_in_ms,readable_time + * e.g.: + * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224 + * + *
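+ * For illustration, a consumer can recover the individual fields by splitting each line on
+ * commas, which is essentially what ParseEventFn in the example pipelines does (this is a sketch,
+ * not the pipeline code; the variable line stands for one generated record):
+ * {@code
+ *   String[] components = line.split(",");
+ *   String user = components[0].trim();
+ *   String team = components[1].trim();
+ *   int score = Integer.parseInt(components[2].trim());
+ *   long timestampMs = Long.parseLong(components[3].trim());
+ *   // components[4], the human-readable time, is redundant and ignored by the pipelines.
+ * }
+ *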

      The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if + * specified. It takes the following arguments: + * {@code Injector project-name (topic-name|none) (filename|none)}. + * + *

To run the Injector in the mode where it publishes to PubSub, you will need to authenticate + * locally using project-based service account credentials to avoid running over PubSub + * quota. + * See https://developers.google.com/identity/protocols/application-default-credentials + * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS + * environment variable to point to your downloaded service account credentials before starting the + * program, e.g.: + * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}. + * If you do not do this, then your injector will only run for a few minutes on your + * 'user account' credentials before you will start to see quota error messages like: + * "Request throttled due to user QPS limit being reached", and see this exception: + * "com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests". + * Once you've set up your credentials, run the Injector like this: + *

      {@code
+ * Injector <project-name> <topic-name> none
      + * }
      + * 
      + * The pubsub topic will be created if it does not exist. + * + *
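+ * For orientation only (InjectorUtils below is the actual code), the Injector builds its Pub/Sub
+ * client from application default credentials scoped to Pub/Sub, and creates the topic only when
+ * a lookup fails with 404; client here is the Pubsub client that InjectorUtils constructs:
+ * {@code
+ *   GoogleCredential credential =
+ *       GoogleCredential.getApplicationDefault(httpTransport, jsonFactory);
+ *   if (credential.createScopedRequired()) {
+ *     credential = credential.createScoped(PubsubScopes.all());
+ *   }
+ *   ...
+ *   client.projects().topics().create(fullTopicName, new Topic()).execute();
+ * }
+ *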

To run the Injector in write-to-file mode, set the topic name to "none" and specify the + * filename: + *

      {@code
+ * Injector <project-name> none <filename>
      + * }
      + * 
      + */ +class Injector { + private static Pubsub pubsub; + private static Random random = new Random(); + private static String topic; + private static String project; + private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms"; + + // QPS ranges from 800 to 1000. + private static final int MIN_QPS = 800; + private static final int QPS_RANGE = 200; + // How long to sleep, in ms, between creation of the threads that make API requests to PubSub. + private static final int THREAD_SLEEP_MS = 500; + + // Lists used to generate random team names. + private static final ArrayList COLORS = + new ArrayList(Arrays.asList( + "Magenta", "AliceBlue", "Almond", "Amaranth", "Amber", + "Amethyst", "AndroidGreen", "AntiqueBrass", "Fuchsia", "Ruby", "AppleGreen", + "Apricot", "Aqua", "ArmyGreen", "Asparagus", "Auburn", "Azure", "Banana", + "Beige", "Bisque", "BarnRed", "BattleshipGrey")); + + private static final ArrayList ANIMALS = + new ArrayList(Arrays.asList( + "Echidna", "Koala", "Wombat", "Marmot", "Quokka", "Kangaroo", "Dingo", "Numbat", "Emu", + "Wallaby", "CaneToad", "Bilby", "Possum", "Cassowary", "Kookaburra", "Platypus", + "Bandicoot", "Cockatoo", "Antechinus")); + + // The list of live teams. + private static ArrayList liveTeams = new ArrayList(); + + private static DateTimeFormatter fmt = + DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS") + .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))); + + + // The total number of robots in the system. + private static final int NUM_ROBOTS = 20; + // Determines the chance that a team will have a robot team member. + private static final int ROBOT_PROBABILITY = 3; + private static final int NUM_LIVE_TEAMS = 15; + private static final int BASE_MEMBERS_PER_TEAM = 5; + private static final int MEMBERS_PER_TEAM = 15; + private static final int MAX_SCORE = 20; + private static final int LATE_DATA_RATE = 5 * 60 * 2; // Every 10 minutes + private static final int BASE_DELAY_IN_MILLIS = 5 * 60 * 1000; // 5-10 minute delay + private static final int FUZZY_DELAY_IN_MILLIS = 5 * 60 * 1000; + + // The minimum time a 'team' can live. + private static final int BASE_TEAM_EXPIRATION_TIME_IN_MINS = 20; + private static final int TEAM_EXPIRATION_TIME_IN_MINS = 20; + + + /** + * A class for holding team info: the name of the team, when it started, + * and the current team members. Teams may but need not include one robot team member. + */ + private static class TeamInfo { + String teamName; + long startTimeInMillis; + int expirationPeriod; + // The team might but need not include 1 robot. Will be non-null if so. + String robot; + int numMembers; + + private TeamInfo(String teamName, long startTimeInMillis, String robot) { + this.teamName = teamName; + this.startTimeInMillis = startTimeInMillis; + // How long until this team is dissolved. + this.expirationPeriod = random.nextInt(TEAM_EXPIRATION_TIME_IN_MINS) + + BASE_TEAM_EXPIRATION_TIME_IN_MINS; + this.robot = robot; + // Determine the number of team members. 
+ numMembers = random.nextInt(MEMBERS_PER_TEAM) + BASE_MEMBERS_PER_TEAM; + } + + String getTeamName() { + return teamName; + } + String getRobot() { + return robot; + } + + long getStartTimeInMillis() { + return startTimeInMillis; + } + long getEndTimeInMillis() { + return startTimeInMillis + (expirationPeriod * 60 * 1000); + } + String getRandomUser() { + int userNum = random.nextInt(numMembers); + return "user" + userNum + "_" + teamName; + } + + int numMembers() { + return numMembers; + } + + @Override + public String toString() { + return "(" + teamName + ", num members: " + numMembers() + ", starting at: " + + startTimeInMillis + ", expires in: " + expirationPeriod + ", robot: " + robot + ")"; + } + } + + /** Utility to grab a random element from an array of Strings. */ + private static String randomElement(ArrayList list) { + int index = random.nextInt(list.size()); + return list.get(index); + } + + /** + * Get and return a random team. If the selected team is too old w.r.t its expiration, remove + * it, replacing it with a new team. + */ + private static TeamInfo randomTeam(ArrayList list) { + int index = random.nextInt(list.size()); + TeamInfo team = list.get(index); + // If the selected team is expired, remove it and return a new team. + long currTime = System.currentTimeMillis(); + if ((team.getEndTimeInMillis() < currTime) || team.numMembers() == 0) { + System.out.println("\nteam " + team + " is too old; replacing."); + System.out.println("start time: " + team.getStartTimeInMillis() + + ", end time: " + team.getEndTimeInMillis() + + ", current time:" + currTime); + removeTeam(index); + // Add a new team in its stead. + return (addLiveTeam()); + } else { + return team; + } + } + + /** + * Create and add a team. Possibly add a robot to the team. + */ + private static synchronized TeamInfo addLiveTeam() { + String teamName = randomElement(COLORS) + randomElement(ANIMALS); + String robot = null; + // Decide if we want to add a robot to the team. + if (random.nextInt(ROBOT_PROBABILITY) == 0) { + robot = "Robot-" + random.nextInt(NUM_ROBOTS); + } + long currTime = System.currentTimeMillis(); + // Create the new team. + TeamInfo newTeam = new TeamInfo(teamName, System.currentTimeMillis(), robot); + liveTeams.add(newTeam); + System.out.println("[+" + newTeam + "]"); + return newTeam; + } + + /** + * Remove a specific team. + */ + private static synchronized void removeTeam(int teamIndex) { + TeamInfo removedTeam = liveTeams.remove(teamIndex); + System.out.println("[-" + removedTeam + "]"); + } + + /** Generate a user gaming event. */ + private static String generateEvent(Long currTime, int delayInMillis) { + TeamInfo team = randomTeam(liveTeams); + String teamName = team.getTeamName(); + String user; + int PARSE_ERROR_RATE = 900000; + + String robot = team.getRobot(); + // If the team has an associated robot team member... + if (robot != null) { + // Then use that robot for the message with some probability. + // Set this probability to higher than that used to select any of the 'regular' team + // members, so that if there is a robot on the team, it has a higher click rate. + if (random.nextInt(team.numMembers() / 2) == 0) { + user = robot; + } else { + user = team.getRandomUser(); + } + } else { // No robot. + user = team.getRandomUser(); + } + String event = user + "," + teamName + "," + random.nextInt(MAX_SCORE); + // Randomly introduce occasional parse errors. 
You can see a custom counter tracking the number + // of such errors in the Dataflow Monitoring UI, as the example pipeline runs. + if (random.nextInt(PARSE_ERROR_RATE) == 0) { + System.out.println("Introducing a parse error."); + event = "THIS LINE REPRESENTS CORRUPT DATA AND WILL CAUSE A PARSE ERROR"; + } + return addTimeInfoToEvent(event, currTime, delayInMillis); + } + + /** + * Add time info to a generated gaming event. + */ + private static String addTimeInfoToEvent(String message, Long currTime, int delayInMillis) { + String eventTimeString = + Long.toString((currTime - delayInMillis) / 1000 * 1000); + // Add a (redundant) 'human-readable' date string to make the data semantics more clear. + String dateString = fmt.print(currTime); + message = message + "," + eventTimeString + "," + dateString; + return message; + } + + /** + * Publish 'numMessages' arbitrary events from live users with the provided delay, to a + * PubSub topic. + */ + public static void publishData(int numMessages, int delayInMillis) + throws IOException { + List pubsubMessages = new ArrayList<>(); + + for (int i = 0; i < Math.max(1, numMessages); i++) { + Long currTime = System.currentTimeMillis(); + String message = generateEvent(currTime, delayInMillis); + PubsubMessage pubsubMessage = new PubsubMessage() + .encodeData(message.getBytes("UTF-8")); + pubsubMessage.setAttributes( + ImmutableMap.of(TIMESTAMP_ATTRIBUTE, + Long.toString((currTime - delayInMillis) / 1000 * 1000))); + if (delayInMillis != 0) { + System.out.println(pubsubMessage.getAttributes()); + System.out.println("late data for: " + message); + } + pubsubMessages.add(pubsubMessage); + } + + PublishRequest publishRequest = new PublishRequest(); + publishRequest.setMessages(pubsubMessages); + pubsub.projects().topics().publish(topic, publishRequest).execute(); + } + + /** + * Publish generated events to a file. + */ + public static void publishDataToFile(String fileName, int numMessages, int delayInMillis) + throws IOException { + List pubsubMessages = new ArrayList<>(); + PrintWriter out = new PrintWriter(new OutputStreamWriter( + new BufferedOutputStream(new FileOutputStream(fileName, true)), "UTF-8")); + + try { + for (int i = 0; i < Math.max(1, numMessages); i++) { + Long currTime = System.currentTimeMillis(); + String message = generateEvent(currTime, delayInMillis); + out.println(message); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (out != null) { + out.flush(); + out.close(); + } + } + } + + + public static void main(String[] args) + throws GeneralSecurityException, IOException, InterruptedException { + if (args.length < 3) { + System.out.println("Usage: Injector project-name (topic-name|none) (filename|none)"); + System.exit(1); + } + boolean writeToFile = false; + boolean writeToPubsub = true; + project = args[0]; + String topicName = args[1]; + String fileName = args[2]; + // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if + // specified; otherwise, it will try to write to a file. + if (topicName.equalsIgnoreCase("none")) { + writeToFile = true; + writeToPubsub = false; + } + if (writeToPubsub) { + // Create the PubSub client. + pubsub = InjectorUtils.getClient(); + // Create the PubSub topic as necessary. 
+ topic = InjectorUtils.getFullyQualifiedTopicName(project, topicName); + InjectorUtils.createTopic(pubsub, topic); + System.out.println("Injecting to topic: " + topic); + } else { + if (fileName.equalsIgnoreCase("none")) { + System.out.println("Filename not specified."); + System.exit(1); + } + System.out.println("Writing to file: " + fileName); + } + System.out.println("Starting Injector"); + + // Start off with some random live teams. + while (liveTeams.size() < NUM_LIVE_TEAMS) { + addLiveTeam(); + } + + // Publish messages at a rate determined by the QPS and Thread sleep settings. + for (int i = 0; true; i++) { + if (Thread.activeCount() > 10) { + System.err.println("I'm falling behind!"); + } + + // Decide if this should be a batch of late data. + final int numMessages; + final int delayInMillis; + if (i % LATE_DATA_RATE == 0) { + // Insert delayed data for one user (one message only) + delayInMillis = BASE_DELAY_IN_MILLIS + random.nextInt(FUZZY_DELAY_IN_MILLIS); + numMessages = 1; + System.out.println("DELAY(" + delayInMillis + ", " + numMessages + ")"); + } else { + System.out.print("."); + delayInMillis = 0; + numMessages = MIN_QPS + random.nextInt(QPS_RANGE); + } + + if (writeToFile) { // Won't use threading for the file write. + publishDataToFile(fileName, numMessages, delayInMillis); + } else { // Write to PubSub. + // Start a thread to inject some data. + new Thread(){ + public void run() { + try { + publishData(numMessages, delayInMillis); + } catch (IOException e) { + System.err.println(e); + } + } + }.start(); + } + + // Wait before creating another injector thread. + Thread.sleep(THREAD_SLEEP_MS); + } + } +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java new file mode 100644 index 0000000000000..06c38646274f2 --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples.complete.game.injector; + + +import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; +import com.google.api.client.googleapis.json.GoogleJsonResponseException; +import com.google.api.client.googleapis.util.Utils; +import com.google.api.client.http.HttpRequestInitializer; +import com.google.api.client.http.HttpStatusCodes; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.json.JsonFactory; +import com.google.api.services.pubsub.Pubsub; +import com.google.api.services.pubsub.PubsubScopes; +import com.google.api.services.pubsub.model.Topic; + +import com.google.common.base.Preconditions; + +import java.io.IOException; + +class InjectorUtils { + + private static final String APP_NAME = "injector"; + + /** + * Builds a new Pubsub client and returns it. 
+ */ + public static Pubsub getClient(final HttpTransport httpTransport, + final JsonFactory jsonFactory) + throws IOException { + Preconditions.checkNotNull(httpTransport); + Preconditions.checkNotNull(jsonFactory); + GoogleCredential credential = + GoogleCredential.getApplicationDefault(httpTransport, jsonFactory); + if (credential.createScopedRequired()) { + credential = credential.createScoped(PubsubScopes.all()); + } + if (credential.getServiceAccountId() == null) { + System.out.println("\n***Warning! You are not using service account credentials to " + + "authenticate.\nYou need to use service account credentials for this example," + + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run " + + "out of PubSub quota very quickly.\nSee " + + "https://developers.google.com/identity/protocols/application-default-credentials."); + System.exit(1); + } + HttpRequestInitializer initializer = + new RetryHttpInitializerWrapper(credential); + return new Pubsub.Builder(httpTransport, jsonFactory, initializer) + .setApplicationName(APP_NAME) + .build(); + } + + /** + * Builds a new Pubsub client with default HttpTransport and + * JsonFactory and returns it. + */ + public static Pubsub getClient() throws IOException { + return getClient(Utils.getDefaultTransport(), + Utils.getDefaultJsonFactory()); + } + + + /** + * Returns the fully qualified topic name for Pub/Sub. + */ + public static String getFullyQualifiedTopicName( + final String project, final String topic) { + return String.format("projects/%s/topics/%s", project, topic); + } + + /** + * Create a topic if it doesn't exist. + */ + public static void createTopic(Pubsub client, String fullTopicName) + throws IOException { + try { + client.projects().topics().get(fullTopicName).execute(); + } catch (GoogleJsonResponseException e) { + if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) { + Topic topic = client.projects().topics() + .create(fullTopicName, new Topic()) + .execute(); + System.out.printf("Topic %s was created.\n", topic.getName()); + } + } + } +} diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java new file mode 100644 index 0000000000000..eeeabcef8bebe --- /dev/null +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.examples.complete.game.injector; + +import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.http.HttpBackOffIOExceptionHandler; +import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler; +import com.google.api.client.http.HttpRequest; +import com.google.api.client.http.HttpRequestInitializer; +import com.google.api.client.http.HttpResponse; +import com.google.api.client.http.HttpUnsuccessfulResponseHandler; +import com.google.api.client.util.ExponentialBackOff; +import com.google.api.client.util.Sleeper; +import com.google.common.base.Preconditions; + +import java.io.IOException; +import java.util.logging.Logger; + +/** + * RetryHttpInitializerWrapper will automatically retry upon RPC + * failures, preserving the auto-refresh behavior of the Google + * Credentials. + */ +public class RetryHttpInitializerWrapper implements HttpRequestInitializer { + + /** + * A private logger. + */ + private static final Logger LOG = + Logger.getLogger(RetryHttpInitializerWrapper.class.getName()); + + /** + * One minutes in miliseconds. + */ + private static final int ONEMINITUES = 60000; + + /** + * Intercepts the request for filling in the "Authorization" + * header field, as well as recovering from certain unsuccessful + * error codes wherein the Credential must refresh its token for a + * retry. + */ + private final Credential wrappedCredential; + + /** + * A sleeper; you can replace it with a mock in your test. + */ + private final Sleeper sleeper; + + /** + * A constructor. + * + * @param wrappedCredential Credential which will be wrapped and + * used for providing auth header. + */ + public RetryHttpInitializerWrapper(final Credential wrappedCredential) { + this(wrappedCredential, Sleeper.DEFAULT); + } + + /** + * A protected constructor only for testing. + * + * @param wrappedCredential Credential which will be wrapped and + * used for providing auth header. + * @param sleeper Sleeper for easy testing. + */ + RetryHttpInitializerWrapper( + final Credential wrappedCredential, final Sleeper sleeper) { + this.wrappedCredential = Preconditions.checkNotNull(wrappedCredential); + this.sleeper = sleeper; + } + + /** + * Initializes the given request. + */ + @Override + public final void initialize(final HttpRequest request) { + request.setReadTimeout(2 * ONEMINITUES); // 2 minutes read timeout + final HttpUnsuccessfulResponseHandler backoffHandler = + new HttpBackOffUnsuccessfulResponseHandler( + new ExponentialBackOff()) + .setSleeper(sleeper); + request.setInterceptor(wrappedCredential); + request.setUnsuccessfulResponseHandler( + new HttpUnsuccessfulResponseHandler() { + @Override + public boolean handleResponse( + final HttpRequest request, + final HttpResponse response, + final boolean supportsRetry) throws IOException { + if (wrappedCredential.handleResponse( + request, response, supportsRetry)) { + // If credential decides it can handle it, + // the return code or message indicated + // something specific to authentication, + // and no backoff is desired. + return true; + } else if (backoffHandler.handleResponse( + request, response, supportsRetry)) { + // Otherwise, we defer to the judgement of + // our internal backoff handler. 
+ LOG.info("Retrying " + + request.getUrl().toString()); + return true; + } else { + return false; + } + } + }); + request.setIOExceptionHandler( + new HttpBackOffIOExceptionHandler(new ExponentialBackOff()) + .setSleeper(sleeper)); + } +} + diff --git a/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java new file mode 100644 index 0000000000000..4795de2fc32d9 --- /dev/null +++ b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.complete.game.GameStats.CalculateSpammyUsers; +import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo; +import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.RunnableOnService; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.transforms.Filter; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.WithTimestamps; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.hamcrest.CoreMatchers; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; + +/** + * Tests of GameStats. + * Because the pipeline was designed for easy readability and explanations, it lacks good + * modularity for testing. See our testing documentation for better ideas: + * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline. 
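[A minimal wiring sketch, illustrative only and not part of this patch: it shows the wrapper attached to an HttpRequestFactory, the same way the Pubsub.Builder in the injector utilities above consumes it. GoogleCredential.getApplicationDefault and createRequestFactory are standard google-api/google-http client calls; the class and method names below are placeholders.]

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.http.HttpRequestFactory;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;

import java.io.IOException;

public class RetryWiringSketch {
  public static HttpRequestFactory newRetryingFactory(
      HttpTransport transport, JsonFactory jsonFactory) throws IOException {
    GoogleCredential credential =
        GoogleCredential.getApplicationDefault(transport, jsonFactory);
    // Every request built by this factory gets the credential's Authorization header,
    // plus exponential backoff on unsuccessful responses and IOExceptions.
    return transport.createRequestFactory(new RetryHttpInitializerWrapper(credential));
  }
}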
+ */
+@RunWith(JUnit4.class)
+public class GameStatsTest implements Serializable {
+
+  // User scores
+  static final KV<String, Integer>[] USER_SCORES_ARRAY = new KV[] {
+    KV.of("Robot-2", 66), KV.of("Robot-1", 116), KV.of("user7_AndroidGreenKookaburra", 23),
+    KV.of("user7_AndroidGreenKookaburra", 1),
+    KV.of("user19_BisqueBilby", 14), KV.of("user13_ApricotQuokka", 15),
+    KV.of("user18_BananaEmu", 25), KV.of("user6_AmberEchidna", 8),
+    KV.of("user2_AmberQuokka", 6), KV.of("user0_MagentaKangaroo", 4),
+    KV.of("user0_MagentaKangaroo", 3), KV.of("user2_AmberCockatoo", 13),
+    KV.of("user7_AlmondWallaby", 15), KV.of("user6_AmberNumbat", 11),
+    KV.of("user6_AmberQuokka", 4)
+  };
+
+  static final List<KV<String, Integer>> USER_SCORES = Arrays.asList(USER_SCORES_ARRAY);
+
+  // The expected list of 'spammers'.
+  static final KV<String, Integer>[] SPAMMERS = new KV[] {
+      KV.of("Robot-2", 66), KV.of("Robot-1", 116)
+  };
+
+  /** Test the calculation of 'spammy users'. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testCalculateSpammyUsers() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<KV<String, Integer>> input = p.apply(Create.of(USER_SCORES));
+    PCollection<KV<String, Integer>> output = input.apply(new CalculateSpammyUsers());
+
+    // Check the set of spammers.
+    DataflowAssert.that(output).containsInAnyOrder(SPAMMERS);
+
+    p.run();
+  }
+
+}
diff --git a/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
new file mode 100644
index 0000000000000..fe163037f63c5
--- /dev/null
+++ b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */ + +package com.google.cloud.dataflow.examples.complete.game; + +import com.google.api.services.bigquery.model.TableRow; +import com.google.cloud.dataflow.examples.complete.game.UserScore.ExtractAndSumScore; +import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo; +import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn; +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.RunnableOnService; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Create; +import com.google.cloud.dataflow.sdk.transforms.DoFnTester; +import com.google.cloud.dataflow.sdk.transforms.Filter; +import com.google.cloud.dataflow.sdk.transforms.MapElements; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.WithTimestamps; +import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows; +import com.google.cloud.dataflow.sdk.transforms.windowing.Window; +import com.google.cloud.dataflow.sdk.values.KV; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; + +import org.hamcrest.CoreMatchers; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; + +/** + * Tests of HourlyTeamScore. + * Because the pipeline was designed for easy readability and explanations, it lacks good + * modularity for testing. See our testing documentation for better ideas: + * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline. + */ +@RunWith(JUnit4.class) +public class HourlyTeamScoreTest implements Serializable { + + static final String[] GAME_EVENTS_ARRAY = new String[] { + "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444", + "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444", + "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444", + "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444", + "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444", + "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444", + "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444", + "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444", + // time gap... + "user0_AndroidGreenEchidna,AndroidGreenEchidna,0,1447965690000,2015-11-19 12:41:31.053", + "user0_MagentaKangaroo,MagentaKangaroo,4,1447965690000,2015-11-19 12:41:31.053", + "user2_AmberCockatoo,AmberCockatoo,13,1447965690000,2015-11-19 12:41:31.053", + "user18_BananaEmu,BananaEmu,7,1447965690000,2015-11-19 12:41:31.053", + "user3_BananaEmu,BananaEmu,17,1447965690000,2015-11-19 12:41:31.053", + "user18_BananaEmu,BananaEmu,1,1447965690000,2015-11-19 12:41:31.053", + "user18_ApricotCaneToad,ApricotCaneToad,14,1447965690000,2015-11-19 12:41:31.053" + }; + + + static final List GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY); + + + // Used to check the filtering. 
+  static final KV<String, Integer>[] FILTERED_EVENTS = new KV[] {
+    KV.of("user0_AndroidGreenEchidna", 0), KV.of("user0_MagentaKangaroo", 4),
+    KV.of("user2_AmberCockatoo", 13),
+    KV.of("user18_BananaEmu", 7), KV.of("user3_BananaEmu", 17),
+    KV.of("user18_BananaEmu", 1), KV.of("user18_ApricotCaneToad", 14)
+  };
+
+  /** Test the filtering. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testUserScoresFilter() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    final Instant startMinTimestamp = new Instant(1447965680000L);
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
+
+      .apply("FilterStartTime", Filter.byPredicate(
+          (GameActionInfo gInfo)
+              -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
+      // run a map to access the fields in the result.
+      .apply(MapElements
+          .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
+          .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(FILTERED_EVENTS);
+
+    p.run();
+  }
+
+}
diff --git a/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
new file mode 100644
index 0000000000000..69601be1bb4d0
--- /dev/null
+++ b/examples/src/test/java8/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.UserScore.ExtractAndSumScore;
+import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo;
+import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of UserScore.
+ */
+@RunWith(JUnit4.class)
+public class UserScoreTest implements Serializable {
+
+  static final String[] GAME_EVENTS_ARRAY = new String[] {
+    "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444",
+    "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444",
+    "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444",
+    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444",
+    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444"
+  };
+
+  static final String[] GAME_EVENTS_ARRAY2 = new String[] {
+    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
+    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
+    "user13_BisqueBilby,BisqueBilby,xxx,1447955630000,2015-11-19 09:53:53.444"
+  };
+
+  static final List<String> GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY);
+  static final List<String> GAME_EVENTS2 = Arrays.asList(GAME_EVENTS_ARRAY2);
+
+  static final KV<String, Integer>[] USER_SUMS = new KV[] {
+      KV.of("user0_MagentaKangaroo", 3), KV.of("user13_ApricotQuokka", 15),
+      KV.of("user6_AmberNumbat", 11), KV.of("user7_AlmondWallaby", 15),
+      KV.of("user7_AndroidGreenKookaburra", 23),
+      KV.of("user19_BisqueBilby", 14) };
+
+  static final KV<String, Integer>[] TEAM_SUMS = new KV[] {
+      KV.of("MagentaKangaroo", 3), KV.of("ApricotQuokka", 15),
+      KV.of("AmberNumbat", 11), KV.of("AlmondWallaby", 15),
+      KV.of("AndroidGreenKookaburra", 23),
+      KV.of("BisqueBilby", 14) };
+
+  /** Test the ParseEventFn DoFn. */
+  @Test
+  public void testParseEventFn() {
+    DoFnTester<String, GameActionInfo> parseEventFn =
+        DoFnTester.of(new ParseEventFn());
+
+    List<GameActionInfo> results = parseEventFn.processBatch(GAME_EVENTS_ARRAY);
+    Assert.assertEquals(results.size(), 8);
+    Assert.assertEquals(results.get(0).getUser(), "user0_MagentaKangaroo");
+    Assert.assertEquals(results.get(0).getTeam(), "MagentaKangaroo");
+    Assert.assertEquals(results.get(0).getScore(), new Integer(3));
+  }
+
+  /** Tests ExtractAndSumScore("user"). */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testUserScoreSums() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+      .apply(ParDo.of(new ParseEventFn()))
+      // Extract and sum username/score pairs from the event data.
+      .apply("ExtractUserScore", new ExtractAndSumScore("user"));
+
+    // Check the user score sums.
+    DataflowAssert.that(output).containsInAnyOrder(USER_SUMS);
+
+    p.run();
+  }
+
+  /** Tests ExtractAndSumScore("team"). */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testTeamScoreSums() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+      .apply(ParDo.of(new ParseEventFn()))
+      // Extract and sum teamname/score pairs from the event data.
+      .apply("ExtractTeamScore", new ExtractAndSumScore("team"));
+
+    // Check the team score sums.
+    DataflowAssert.that(output).containsInAnyOrder(TEAM_SUMS);
+
+    p.run();
+  }
+
+  /** Test that bad input data is dropped appropriately.
*/ + @Test + @Category(RunnableOnService.class) + public void testUserScoresBadInput() throws Exception { + Pipeline p = TestPipeline.create(); + + PCollection input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of())); + + PCollection> extract = input + .apply(ParDo.of(new ParseEventFn())) + .apply( + MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())) + .withOutputType(new TypeDescriptor>() {})); + + DataflowAssert.that(extract).empty(); + + p.run(); + } +} From 3c90d44dbaea86f93a51b010b08b249195b277f4 Mon Sep 17 00:00:00 2001 From: kirpichov Date: Sun, 13 Dec 2015 23:33:48 -0800 Subject: [PATCH 1227/1541] Uses the new progress/split request classes ApproximateProgress has been split into: * ApproximateReportedProgress for progress reporting; * ApproximateSplitRequest for dynamic split requests. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110127317 --- .../sdk/runners/dataflow/CustomSources.java | 15 ++--- .../sdk/runners/worker/AvroReader.java | 13 ++-- .../sdk/runners/worker/ConcatReader.java | 9 +-- .../worker/DataflowWorkProgressUpdater.java | 4 +- .../sdk/runners/worker/DataflowWorker.java | 2 +- .../runners/worker/GroupingShuffleReader.java | 9 +-- .../sdk/runners/worker/InMemoryReader.java | 8 +-- .../worker/SourceTranslationUtils.java | 28 ++++----- .../sdk/runners/worker/TextReader.java | 21 ++++--- .../runners/dataflow/CustomSourcesTest.java | 8 ++- .../sdk/runners/worker/AvroReaderTest.java | 6 +- .../sdk/runners/worker/ConcatReaderTest.java | 5 +- .../DataflowWorkProgressUpdaterTest.java | 28 +++++---- .../worker/GroupingShuffleReaderTest.java | 12 ++-- .../runners/worker/InMemoryReaderTest.java | 5 +- .../sdk/runners/worker/ReaderTestUtils.java | 61 ++++++++++++++----- .../sdk/runners/worker/TextReaderTest.java | 12 ++-- .../common/worker/MapTaskExecutorTest.java | 10 +-- .../util/common/worker/ReadOperationTest.java | 25 +++++--- 19 files changed, 163 insertions(+), 118 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java index 3e14ca615544a..668c33dcd5498 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java @@ -28,7 +28,8 @@ import com.google.api.client.util.BackOff; import com.google.api.client.util.Base64; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.DerivedSource; import com.google.api.services.dataflow.model.DynamicSourceSplit; import com.google.api.services.dataflow.model.SourceMetadata; @@ -531,10 +532,10 @@ public void close() throws IOException { @Override public Reader.Progress getProgress() { if (reader instanceof BoundedSource.BoundedReader) { - ApproximateProgress progress = new ApproximateProgress(); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); Double fractionConsumed = reader.getFractionConsumed(); if (fractionConsumed != null) { - progress.setPercentComplete(fractionConsumed.floatValue()); + progress.setFractionConsumed(fractionConsumed); } return SourceTranslationUtils.cloudProgressToReaderProgress(progress); } else { @@ -545,15 +546,15 
@@ public Reader.Progress getProgress() { @Override public Reader.DynamicSplitResult requestDynamicSplit(Reader.DynamicSplitRequest request) { - ApproximateProgress stopPosition = - SourceTranslationUtils.splitRequestToApproximateProgress(request); - Float fractionConsumed = stopPosition.getPercentComplete(); + ApproximateSplitRequest stopPosition = + SourceTranslationUtils.splitRequestToApproximateSplitRequest(request); + Double fractionConsumed = stopPosition.getFractionConsumed(); if (fractionConsumed == null) { // Only truncating at a fraction is currently supported. return null; } BoundedSource original = reader.getCurrentSource(); - BoundedSource residual = reader.splitAtFraction(fractionConsumed.doubleValue()); + BoundedSource residual = reader.splitAtFraction(fractionConsumed); if (residual == null) { return null; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java index ebad4ab948301..f957b16f6043e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReader.java @@ -18,7 +18,8 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.io.AvroSource; import com.google.cloud.dataflow.sdk.io.BoundedSource; @@ -135,8 +136,8 @@ public Progress getProgress() { if (readerProgress == null) { return null; } - ApproximateProgress progress = new ApproximateProgress(); - progress.setPercentComplete(readerProgress.floatValue()); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); + progress.setFractionConsumed(readerProgress); return cloudProgressToReaderProgress(progress); } @@ -147,9 +148,9 @@ public void close() throws IOException { @Override public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) { - ApproximateProgress splitProgress = - SourceTranslationUtils.splitRequestToApproximateProgress(splitRequest); - double splitAtFraction = splitProgress.getPercentComplete(); + ApproximateSplitRequest splitProgress = + SourceTranslationUtils.splitRequestToApproximateSplitRequest(splitRequest); + double splitAtFraction = splitProgress.getFractionConsumed(); LOG.info("Received request for dynamic split at {}", splitAtFraction); OffsetBasedSource residual = reader.splitAtFraction(splitAtFraction); if (residual == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReader.java index a1dd49f078643..2e3bf6cf9a300 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReader.java @@ -18,10 +18,11 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; 
+import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static com.google.common.base.Preconditions.checkNotNull; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.ConcatPosition; import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.io.range.OffsetRangeTracker; @@ -200,7 +201,7 @@ public Progress getProgress() { concatPosition.setPosition(positionOfCurrentIterator); } - ApproximateProgress progress = new ApproximateProgress(); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); com.google.api.services.dataflow.model.Position currentPosition = new com.google.api.services.dataflow.model.Position(); currentPosition.setConcatPosition(concatPosition); @@ -213,7 +214,7 @@ public Progress getProgress() { public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) { checkNotNull(splitRequest); - ApproximateProgress splitProgress = splitRequestToApproximateProgress(splitRequest); + ApproximateSplitRequest splitProgress = splitRequestToApproximateSplitRequest(splitRequest); com.google.api.services.dataflow.model.Position cloudPosition = splitProgress.getPosition(); if (cloudPosition == null) { LOG.warn("Concat only supports split at a Position. Requested: {}", splitRequest); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java index f9b62c3e4a282..c1e99bd671b46 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdater.java @@ -23,7 +23,7 @@ import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime; import static com.google.cloud.dataflow.sdk.util.TimeUtil.toCloudDuration; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.WorkItem; import com.google.api.services.dataflow.model.WorkItemServiceState; import com.google.api.services.dataflow.model.WorkItemStatus; @@ -99,7 +99,7 @@ protected void reportProgressHelper() throws Exception { fromCloudDuration(result.getReportStatusInterval()).getMillis(), leaseRemainingTime(getLeaseExpirationTimestamp(result))); - ApproximateProgress suggestedStopPoint = result.getSuggestedStopPoint(); + ApproximateSplitRequest suggestedStopPoint = result.getSplitRequest(); if (suggestedStopPoint != null) { LOG.info("Proposing dynamic split of work unit {} at {}", workString(), suggestedStopPoint); dynamicSplitResultToReport = worker.requestDynamicSplit( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 6c24cdb394bf7..e3536a92a5273 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -369,7 +369,7 @@ static WorkItemStatus buildStatus(WorkItem workItem, boolean completed, } if (progress != null) { - 
status.setProgress(readerProgressToCloudProgress(progress)); + status.setReportedProgress(readerProgressToCloudProgress(progress)); } if (dynamicSplitResult instanceof Reader.DynamicSplitResultWithPosition) { Reader.DynamicSplitResultWithPosition asPosition = diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java index f20b43ca9824c..ff856fe9324a9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReader.java @@ -19,10 +19,11 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.IterableCoder; import com.google.cloud.dataflow.sdk.coders.KvCoder; @@ -274,7 +275,7 @@ public double getRemainingParallelism() { public Progress getProgress() { com.google.api.services.dataflow.model.Position position = new com.google.api.services.dataflow.model.Position(); - ApproximateProgress progress = new ApproximateProgress(); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); ByteArrayShufflePosition groupStart = rangeTracker.getLastGroupStart(); if (groupStart != null) { position.setShufflePosition(groupStart.encodeBase64()); @@ -291,7 +292,7 @@ public Progress getProgress() { @Override public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) { checkNotNull(splitRequest); - ApproximateProgress splitProgress = splitRequestToApproximateProgress( + ApproximateSplitRequest splitProgress = splitRequestToApproximateSplitRequest( splitRequest); com.google.api.services.dataflow.model.Position splitPosition = splitProgress.getPosition(); if (splitPosition == null) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index d6ca259e6279f..b9a60c16ee498 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -19,10 +19,10 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static 
com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static java.lang.Math.min; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.io.range.OffsetRangeTracker; import com.google.cloud.dataflow.sdk.util.CoderUtils; @@ -122,7 +122,7 @@ public Progress getProgress() { new com.google.api.services.dataflow.model.Position(); currentPosition.setRecordIndex((long) nextIndex); - ApproximateProgress progress = new ApproximateProgress(); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); progress.setPosition(currentPosition); return cloudProgressToReaderProgress(progress); @@ -138,7 +138,7 @@ public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) checkNotNull(splitRequest); com.google.api.services.dataflow.model.Position splitPosition = - splitRequestToApproximateProgress(splitRequest).getPosition(); + splitRequestToApproximateSplitRequest(splitRequest).getPosition(); if (splitPosition == null) { LOG.warn("InMemoryReader only supports split at a Position. Requested: {}", splitRequest); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java index 8506dad152ad7..7b9fff9d2f24f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SourceTranslationUtils.java @@ -21,7 +21,8 @@ import static com.google.cloud.dataflow.sdk.util.Structs.addLong; import static com.google.cloud.dataflow.sdk.util.Structs.getDictionary; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.Position; import com.google.api.services.dataflow.model.Source; import com.google.api.services.dataflow.model.SourceMetadata; @@ -39,7 +40,7 @@ */ public class SourceTranslationUtils { public static Reader.Progress cloudProgressToReaderProgress( - @Nullable ApproximateProgress cloudProgress) { + @Nullable ApproximateReportedProgress cloudProgress) { return cloudProgress == null ? null : new DataflowReaderProgress(cloudProgress); } @@ -47,7 +48,7 @@ public static Reader.Position cloudPositionToReaderPosition(@Nullable Position c return cloudPosition == null ? null : new DataflowReaderPosition(cloudPosition); } - public static ApproximateProgress readerProgressToCloudProgress( + public static ApproximateReportedProgress readerProgressToCloudProgress( @Nullable Reader.Progress readerProgress) { return readerProgress == null ? null : ((DataflowReaderProgress) readerProgress).cloudProgress; } @@ -56,22 +57,21 @@ public static Position toCloudPosition(@Nullable Reader.Position readerPosition) return readerPosition == null ? null : ((DataflowReaderPosition) readerPosition).cloudPosition; } - public static ApproximateProgress splitRequestToApproximateProgress( + public static ApproximateSplitRequest splitRequestToApproximateSplitRequest( @Nullable Reader.DynamicSplitRequest splitRequest) { return (splitRequest == null) - ? null : ((DataflowDynamicSplitRequest) splitRequest).approximateProgress; + ? 
null : ((DataflowDynamicSplitRequest) splitRequest).splitRequest; } public static Reader.DynamicSplitRequest toDynamicSplitRequest( - @Nullable ApproximateProgress approximateProgress) { - return (approximateProgress == null) - ? null : new DataflowDynamicSplitRequest(approximateProgress); + @Nullable ApproximateSplitRequest splitRequest) { + return (splitRequest == null) ? null : new DataflowDynamicSplitRequest(splitRequest); } static class DataflowReaderProgress implements Reader.Progress { - public final ApproximateProgress cloudProgress; + public final ApproximateReportedProgress cloudProgress; - public DataflowReaderProgress(ApproximateProgress cloudProgress) { + public DataflowReaderProgress(ApproximateReportedProgress cloudProgress) { this.cloudProgress = cloudProgress; } @@ -135,15 +135,15 @@ public static Source dictionaryToCloudSource(Map params) throws } private static class DataflowDynamicSplitRequest implements Reader.DynamicSplitRequest { - public final ApproximateProgress approximateProgress; + public final ApproximateSplitRequest splitRequest; - private DataflowDynamicSplitRequest(ApproximateProgress approximateProgress) { - this.approximateProgress = approximateProgress; + private DataflowDynamicSplitRequest(ApproximateSplitRequest splitRequest) { + this.splitRequest = splitRequest; } @Override public String toString() { - return String.valueOf(approximateProgress); + return String.valueOf(splitRequest); } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java index 04bde2c353652..d39faad28cd53 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/TextReader.java @@ -19,9 +19,10 @@ import static com.google.api.client.util.Preconditions.checkNotNull; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.io.range.OffsetRangeTracker; @@ -369,12 +370,12 @@ public Progress getProgress() { new com.google.api.services.dataflow.model.Position(); currentPosition.setByteOffset(offset); - ApproximateProgress progress = new ApproximateProgress(); + ApproximateReportedProgress progress = new ApproximateReportedProgress(); progress.setPosition(currentPosition); // If endOffset is unspecified, we don't know the fraction consumed. 
if (rangeTracker.getStopPosition() != Long.MAX_VALUE) { - progress.setPercentComplete((float) rangeTracker.getFractionConsumed()); + progress.setFractionConsumed(rangeTracker.getFractionConsumed()); } return cloudProgressToReaderProgress(progress); @@ -385,14 +386,14 @@ public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) checkNotNull(splitRequest); // Currently, file-based Reader only supports split at a byte offset. - ApproximateProgress splitProgress = splitRequestToApproximateProgress(splitRequest); + ApproximateSplitRequest splitProgress = splitRequestToApproximateSplitRequest(splitRequest); com.google.api.services.dataflow.model.Position splitPosition = splitProgress.getPosition(); if (splitPosition == null) { - if (splitProgress.getPercentComplete() != null) { - float percentageComplete = splitProgress.getPercentComplete().floatValue(); - if (percentageComplete <= 0 || percentageComplete >= 1) { + if (splitProgress.getFractionConsumed() != null) { + float fractionConsumed = splitProgress.getFractionConsumed().floatValue(); + if (fractionConsumed <= 0 || fractionConsumed >= 1) { LOG.warn( - "TextReader cannot be split since the provided percentage of " + "TextReader cannot be split since the provided fraction of " + "work to be completed is out of the valid range (0, 1). Requested: {}", splitRequest); } @@ -407,7 +408,7 @@ public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) } splitPosition.setByteOffset( - rangeTracker.getPositionForFractionConsumed(percentageComplete)); + rangeTracker.getPositionForFractionConsumed(fractionConsumed)); } else { LOG.warn( "TextReader requires either a position or percentage of work to be complete to" diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java index e7541f2397622..7b3ddde901333 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java @@ -361,16 +361,18 @@ public void testProgressAndSourceSplitTranslation() throws Exception { try (Reader.ReaderIterator> iterator = reader.iterator()) { assertTrue(iterator.hasNext()); assertEquals( - 0, readerProgressToCloudProgress(iterator.getProgress()).getPercentComplete().intValue()); + 0.1, + readerProgressToCloudProgress(iterator.getProgress()).getFractionConsumed().doubleValue(), + 1e-6); assertEquals(valueInGlobalWindow(10), iterator.next()); assertEquals( 0.1, - readerProgressToCloudProgress(iterator.getProgress()).getPercentComplete().doubleValue(), + readerProgressToCloudProgress(iterator.getProgress()).getFractionConsumed().doubleValue(), 1e-6); assertEquals(valueInGlobalWindow(11), iterator.next()); assertEquals( 0.2, - readerProgressToCloudProgress(iterator.getProgress()).getPercentComplete().doubleValue(), + readerProgressToCloudProgress(iterator.getProgress()).getFractionConsumed().doubleValue(), 1e-6); assertEquals(valueInGlobalWindow(12), iterator.next()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java index 88b3e3314dd35..29f14e9cecdc8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/AvroReaderTest.java @@ -163,15 +163,15 
@@ private List readElems(String filename, @Nullable Long startOffset, AvroReader avroReader = new AvroReader<>(filename, startOffset, endOffset, coder, null); new ExecutorTestUtils.TestReaderObserver(avroReader, actualSizes); - float progressReported = 0; + double progressReported = 0; List actualElems = new ArrayList<>(); try (Reader.ReaderIterator> iterator = avroReader.iterator()) { while (iterator.hasNext()) { actualElems.add(iterator.next().getValue()); - float progress = 0.0f; + double progress = 0.0; Progress readerProgress = iterator.getProgress(); if (readerProgress != null) { - progress = readerProgressToCloudProgress(iterator.getProgress()).getPercentComplete(); + progress = readerProgressToCloudProgress(iterator.getProgress()).getFractionConsumed(); } // Make sure that the reported progress is monotonous. Assert.assertThat(progress, greaterThanOrEqualTo(progressReported)); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReaderTest.java index a8adae4148565..6ed712a0e1f24 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ConcatReaderTest.java @@ -28,7 +28,7 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.fail; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; import com.google.api.services.dataflow.model.Source; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -371,7 +371,8 @@ private void runProgressTest(int... sizes) throws Exception { for (int readerIndex = 0; readerIndex < sizes.length; readerIndex++) { for (int recordIndex = 0; recordIndex < sizes[readerIndex]; recordIndex++) { iterator.next(); - ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); + ApproximateReportedProgress progress = + readerProgressToCloudProgress(iterator.getProgress()); assertEquals( readerIndex, progress.getPosition().getConcatPosition().getIndex().intValue()); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index b46d34eab3531..a8c0315cbd22f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -17,11 +17,11 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtIndex; -import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateProgressAtPosition; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateSplitRequestAtPosition; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionAtIndex; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import 
static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static com.google.cloud.dataflow.sdk.util.CloudCounterUtils.extractCounter; import static com.google.cloud.dataflow.sdk.util.CloudMetricUtils.extractCloudMetric; @@ -40,7 +40,8 @@ import static org.mockito.Mockito.verifyZeroInteractions; import static org.mockito.Mockito.when; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.MetricUpdate; import com.google.api.services.dataflow.model.Position; import com.google.api.services.dataflow.model.WorkItem; @@ -83,7 +84,7 @@ @RunWith(JUnit4.class) public class DataflowWorkProgressUpdaterTest { static class TestMapTaskExecutor extends MapTaskExecutor { - ApproximateProgress progress = null; + ApproximateReportedProgress progress = null; public TestMapTaskExecutor(CounterSet counters) { super(new ArrayList(), counters, @@ -98,15 +99,15 @@ public Reader.Progress getWorkerProgress() { @Override public Reader.DynamicSplitResult requestDynamicSplit(Reader.DynamicSplitRequest splitRequest) { @Nullable - ApproximateProgress progress = splitRequestToApproximateProgress(splitRequest); - if (progress == null) { + ApproximateSplitRequest split = splitRequestToApproximateSplitRequest(splitRequest); + if (split == null) { return null; } return new Reader.DynamicSplitResultWithPosition( - cloudPositionToReaderPosition(progress.getPosition())); + cloudPositionToReaderPosition(split.getPosition())); } - public void setWorkerProgress(ApproximateProgress progress) { + public void setWorkerProgress(ApproximateReportedProgress progress) { this.progress = progress; } } @@ -331,7 +332,7 @@ private void setUpMetrics(int n) { } } - private void setUpProgress(ApproximateProgress progress) { + private void setUpProgress(ApproximateReportedProgress progress) { worker.setWorkerProgress(progress); } @@ -346,7 +347,7 @@ private WorkItemServiceState generateServiceState(long leaseExpirationTimestamp, responseState.setNextReportIndex(nextReportIndex); if (suggestedStopPosition != null) { - responseState.setSuggestedStopPoint(approximateProgressAtPosition(suggestedStopPosition)); + responseState.setSplitRequest(approximateSplitRequestAtPosition(suggestedStopPosition)); } return responseState; @@ -361,7 +362,7 @@ private static final class ExpectedDataflowWorkItemStatus Integer metricCount; @Nullable - ApproximateProgress expectedProgress; + ApproximateReportedProgress expectedProgress; @Nullable Position expectedSplitPosition; @@ -379,7 +380,8 @@ public ExpectedDataflowWorkItemStatus withMetrics(Integer metricCount) { return this; } - public ExpectedDataflowWorkItemStatus withProgress(ApproximateProgress expectedProgress) { + public ExpectedDataflowWorkItemStatus withProgress( + ApproximateReportedProgress expectedProgress) { this.expectedProgress = expectedProgress; return this; } @@ -459,7 +461,7 @@ private boolean matchProgress(WorkItemStatus status) { if (expectedProgress == null) { return true; } - ApproximateProgress progress = status.getProgress(); + ApproximateReportedProgress progress = status.getReportedProgress(); return expectedProgress.equals(progress); } diff --git 
a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java index ce6281fb308a5..4d5ca3f97aa63 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/GroupingShuffleReaderTest.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.api.client.util.Base64.encodeBase64URLSafeString; +import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.approximateSplitRequestAtPosition; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromSplitResult; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.splitRequestAtPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; @@ -29,7 +30,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; @@ -642,7 +643,8 @@ public void testGetApproximateProgress() throws Exception { Integer i = 0; while (readerIterator.hasNext()) { assertTrue(readerIterator.hasNext()); - ApproximateProgress progress = readerProgressToCloudProgress(readerIterator.getProgress()); + ApproximateReportedProgress progress = readerProgressToCloudProgress( + readerIterator.getProgress()); assertNotNull(progress.getPosition().getShufflePosition()); // Compare returned position with the expected position. @@ -662,11 +664,7 @@ public void testGetApproximateProgress() throws Exception { proposedSplitPosition.setShufflePosition(stop); assertNull( readerIterator.requestDynamicSplit( - toDynamicSplitRequest(createApproximateProgress(proposedSplitPosition)))); + toDynamicSplitRequest(approximateSplitRequestAtPosition(proposedSplitPosition)))); } } - - private ApproximateProgress createApproximateProgress(Position position) { - return new ApproximateProgress().setPosition(position); - } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java index 6b561d0e2b9da..f69d95a5d1cf6 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java @@ -29,7 +29,7 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; @@ -145,7 +145,8 @@ public void testDynamicSplit() throws Exception { try (Reader.ReaderIterator iterator = inMemoryReader.iterator()) { // Poke the iterator so that we can test dynamic splitting. 
assertTrue(iterator.hasNext()); - assertNull(iterator.requestDynamicSplit(toDynamicSplitRequest(new ApproximateProgress()))); + assertNull(iterator.requestDynamicSplit(toDynamicSplitRequest( + new ApproximateSplitRequest()))); assertNull(iterator.requestDynamicSplit(splitRequestAtIndex(null))); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java index fb3e0a2f9289c..0c9e4e53a9df8 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/ReaderTestUtils.java @@ -19,7 +19,8 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toDynamicSplitRequest; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.ConcatPosition; import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.util.WindowedValue; @@ -51,42 +52,72 @@ public static Position positionAtConcatPosition( new ConcatPosition().setIndex(index).setPosition(innerPosition)); } - public static ApproximateProgress approximateProgressAtPosition(@Nullable Position position) { - return new ApproximateProgress().setPosition(position); + public static ApproximateReportedProgress approximateProgressAtPosition( + @Nullable Position position) { + return new ApproximateReportedProgress().setPosition(position); } - public static ApproximateProgress approximateProgressAtIndex(@Nullable Long index) { + public static ApproximateSplitRequest approximateSplitRequestAtPosition( + @Nullable Position position) { + return new ApproximateSplitRequest().setPosition(position); + } + + public static ApproximateReportedProgress approximateProgressAtIndex( + @Nullable Long index) { return approximateProgressAtPosition(positionAtIndex(index)); } - public static ApproximateProgress approximateProgressAtByteOffset(@Nullable Long byteOffset) { + public static ApproximateSplitRequest approximateSplitRequestAtIndex( + @Nullable Long index) { + return approximateSplitRequestAtPosition(positionAtIndex(index)); + } + + public static ApproximateReportedProgress approximateProgressAtByteOffset( + @Nullable Long byteOffset) { return approximateProgressAtPosition(positionAtByteOffset(byteOffset)); } - public static ApproximateProgress approximateProgressAtConcatPosition( + public static ApproximateSplitRequest approximateSplitRequestAtByteOffset( + @Nullable Long byteOffset) { + return approximateSplitRequestAtPosition(positionAtByteOffset(byteOffset)); + } + + public static ApproximateReportedProgress approximateProgressAtConcatPosition( @Nullable Integer index, @Nullable Position innerPosition) { return approximateProgressAtPosition(positionAtConcatPosition(index, innerPosition)); } - public static ApproximateProgress approximateProgressAtFraction(@Nullable Float fraction) { - return new ApproximateProgress().setPercentComplete(fraction); + public static ApproximateSplitRequest approximateSplitRequestAtConcatPosition( + @Nullable Integer index, @Nullable Position innerPosition) { + return approximateSplitRequestAtPosition(positionAtConcatPosition(index, innerPosition)); + } 
+ + public static ApproximateReportedProgress approximateProgressAtFraction( + @Nullable Double fraction) { + return new ApproximateReportedProgress().setFractionConsumed(fraction); + } + + public static ApproximateSplitRequest approximateSplitRequestAtFraction( + @Nullable Double fraction) { + return new ApproximateSplitRequest().setFractionConsumed(fraction); } - public static Reader.DynamicSplitRequest splitRequestAtPosition(@Nullable Position position) { - return toDynamicSplitRequest(approximateProgressAtPosition(position)); + public static Reader.DynamicSplitRequest splitRequestAtPosition( + @Nullable Position position) { + return toDynamicSplitRequest(approximateSplitRequestAtPosition(position)); } public static Reader.DynamicSplitRequest splitRequestAtIndex(@Nullable Long index) { - return toDynamicSplitRequest(approximateProgressAtIndex(index)); + return toDynamicSplitRequest(approximateSplitRequestAtIndex(index)); } public static Reader.DynamicSplitRequest splitRequestAtByteOffset(@Nullable Long byteOffset) { - return toDynamicSplitRequest(approximateProgressAtByteOffset(byteOffset)); + return toDynamicSplitRequest(approximateSplitRequestAtByteOffset(byteOffset)); } public static Reader.DynamicSplitRequest splitRequestAtConcatPosition( @Nullable Integer index, @Nullable Position innerPosition) { - return toDynamicSplitRequest(approximateProgressAtConcatPosition(index, innerPosition)); + return toDynamicSplitRequest(approximateSplitRequestAtConcatPosition(index, innerPosition)); } public static Position positionFromSplitResult(Reader.DynamicSplitResult dynamicSplitResult) { @@ -98,8 +129,8 @@ public static Position positionFromProgress(Reader.Progress progress) { return readerProgressToCloudProgress(progress).getPosition(); } - public static Reader.DynamicSplitRequest splitRequestAtFraction(float fraction) { - return toDynamicSplitRequest(approximateProgressAtFraction(fraction)); + public static Reader.DynamicSplitRequest splitRequestAtFraction(double fraction) { + return toDynamicSplitRequest(approximateSplitRequestAtFraction(fraction)); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java index abfe2342d7525..188ce9d20fda3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/TextReaderTest.java @@ -31,7 +31,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.AtomicCoder; @@ -382,7 +382,7 @@ public void testGetProgressNoEndOffset() throws Exception { new WholeLineVerifyingCoder(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator iterator = textReader.iterator()) { - ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); + ApproximateReportedProgress progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(0L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); @@ -391,7 +391,7 @@ public void testGetProgressNoEndOffset() throws Exception { progress = 
readerProgressToCloudProgress(iterator.getProgress()); assertEquals(28L, progress.getPosition().getByteOffset().longValue()); // Since end position is not specified, percentComplete should be null. - assertNull(progress.getPercentComplete()); + assertNull(progress.getFractionConsumed()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); @@ -408,14 +408,14 @@ public void testGetProgressWithEndOffset() throws Exception { try (Reader.ReaderIterator iterator = textReader.iterator()) { iterator.next(); - ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); + ApproximateReportedProgress progress = readerProgressToCloudProgress(iterator.getProgress()); // Returned a record that starts at position 0 of 40 - 1/40 fraction consumed. - assertEquals(1.0f / 40, progress.getPercentComplete(), 1e-6); + assertEquals(1.0 / 40, progress.getFractionConsumed(), 1e-6); iterator.next(); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); // Returned a record that starts at position 28 - 29/40 consumed. - assertEquals(1.0f * 29 / 40, progress.getPercentComplete(), 1e-6); + assertEquals(1.0 * 29 / 40, progress.getFractionConsumed(), 1e-6); assertFalse(iterator.hasNext()); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java index ba428df4ce431..a232427315f04 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/MapTaskExecutorTest.java @@ -23,10 +23,10 @@ import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.splitRequestAtIndex; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.SUM; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator; @@ -92,7 +92,7 @@ public void finish() throws Exception { // A mock ReadOperation fed to a MapTaskExecutor in test. static class TestReadOperation extends ReadOperation { - private ApproximateProgress progress = null; + private ApproximateReportedProgress progress = null; TestReadOperation(OutputReceiver outputReceiver, String counterPrefix, AddCounterMutator addCounterMutator, StateSampler stateSampler) { @@ -109,10 +109,10 @@ public Reader.Progress getProgress() { public Reader.DynamicSplitResult requestDynamicSplit(Reader.DynamicSplitRequest splitRequest) { // Fakes the return with the same position as proposed. 
return new Reader.DynamicSplitResultWithPosition(cloudPositionToReaderPosition( - splitRequestToApproximateProgress(splitRequest).getPosition())); + splitRequestToApproximateSplitRequest(splitRequest).getPosition())); } - public void setProgress(ApproximateProgress progress) { + public void setProgress(ApproximateReportedProgress progress) { this.progress = progress; } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java index 72c24de508b4d..03eaeef261419 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/common/worker/ReadOperationTest.java @@ -21,7 +21,7 @@ import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudPositionToReaderPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.cloudProgressToReaderProgress; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; -import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateProgress; +import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.splitRequestToApproximateSplitRequest; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.toCloudPosition; import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind; @@ -37,7 +37,8 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -import com.google.api.services.dataflow.model.ApproximateProgress; +import com.google.api.services.dataflow.model.ApproximateReportedProgress; +import com.google.api.services.dataflow.model.ApproximateSplitRequest; import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.io.range.OffsetRangeTracker; import com.google.cloud.dataflow.sdk.util.common.Counter; @@ -127,7 +128,8 @@ public void testGetProgress() throws Exception { for (int i = 0; i < 5; ++i) { sleepMillis(500); // Wait for the operation to start and block. // Ensure that getProgress() doesn't block while the next() method is blocked. - ApproximateProgress progress = readerProgressToCloudProgress(readOperation.getProgress()); + ApproximateReportedProgress progress = readerProgressToCloudProgress( + readOperation.getProgress()); long observedIndex = progress.getPosition().getRecordIndex().longValue(); assertTrue("Actual: " + observedIndex, i == observedIndex || i == observedIndex + 1); iterator.offerNext(i); @@ -161,9 +163,10 @@ public void testDynamicSplit() throws Exception { assertEquals(positionAtIndex(8L), toCloudPosition(split.getAcceptedPosition())); // Check that the progress has been recomputed. 
- ApproximateProgress progress = readerProgressToCloudProgress(readOperation.getProgress()); + ApproximateReportedProgress progress = readerProgressToCloudProgress( + readOperation.getProgress()); assertEquals(2, progress.getPosition().getRecordIndex().longValue()); - assertEquals(2.0f / 8.0f, progress.getPercentComplete(), 0.001f); + assertEquals(2.0f / 8.0, progress.getFractionConsumed(), 0.001); receiver.unblockProcess(); iterator.offerNext(2); @@ -314,21 +317,23 @@ protected Integer nextImpl() throws IOException { public Reader.Progress getProgress() { Preconditions.checkState(!isClosed); return cloudProgressToReaderProgress( - new ApproximateProgress().setPosition(new Position().setRecordIndex((long) current)) - .setPercentComplete((float) tracker.getFractionConsumed())); + new ApproximateReportedProgress() + .setPosition(new Position().setRecordIndex((long) current)) + .setFractionConsumed(tracker.getFractionConsumed())); } @Override public Reader.DynamicSplitResult requestDynamicSplit( Reader.DynamicSplitRequest splitRequest) { Preconditions.checkState(!isClosed); - ApproximateProgress progress = splitRequestToApproximateProgress(splitRequest); - int index = progress.getPosition().getRecordIndex().intValue(); + ApproximateSplitRequest approximateSplitRequest = splitRequestToApproximateSplitRequest( + splitRequest); + int index = approximateSplitRequest.getPosition().getRecordIndex().intValue(); if (!tracker.trySplitAtPosition(index)) { return null; } return new Reader.DynamicSplitResultWithPosition( - cloudPositionToReaderPosition(progress.getPosition())); + cloudPositionToReaderPosition(approximateSplitRequest.getPosition())); } public int offerNext(int next) { From d7643e6c5651d92f1187cce608104763d7af27ee Mon Sep 17 00:00:00 2001 From: dhalperi Date: Mon, 14 Dec 2015 15:12:34 -0800 Subject: [PATCH 1228/1541] Update OffsetBasedSource javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110197208 --- .../dataflow/sdk/io/OffsetBasedSource.java | 87 ++++++++++++------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java index 4527b85f222be..df244f28f2cf5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java @@ -28,22 +28,27 @@ import java.util.NoSuchElementException; /** - * A {@link Source} that uses offsets to define starting and ending positions. + * A {@link BoundedSource} that uses offsets to define starting and ending positions. * - *

      Extend this class to implement your own offset based custom source. + *

      {@link OffsetBasedSource} is a common base class for all bounded sources where the input can + * be represented as a single range, and an input can be efficiently processed in parallel by + * splitting the range into a set of disjoint ranges whose union is the original range. This class + * should be used for sources that can be cheaply read starting at any given offset. + * {@link OffsetBasedSource} stores the range and implements splitting into bundles. + * + *

      Extend {@link OffsetBasedSource} to implement your own offset-based custom source. * {@link FileBasedSource}, which is a subclass of this, adds additional functionality useful for * custom sources that are based on files. If possible implementors should start from - * {@code FileBasedSource} instead of {@code OffsetBasedSource}. - * - *

      This is a common base class for all sources that use an offset range. It stores the range - * and implements splitting into bundles. This should be used for sources that can be cheaply read - * starting at any given offset. + * {@link FileBasedSource} instead of {@link OffsetBasedSource}. * *

      Consult {@link RangeTracker} for important semantics common to all sources defined by a range * of positions of a certain type, including the semantics of split points * ({@link OffsetBasedReader#isAtSplitPoint}). * * @param Type of records represented by the source. + * @see BoundedSource + * @see FileBasedSource + * @see RangeTracker */ public abstract class OffsetBasedSource extends BoundedSource { private final long startOffset; @@ -53,13 +58,13 @@ public abstract class OffsetBasedSource extends BoundedSource { /** * @param startOffset starting offset (inclusive) of the source. Must be non-negative. * - * @param endOffset ending offset (exclusive) of the source. Any - * {@code offset >= getMaxEndOffset()}, e.g., {@code Long.MAX_VALUE}, means the same as - * {@code getMaxEndOffset()}. Must be {@code >= startOffset}. + * @param endOffset ending offset (exclusive) of the source. Use {@link Long#MAX_VALUE} to + * indicate that the entire source after {@code startOffset} should be read. Must be + * {@code > startOffset}. * * @param minBundleSize minimum bundle size in offset units that should be used when splitting the - * source into sub-sources. This will not be respected if the total range of - * the source is smaller than the specified {@code minBundleSize}. + * source into sub-sources. This value may not be respected if the total + * range of the source is smaller than the specified {@code minBundleSize}. * Must be non-negative. */ public OffsetBasedSource(long startOffset, long endOffset, long minBundleSize) { @@ -76,8 +81,9 @@ public long getStartOffset() { } /** - * Returns the specified ending offset of the source. If this is {@code >= getMaxEndOffset()}, - * e.g. Long.MAX_VALUE, this implies {@code getMaxEndOffset()}. + * Returns the specified ending offset of the source. Any returned value greater than or equal to + * {@link #getMaxEndOffset(PipelineOptions)} should be treated as + * {@link #getMaxEndOffset(PipelineOptions)}. */ public long getEndOffset() { return endOffset; @@ -85,7 +91,7 @@ public long getEndOffset() { /** * Returns the minimum bundle size that should be used when splitting the source into sub-sources. - * This will not be respected if the total range of the source is smaller than the specified + * This value may not be respected if the total range of the source is smaller than the specified * {@code minBundleSize}. */ public long getMinBundleSize() { @@ -155,20 +161,29 @@ public String toString() { * Returns approximately how many bytes of data correspond to a single offset in this source. * Used for translation between this source's range and methods defined in terms of bytes, such * as {@link #getEstimatedSizeBytes} and {@link #splitIntoBundles}. + * + *

      Defaults to {@code 1} byte, which is the common case for, e.g., file sources. */ public long getBytesPerOffset() { return 1L; } /** - * Returns the exact ending offset of the current source. This will be used if the source was - * constructed with an endOffset value {@code Long.MAX_VALUE}. + * Returns the actual ending offset of the current source. The value returned by this function + * will be used to clip the end of the range {@code [startOffset, endOffset)} such that the + * range used is {@code [startOffset, min(endOffset, maxEndOffset))}. + * + *

      As an example in which {@link OffsetBasedSource} is used to implement a file source, suppose + * that this source was constructed with an {@code endOffset} of {@link Long#MAX_VALUE} to + * indicate that a file should be read to the end. Then {@link #getMaxEndOffset} should determine + * the actual, exact size of the file in bytes and return it. */ public abstract long getMaxEndOffset(PipelineOptions options) throws Exception; /** - * Returns an {@code OffsetBasedSource} for a subrange of the current source. [start, end) will - * be within the range [startOffset, endOffset] of the current source. + * Returns an {@link OffsetBasedSource} for a subrange of the current source. The + * subrange {@code [start, end)} must be within the range {@code [startOffset, endOffset)} of + * the current source, i.e. {@code startOffset <= start < end <= endOffset}. */ public abstract OffsetBasedSource createSourceForSubrange(long start, long end); @@ -190,15 +205,11 @@ public abstract static class OffsetBasedReader extends BoundedReader { private OffsetBasedSource source; - /** - * The {@link OffsetRangeTracker} managing the range and current position of the source. - * Subclasses MUST use it before returning records from {@link #start} or {@link #advance}: - * see documentation of {@link RangeTracker}. - */ + /** The {@link OffsetRangeTracker} managing the range and current position of the source. */ private final OffsetRangeTracker rangeTracker; /** - * @param source the {@code OffsetBasedSource} to be read by the current reader. + * @param source the {@link OffsetBasedSource} to be read by the current reader. */ public OffsetBasedReader(OffsetBasedSource source) { this.source = source; @@ -236,14 +247,32 @@ public final boolean advance() throws IOException { } /** - * Same as {@link BoundedReader#start}, except {@link OffsetBasedReader} base class - * takes care of coordinating against concurrent calls to {@link #splitAtFraction}. + * Initializes the {@link OffsetBasedSource.OffsetBasedReader} and advances to the first record, + * returning {@code true} if there is a record available to be read. This method will be + * invoked exactly once and may perform expensive setup operations that are needed to + * initialize the reader. + * + *

      This function is the {@code OffsetBasedReader} implementation of + * {@link BoundedReader#start}. The key difference is that the implementor can ignore the + * possibility that it should no longer produce the first record, either because it has exceeded + * the original {@code endOffset} assigned to the reader, or because a concurrent call to + * {@link #splitAtFraction} has changed the source to shrink the offset range being read. + * + * @see BoundedReader#start */ protected abstract boolean startImpl() throws IOException; /** - * Same as {@link BoundedReader#advance}, except {@link OffsetBasedReader} base class - * takes care of coordinating against concurrent calls to {@link #splitAtFraction}. + * Advances to the next record and returns {@code true}, or returns false if there is no next + * record. + * + *

      This function is the {@code OffsetBasedReader} implementation of + * {@link BoundedReader#advance}. The key difference is that the implementor can ignore the + * possibility that it should no longer produce the next record, either because it has exceeded + * the original {@code endOffset} assigned to the reader, or because a concurrent call to + * {@link #splitAtFraction} has changed the source to shrink the offset range being read. + * + * @see BoundedReader#advance */ protected abstract boolean advanceImpl() throws IOException; From 5d6dfdddb020f5aba30dbe8be3139c5e169b26d3 Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 14 Dec 2015 18:07:09 -0800 Subject: [PATCH 1229/1541] Improve error messaging when applying unsupported transform in batch ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110215096 --- .../sdk/runners/DataflowPipelineRunner.java | 43 +++++++++++------- .../runners/DataflowPipelineRunnerTest.java | 45 +++++++++++++------ 2 files changed, 58 insertions(+), 30 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index 0f9f8ac6710c5..b1a53446ba3ac 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -43,6 +43,7 @@ import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator; +import com.google.cloud.dataflow.sdk.options.StreamingOptions; import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.JobSpecification; import com.google.cloud.dataflow.sdk.runners.dataflow.CustomSources; import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowAggregatorTransforms; @@ -256,15 +257,16 @@ public static DataflowPipelineRunner fromOptions(PipelineOptions options) { .put(Write.Bound.class, StreamingWrite.class) .put(PubsubIO.Write.Bound.class, StreamingPubsubIOWrite.class) .put(Read.Unbounded.class, StreamingUnboundedRead.class) - .put(Read.Bounded.class, StreamingUnsupportedIO.class) - .put(AvroIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(AvroIO.Write.Bound.class, StreamingUnsupportedIO.class) - .put(BigQueryIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(TextIO.Read.Bound.class, StreamingUnsupportedIO.class) - .put(TextIO.Write.Bound.class, StreamingUnsupportedIO.class) + .put(Read.Bounded.class, UnsupportedIO.class) + .put(AvroIO.Read.Bound.class, UnsupportedIO.class) + .put(AvroIO.Write.Bound.class, UnsupportedIO.class) + .put(BigQueryIO.Read.Bound.class, UnsupportedIO.class) + .put(TextIO.Read.Bound.class, UnsupportedIO.class) + .put(TextIO.Write.Bound.class, UnsupportedIO.class) .build(); } else { overrides = ImmutableMap., Class>builder() + .put(Read.Unbounded.class, UnsupportedIO.class) .build(); } } @@ -1036,7 +1038,7 @@ public Coder> getDefaultOutputCoder(CoderRegistry registry, Coder inp /** * Specialized expansion for unsupported IO transforms that throws an error. 
*/ - private static class StreamingUnsupportedIO + private static class UnsupportedIO extends PTransform { private PTransform transform; @@ -1044,7 +1046,7 @@ private static class StreamingUnsupportedIO transform) { + public UnsupportedIO(AvroIO.Read.Bound transform) { this.transform = transform; } @@ -1052,7 +1054,7 @@ public StreamingUnsupportedIO(AvroIO.Read.Bound transform) { * Builds an instance of this class from the overridden transform. */ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() - public StreamingUnsupportedIO(BigQueryIO.Read.Bound transform) { + public UnsupportedIO(BigQueryIO.Read.Bound transform) { this.transform = transform; } @@ -1060,7 +1062,7 @@ public StreamingUnsupportedIO(BigQueryIO.Read.Bound transform) { * Builds an instance of this class from the overridden transform. */ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() - public StreamingUnsupportedIO(TextIO.Read.Bound transform) { + public UnsupportedIO(TextIO.Read.Bound transform) { this.transform = transform; } @@ -1068,7 +1070,7 @@ public StreamingUnsupportedIO(TextIO.Read.Bound transform) { * Builds an instance of this class from the overridden transform. */ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() - public StreamingUnsupportedIO(Read.Bounded transform) { + public UnsupportedIO(Read.Bounded transform) { this.transform = transform; } @@ -1076,7 +1078,7 @@ public StreamingUnsupportedIO(Read.Bounded transform) { * Builds an instance of this class from the overridden transform. */ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() - public StreamingUnsupportedIO(AvroIO.Write.Bound transform) { + public UnsupportedIO(Read.Unbounded transform) { this.transform = transform; } @@ -1084,17 +1086,26 @@ public StreamingUnsupportedIO(AvroIO.Write.Bound transform) { * Builds an instance of this class from the overridden transform. */ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() - public StreamingUnsupportedIO(TextIO.Write.Bound transform) { + public UnsupportedIO(AvroIO.Write.Bound transform) { + this.transform = transform; + } + + /** + * Builds an instance of this class from the overridden transform. + */ + @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply() + public UnsupportedIO(TextIO.Write.Bound transform) { this.transform = transform; } @Override public OutputT apply(InputT input) { + String mode = input.getPipeline().getOptions().as(StreamingOptions.class).isStreaming() + ? 
"streaming" : "batch"; throw new UnsupportedOperationException( - "The DataflowPipelineRunner in streaming mode does not support " - + approximatePTransformName(transform.getClass())); + String.format("The DataflowPipelineRunner in %s mode does not support %s.", + mode, approximatePTransformName(transform.getClass()))); } - } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index d1287af33719f..3a29d82040954 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -41,12 +41,14 @@ import com.google.cloud.dataflow.sdk.io.AvroIO; import com.google.cloud.dataflow.sdk.io.AvroSource; import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.Read; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions; import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; +import com.google.cloud.dataflow.sdk.runners.dataflow.CountingSource; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo; @@ -811,10 +813,10 @@ public void testToString() { DataflowPipelineRunner.fromOptions(options).toString()); } - private static PipelineOptions makeStreamingOptions() { + private static PipelineOptions makeOptions(boolean streaming) { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setRunner(DataflowPipelineRunner.class); - options.setStreaming(true); + options.setStreaming(streaming); options.setJobName("TestJobName"); options.setProject("test-project"); options.setTempLocation("gs://test/temp/location"); @@ -823,55 +825,70 @@ private static PipelineOptions makeStreamingOptions() { return options; } - private void testUnsupportedSource(PTransform source, String name) throws Exception { + private void testUnsupportedSource(PTransform source, String name, boolean streaming) + throws Exception { + String mode = streaming ? 
"streaming" : "batch"; thrown.expect(UnsupportedOperationException.class); thrown.expectMessage( - "The DataflowPipelineRunner in streaming mode does not support " + name); + "The DataflowPipelineRunner in " + mode + " mode does not support " + name); - Pipeline p = Pipeline.create(makeStreamingOptions()); + Pipeline p = Pipeline.create(makeOptions(streaming)); p.apply(source); p.run(); } @Test public void testBoundedSourceUnsupportedInStreaming() throws Exception { - testUnsupportedSource(AvroSource.readFromFileWithClass("foo", String.class), "Read.Bounded"); + testUnsupportedSource( + AvroSource.readFromFileWithClass("foo", String.class), "Read.Bounded", true); } @Test public void testBigQueryIOSourceUnsupportedInStreaming() throws Exception { testUnsupportedSource( - BigQueryIO.Read.from("project:bar.baz").withoutValidation(), "BigQueryIO.Read"); + BigQueryIO.Read.from("project:bar.baz").withoutValidation(), "BigQueryIO.Read", true); } @Test public void testAvroIOSourceUnsupportedInStreaming() throws Exception { - testUnsupportedSource(AvroIO.Read.from("foo"), "AvroIO.Read"); + testUnsupportedSource( + AvroIO.Read.from("foo"), "AvroIO.Read", true); } @Test public void testTextIOSourceUnsupportedInStreaming() throws Exception { - testUnsupportedSource(TextIO.Read.from("foo"), "TextIO.Read"); + testUnsupportedSource(TextIO.Read.from("foo"), "TextIO.Read", true); } - private void testUnsupportedSink(PTransform, PDone> sink, String name) - throws Exception { + @Test + public void testReadBoundedSourceUnsupportedInStreaming() throws Exception { + testUnsupportedSource(Read.from(AvroSource.from("/tmp/test")), "Read.Bounded", true); + } + + @Test + public void testReadUnboundedUnsupportedInBatch() throws Exception { + testUnsupportedSource(Read.from(new CountingSource(1)), "Read.Unbounded", false); + } + + private void testUnsupportedSink( + PTransform, PDone> sink, String name, boolean streaming) + throws Exception { thrown.expect(UnsupportedOperationException.class); thrown.expectMessage( "The DataflowPipelineRunner in streaming mode does not support " + name); - Pipeline p = Pipeline.create(makeStreamingOptions()); + Pipeline p = Pipeline.create(makeOptions(streaming)); p.apply(Create.of("foo")).apply(sink); p.run(); } @Test public void testAvroIOSinkUnsupportedInStreaming() throws Exception { - testUnsupportedSink(AvroIO.Write.to("foo").withSchema(String.class), "AvroIO.Write"); + testUnsupportedSink(AvroIO.Write.to("foo").withSchema(String.class), "AvroIO.Write", true); } @Test public void testTextIOSinkUnsupportedInStreaming() throws Exception { - testUnsupportedSink(TextIO.Write.to("foo"), "TextIO.Write"); + testUnsupportedSink(TextIO.Write.to("foo"), "TextIO.Write", true); } } From 37e7fcf7a9038ea91c6f62a9d38e5eca33c81dae Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 14 Dec 2015 19:48:41 -0800 Subject: [PATCH 1230/1541] Drop data only for expired windows In streaming mode, a GroupByKey operation now drops late data only when it is in a window that has expired -- the end of the window is behind the input watermark by more than the allowed lateness. When an element is assigned to multiple windows, each occurrence is treated independently. Previously, data was dropped if the element's timestamp was beyond the allowed lateness. The new condition drops strictly less data. 
----Release Notes---- - Only drop late data if the window it is assigned to has expired (the end of the window is passed by more than the allowed lateness) [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110220195 --- .../dataflow/sdk/util/ReduceFnRunner.java | 58 +++++++++------ .../dataflow/sdk/util/WatermarkHold.java | 18 ----- .../dataflow/sdk/util/ReduceFnTester.java | 21 ++++-- .../sdk/util/TriggerExecutorTest.java | 70 +++++++++++++++---- 4 files changed, 108 insertions(+), 59 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java index 742806db48110..4d11e8a708110 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java @@ -34,6 +34,8 @@ import com.google.common.base.Function; import com.google.common.base.Functions; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; import com.google.common.base.Throwables; import com.google.common.collect.FluentIterable; import com.google.common.collect.Maps; @@ -247,12 +249,15 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul return sourceWindowsToResultWindows; } - /** Is {@code value} late w.r.t. the garbage collection watermark? */ - private boolean canDropDueToLateness(WindowedValue value) { - Instant inputWM = timerInternals.currentInputWatermarkTime(); - return inputWM != null - && value.getTimestamp().isBefore(inputWM.minus(windowingStrategy.getAllowedLateness())); - } + /** Is the {@code window} expired w.r.t. the garbage collection watermark? */ + private Predicate canDropDueToExpiredWindow = new Predicate() { + @Override + public boolean apply(W window) { + Instant inputWM = timerInternals.currentInputWatermarkTime(); + return inputWM != null + && window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(inputWM); + } + }; /** * Add the initial windows from each of the values to the active window set. Returns the set of @@ -261,15 +266,17 @@ private boolean canDropDueToLateness(WindowedValue value) { private Set addToActiveWindows(Iterable> values) { Set newWindows = new HashSet<>(); for (WindowedValue value : values) { - if (canDropDueToLateness(value)) { - // This value will be dropped (and reported in a counter) by processElement. - // Hence it won't contribute to any new window. - continue; - } for (BoundedWindow untypedWindow : value.getWindows()) { @SuppressWarnings("unchecked") W window = (W) untypedWindow; + + if (canDropDueToExpiredWindow.apply(window)) { + // This value will be dropped (and reported in a counter) by processElement. + // Hence it won't contribute to any new window. + continue; + } + ReduceFn.Context context = contextFactory.base(window); if (!triggerRunner.isClosed(context.state())) { if (activeWindows.add(window)) { @@ -291,22 +298,27 @@ private Set addToActiveWindows(Iterable> values) { */ private void processElement( Function windowMapping, Map results, WindowedValue value) { - if (canDropDueToLateness(value)) { - // Drop the element in all assigned windows if it is past the allowed lateness limit. 
- droppedDueToLateness.addValue((long) value.getWindows().size()); - WindowTracing.debug( - "processElement: Dropping element at {} for key:{} since too far " - + "behind inputWatermark:{}; outputWatermark:{}", - value.getTimestamp(), key, timerInternals.currentInputWatermarkTime(), - timerInternals.currentOutputWatermarkTime()); - return; - } // Only consider representative windows from among all windows in equivalence classes // induced by window merging. @SuppressWarnings("unchecked") - Iterable windows = - FluentIterable.from((Collection) value.getWindows()).transform(windowMapping); + FluentIterable mappedWindows = + FluentIterable.from((Collection) value.getWindows()) + .transform(windowMapping); + + // Some windows may be expired + Iterable windows = mappedWindows.filter(Predicates.not(canDropDueToExpiredWindow)); + + // Count the number of elements that are dropped + for (W expiredWindow : mappedWindows.filter(canDropDueToExpiredWindow)) { + droppedDueToLateness.addValue(1L); + WindowTracing.debug( + "processElement: Dropping element at {} for key:{} and window:{} since window is " + + "too far behind inputWatermark:{}; outputWatermark:{}", + value.getTimestamp(), key, expiredWindow, + timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + } // Prefetch in each of the windows if we're going to need to process triggers for (W window : windows) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java index cee3eeb0b95ed..bac3af11f7b58 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java @@ -116,18 +116,6 @@ public WatermarkHold(TimerInternals timerInternals, WindowingStrategy wind * The element is very locally late. The window has been garbage collected, thus there * is no target pane E could be assigned to. We discard E. * - *
-   * <li>(Discard - beyond allowed lateness)
-   * <pre>
      -   *               |                            |
      -   *      [     E  |     ]                      |
      -   *               |                            |
      -   *             GCWM  <-getAllowedLateness->  IWM
-   * </pre>
-   * The element is very locally late, and the window is very close to being garbage collected, at
-   * which point a final {@code LATE} pane could be emitted. We *could* attempt to capture E within
-   * that pane, however that requires checking against all possible windows which may contain E.
-   * We instead discard E.
-   *
    * <li>(Unobservably late)
    * <pre>
    *          |    |
      @@ -226,12 +214,6 @@ private boolean addElementHold(ReduceFn.ProcessValueContext context)
           Instant outputWM = timerInternals.currentOutputWatermarkTime();
           Instant inputWM = timerInternals.currentInputWatermarkTime();
       
      -    Instant garbageWM =
      -        inputWM == null ? null : inputWM.minus(windowingStrategy.getAllowedLateness());
      -    Preconditions.checkState(garbageWM == null || !elementHold.isBefore(garbageWM),
      -        "Shifted timestamp %s cannot be beyond garbage collection watermark %s", elementHold,
      -        garbageWM);
      -
           // Only add the hold if we can be sure the backend will be able to respect it.
           boolean tooLate;
           if (outputWM != null && elementHold.isBefore(outputWM)) {
      diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java
      index 4413d0630890a..b789d6bd33613 100644
      --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java
      +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java
      @@ -122,12 +122,9 @@ public class ReduceFnTester {
         }
       
         public static  ReduceFnTester
      -      combining(WindowFn windowFn, Trigger trigger, AccumulationMode mode,
      -          KeyedCombineFn combineFn, Coder outputCoder,
      -          Duration allowedDataLateness) throws Exception {
      -    WindowingStrategy strategy =
      -        WindowingStrategy.of(windowFn).withTrigger(trigger).withMode(mode).withAllowedLateness(
      -            allowedDataLateness);
      +      combining(WindowingStrategy strategy,
      +          KeyedCombineFn combineFn,
      +          Coder outputCoder) throws Exception {
       
           CoderRegistry registry = new CoderRegistry();
           registry.registerStandardCoders();
      @@ -142,6 +139,18 @@ public class ReduceFnTester {
               outputCoder);
         }
       
      +  public static  ReduceFnTester
      +      combining(WindowFn windowFn, Trigger trigger, AccumulationMode mode,
      +          KeyedCombineFn combineFn, Coder outputCoder,
      +          Duration allowedDataLateness) throws Exception {
      +
      +    WindowingStrategy strategy =
      +        WindowingStrategy.of(windowFn).withTrigger(trigger).withMode(mode).withAllowedLateness(
      +            allowedDataLateness);
      +
      +    return combining(strategy, combineFn, outputCoder);
      +  }
      +
         private ReduceFnTester(WindowingStrategy wildcardStrategy,
             ReduceFn reduceFn, Coder outputCoder) throws Exception {
           @SuppressWarnings("unchecked")
      diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java
      index 164ab3f65183a..5e7681064c780 100644
      --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java
      +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/TriggerExecutorTest.java
      @@ -526,33 +526,79 @@ public void testMergeBeforeFinalizing() throws Exception {
               Matchers.equalTo(PaneInfo.createPane(true, true, Timing.EARLY, 0, 0)));
         }
       
      +  /**
      +   * Tests that when data is assigned to multiple windows but some of those windows have expired,
      +   * then the data is dropped and counted accurately.
      +   */
         @Test
      -  public void testDropDataMultipleWindows() throws Exception {
      -    ReduceFnTester tester = ReduceFnTester.combining(
      -        SlidingWindows.of(Duration.millis(100)).every(Duration.millis(30)),
      -        AfterWatermark.pastEndOfWindow(), AccumulationMode.ACCUMULATING_FIRED_PANES,
      -        new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of(), Duration.millis(20));
      +  public void testDropDataMultipleWindowsExpiredWindow() throws Exception {
      +    ReduceFnTester, IntervalWindow> tester = ReduceFnTester.nonCombining(
      +        WindowingStrategy.of(
      +            SlidingWindows.of(Duration.millis(100)).every(Duration.millis(30)))
      +        .withAllowedLateness(Duration.millis(10)));
       
           tester.injectElements(
      -        TimestampedValue.of(10, new Instant(23)), // [-60, 40), [-30, 70), [0, 100)
      +        // assigned to [-60, 40), [-30, 70), [0, 100)
      +        TimestampedValue.of(10, new Instant(23)),
      +        // assigned to [-30, 70), [0, 100), [30, 130)
               TimestampedValue.of(12, new Instant(40)));
      -        // [-30, 70), [0, 100), [30, 130)
       
           assertEquals(0, tester.getElementsDroppedDueToLateness());
      -    assertEquals(0, tester.getElementsDroppedDueToClosedWindow());
       
           tester.advanceInputWatermark(new Instant(70));
      +
      +
           tester.injectElements(
      -        TimestampedValue.of(14, new Instant(60))); // [-30, 70) = closed, [0, 100), [30, 130)
      +        // assigned to [-30, 70), [0, 100), [30, 130)
+        // but [-30, 70) has passed but is not yet expired
      +        TimestampedValue.of(14, new Instant(50)));
       
           assertEquals(0, tester.getElementsDroppedDueToLateness());
      -    assertEquals(1, tester.getElementsDroppedDueToClosedWindow());
       
      +    tester.advanceInputWatermark(new Instant(110));
      +
      +    // assigned to [-30, 70), [0, 100), [30, 130)
      +    // but the first two are expired
           tester.injectElements(TimestampedValue.of(16, new Instant(40)));
      -        // dropped b/c lateness, assigned to 3 windows
       
      -    assertEquals(3, tester.getElementsDroppedDueToLateness());
      +    assertEquals(2, tester.getElementsDroppedDueToLateness());
      +  }
      +
      +  /**
      +   * Tests that when data is assigned to multiple windows but some of those windows have
      +   * had their triggers finish, then the data is dropped and counted accurately.
      +   */
      +  @Test
      +  public void testDropDataMultipleWindowsFinishedTrigger() throws Exception {
      +    ReduceFnTester tester = ReduceFnTester.combining(
      +        WindowingStrategy.of(
      +            SlidingWindows.of(Duration.millis(100)).every(Duration.millis(30)))
      +        .withTrigger(AfterWatermark.pastEndOfWindow())
      +        .withAllowedLateness(Duration.millis(1000)),
      +        new Sum.SumIntegerFn().asKeyedFn(), VarIntCoder.of());
      +
      +    tester.injectElements(
      +        // assigned to [-60, 40), [-30, 70), [0, 100)
      +        TimestampedValue.of(10, new Instant(23)),
      +        // assigned to [-30, 70), [0, 100), [30, 130)
      +        TimestampedValue.of(12, new Instant(40)));
      +
      +    assertEquals(0, tester.getElementsDroppedDueToClosedWindow());
      +
      +    tester.advanceInputWatermark(new Instant(70));
      +    tester.injectElements(
      +        // assigned to [-30, 70), [0, 100), [30, 130)
      +        // but [-30, 70) is closed by the trigger
      +        TimestampedValue.of(14, new Instant(60)));
      +
           assertEquals(1, tester.getElementsDroppedDueToClosedWindow());
      +
      +    tester.advanceInputWatermark(new Instant(130));
      +    // assigned to [-30, 70), [0, 100), [30, 130)
      +    // but they are all closed
      +    tester.injectElements(TimestampedValue.of(16, new Instant(40)));
      +
      +    assertEquals(4, tester.getElementsDroppedDueToClosedWindow());
         }
       
         @Test
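
The per-window drop condition introduced by this patch can be summarized by the
following sketch (Joda-Time types; timerInternals and windowingStrategy as used in
ReduceFnRunner above; the helper name isWindowExpired is only illustrative):

    // An occurrence of an element in window w is dropped only if w has expired, i.e. the
    // end of the window is behind the input watermark by more than the allowed lateness.
    private boolean isWindowExpired(BoundedWindow window) {
      Instant inputWM = timerInternals.currentInputWatermarkTime();
      return inputWM != null
          && window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(inputWM);
    }

    // Previously an element was dropped in all of its windows whenever
    //   value.getTimestamp().isBefore(inputWM.minus(allowedLateness))
    // held, which could discard occurrences that still belonged to live, non-expired windows.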
      
      From 2398d0b52b49efb0b2d9c374afb214c37176ed56 Mon Sep 17 00:00:00 2001
      From: klk 
      Date: Mon, 14 Dec 2015 20:27:05 -0800
      Subject: [PATCH 1231/1541] Do not require .withAllowedLateness for
       GlobalWindows
      
      ----Release Notes----
 - When using GlobalWindows, it is no longer required to provide
   the .withAllowedLateness() setting, since no data is ever dropped.
      
      []
      -------------
      Created by MOE: https://github.com/google/moe
      MOE_MIGRATED_REVID=110221966
      ---
       .../cloud/dataflow/sdk/transforms/windowing/Window.java    | 7 +++++--
       1 file changed, 5 insertions(+), 2 deletions(-)
      
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
      index 531392b551b92..dfdc25903d2ef 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
      @@ -559,9 +559,12 @@ public void validate(PCollection input) {
             // Make sure that the windowing strategy is complete & valid.
             if (outputStrategy.isTriggerSpecified()
                 && !(outputStrategy.getTrigger().getSpec() instanceof DefaultTrigger)) {
      -        if (!outputStrategy.isAllowedLatenessSpecified()) {
      +
      +        if (!(outputStrategy.getWindowFn() instanceof GlobalWindows)
      +            && !outputStrategy.isAllowedLatenessSpecified()) {
                 throw new IllegalArgumentException(
      -              "Calling .triggering() to specify a trigger requires that the allowed lateness be"
      +              "Except when using GlobalWindows,"
      +              + " calling .triggering() to specify a trigger requires that the allowed lateness be"
                     + " specified using .withAllowedLateness() to set the upper bound on how late data"
                     + " can arrive before being dropped. See Javadoc for more details.");
               }
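
A minimal usage sketch of the relaxed validation (assuming a PCollection<String> named
input; the trigger choices are only illustrative, and an accumulation mode is set
explicitly since specifying a trigger generally requires one):

    // Accepted after this change: GlobalWindows with a trigger but no .withAllowedLateness().
    input.apply(Window.<String>into(new GlobalWindows())
        .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(10)))
        .discardingFiredPanes());

    // Still rejected: a non-global WindowFn with a trigger but no .withAllowedLateness()
    // fails validation with the IllegalArgumentException above.
    input.apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
        .triggering(AfterWatermark.pastEndOfWindow())
        .discardingFiredPanes());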
      
      From d459ecff6b80979eaa4ee9a5bb62bb1a61c25e83 Mon Sep 17 00:00:00 2001
      From: klk 
      Date: Mon, 14 Dec 2015 20:39:22 -0800
      Subject: [PATCH 1232/1541] Remove Trigger.OnElementContext.element()
      
      This method is not useful for well-behaved triggers, and is
      unused in the SDK.
      
      ----Release Notes----
      []
      -------------
      Created by MOE: https://github.com/google/moe
      MOE_MIGRATED_REVID=110222447
      ---
       .../dataflow/sdk/transforms/windowing/Trigger.java    |  3 ---
       .../dataflow/sdk/util/TriggerContextFactory.java      | 11 ++---------
       2 files changed, 2 insertions(+), 12 deletions(-)
      
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
      index e6fc3cd54f6ea..f73401f79315f 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
      @@ -266,9 +266,6 @@ public abstract class TriggerContext {
          * Details about an invocation of {@link Trigger#onElement}.
          */
         public abstract class OnElementContext extends TriggerContext {
      -    /** The element being handled by this call to {@link Trigger#onElement}. */
      -    public abstract Object element();
      -
           /** The event timestamp of the element currently being processed. */
           public abstract Instant eventTimestamp();
       
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
      index 7627c5c3cf505..64c6de39b36eb 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
      @@ -66,7 +66,7 @@ public Trigger.OnElementContext create(
             ExecutableTrigger rootTrigger, BitSet finishedSet) {
           return new OnElementContextImpl(
               context.window(), context.timers(), rootTrigger, finishedSet,
      -        context.value(), context.timestamp());
      +        context.timestamp());
         }
       
         public Trigger.OnTimerContext create(
      @@ -310,7 +310,6 @@ private class OnElementContextImpl extends Trigger.OnElementContext {
           private final StateContextImpl state;
           private final Timers timers;
           private final TriggerInfoImpl triggerInfo;
      -    private final Object element;
           private final Instant eventTimestamp;
       
           private OnElementContextImpl(
      @@ -318,20 +317,14 @@ private OnElementContextImpl(
               Timers timers,
               ExecutableTrigger trigger,
               BitSet finishedSet,
      -        Object element,
               Instant eventTimestamp) {
             trigger.getSpec().super();
             this.state = triggerState(window, trigger);
             this.timers = new TriggerTimers(window, timers);
             this.triggerInfo = new TriggerInfoImpl(trigger, finishedSet, this);
      -      this.element = element;
             this.eventTimestamp = eventTimestamp;
           }
       
      -    @Override
      -    public Object element() {
      -      return element;
      -    }
       
           @Override
           public Instant eventTimestamp() {
      @@ -341,7 +334,7 @@ public Instant eventTimestamp() {
           @Override
           public Trigger.OnElementContext forTrigger(ExecutableTrigger trigger) {
             return new OnElementContextImpl(
      -          state.window(), timers, trigger, triggerInfo.finishedSet, element, eventTimestamp);
      +          state.window(), timers, trigger, triggerInfo.finishedSet, eventTimestamp);
           }
       
           @Override
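
For custom triggers this change only affects code inside Trigger#onElement; a sketch of
the remaining call pattern (the surrounding trigger implementation is hypothetical):

    // Inside a custom Trigger's onElement(OnElementContext c):
    Instant timestamp = c.eventTimestamp();  // still available
    // Object e = c.element();              // removed; well-behaved triggers should not
    //                                      // inspect the element itself, only timing and state.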
      
      From 57de437a77961400dd52bd8c6dbd4f5b4b72d931 Mon Sep 17 00:00:00 2001
      From: bchambers 
      Date: Wed, 16 Dec 2015 09:26:39 -0800
      Subject: [PATCH 1233/1541] Change the assembly of state keys to be more
       efficient
      
Rather than constructing intermediate strings and composing them into
larger strings with String.format(), this change appends the individual
parts to a single StringBuilder.
      
      ----Release Notes----
      
      []
      -------------
      Created by MOE: https://github.com/google/moe
      MOE_MIGRATED_REVID=110367598
      ---
       .../worker/WindmillStateInternals.java        | 30 +++++++++++++------
       .../sdk/util/state/StateNamespace.java        |  7 +++++
       .../sdk/util/state/StateNamespaceForTest.java |  6 ++++
       .../sdk/util/state/StateNamespaces.java       | 18 +++++++++++
       .../dataflow/sdk/util/state/StateTag.java     |  4 +++
       .../dataflow/sdk/util/state/StateTags.java    |  9 ++++++
       6 files changed, 65 insertions(+), 9 deletions(-)
      
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java
      index 63a0256e072da..91dc2236c5622 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java
      @@ -107,7 +107,8 @@ public  ValueState bindValue(StateTag> address, Coder cod
       
         @VisibleForTesting
         static final ThreadLocal> COMPACT_NOW =
      -      new ThreadLocal() {
      +      new ThreadLocal>() {
      +        @Override
               public Supplier initialValue() {
                 return new Supplier() {
                   /* The rate at which, on average, this will return true. */
      @@ -121,6 +122,7 @@ private long nextSample() {
                     return (long) Math.floor(Math.log(random.nextDouble()) / Math.log(1 - RATE));
                   }
       
      +            @Override
                   public Boolean get() {
                     counter--;
                     if (counter < 0) {
      @@ -182,18 +184,28 @@ public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder)
           }
         }
       
      -  private ByteString encodeKey(StateNamespace namespace, StateTag address) {
      -    if (useStateFamilies) {
      -      // We don't use prefix here, since it's being set as the stateFamily.
      -      return ByteString.copyFromUtf8(
      -          String.format("%s+%s", namespace.stringKey(), address.getId()));
      -    } else {
      +  @VisibleForTesting ByteString encodeKey(StateNamespace namespace, StateTag address) {
      +    try {
      +      // Use a StringBuilder rather than concatenation and String.format. We build these keys
      +      // a lot, and this leads to better performance results. See associated benchmarks.
      +      StringBuilder output = new StringBuilder();
      +
      +      // We only need the prefix if we aren't using state families
      +      if (!useStateFamilies) {
      +        output.append(prefix);
      +      }
      +
       // stringKey starts and ends with a slash. We don't need to separate it from prefix, because
             // the prefix is guaranteed to be unique and non-overlapping. We separate it from the
             // StateTag ID by a '+' (which is guaranteed not to be in the stringKey) because the
             // ID comes from the user.
      -      return ByteString.copyFromUtf8(String.format(
      -          "%s%s+%s", prefix, namespace.stringKey(), address.getId()));
      +      namespace.appendTo(output);
      +      output.append('+');
      +      address.appendTo(output);
      +      return ByteString.copyFromUtf8(output.toString());
      +    } catch (IOException e) {
      +      throw new RuntimeException(
      +          "Unable to encode state key for " + namespace + ", " + address, e);
           }
         }
       
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
      index 423d30e2f4caf..4a0364f1db88e 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
      @@ -15,6 +15,8 @@
        */
       package com.google.cloud.dataflow.sdk.util.state;
       
      +import java.io.IOException;
      +
       /**
        * A namespace used for scoping state stored with {@link StateInternals}.
        *
      @@ -35,4 +37,9 @@ public interface StateNamespace {
          * the two.
          */
         String stringKey();
      +
      +  /**
      +   * Append the string representation of this key to the {@link Appendable}.
      +   */
      +  void appendTo(Appendable sb) throws IOException;
       }
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
      index 000244c7aeadb..c11668fd62aef 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
      @@ -15,6 +15,7 @@
        */
       package com.google.cloud.dataflow.sdk.util.state;
       
      +import java.io.IOException;
       import java.util.Objects;
       
       /**
      @@ -49,4 +50,9 @@ public boolean equals(Object obj) {
         public int hashCode() {
           return key.hashCode();
         }
      +
      +  @Override
      +  public void appendTo(Appendable sb) throws IOException {
      +    sb.append(key);
      +  }
       }
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
      index 938cf12c87d63..22115847a3f65 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
      @@ -21,6 +21,7 @@
       import com.google.cloud.dataflow.sdk.util.CoderUtils;
       import com.google.common.base.Splitter;
       
      +import java.io.IOException;
       import java.util.List;
       import java.util.Objects;
       
      @@ -76,6 +77,11 @@ public int hashCode() {
           public String toString() {
             return "Global";
           }
      +
      +    @Override
      +    public void appendTo(Appendable sb) throws IOException {
      +      sb.append(GLOBAL_STRING);
      +    }
         }
       
         /**
      @@ -106,6 +112,11 @@ public String stringKey() {
             }
           }
       
      +    @Override
      +    public void appendTo(Appendable sb) throws IOException {
      +      sb.append('/').append(CoderUtils.encodeToBase64(windowCoder, window)).append('/');
      +    }
      +
           @Override
           public boolean equals(Object obj) {
             if (obj == this) {
      @@ -171,6 +182,13 @@ public String stringKey() {
             }
           }
       
      +    @Override
      +    public void appendTo(Appendable sb) throws IOException {
      +      sb.append('/').append(CoderUtils.encodeToBase64(windowCoder, window));
      +      sb.append('/').append(Integer.toString(triggerIndex, TRIGGER_RADIX).toUpperCase());
      +      sb.append('/');
      +    }
      +
           @Override
           public boolean equals(Object obj) {
             if (obj == this) {
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
      index 2419bb8ed6007..cba405d81e641 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
      @@ -23,6 +23,7 @@
       import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
       import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
       
      +import java.io.IOException;
       import java.io.Serializable;
       
       /**
      @@ -63,6 +64,9 @@  WatermarkStateInternal bindWatermark(
               OutputTimeFn outputTimeFn);
         }
       
      +  /** Append the UTF-8 encoding of this tag to the given {@link Appendable}. */
      +  void appendTo(Appendable sb) throws IOException;
      +
         /**
          * Returns the identifier for this state cell.
          */
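
The checked IOException on the new method exists because Appendable also covers IO-backed sinks such as java.io.Writer, not only StringBuilder. A hedged sketch (the dumpTag helper and its Writer argument are placeholders, not SDK code):

    import java.io.IOException;
    import java.io.Writer;

    // Sketch: the same appendTo call can target an in-memory buffer or a real Writer,
    // and only the latter can actually fail with an IOException.
    static void dumpTag(StateTag<?> tag, Writer out) throws IOException {
      out.append("tag=");
      tag.appendTo(out);
      out.append('\n');
    }
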
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
      index 4c6af1ae32d18..f6f0c84e7dc41 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
      @@ -25,6 +25,7 @@
       import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
       import com.google.common.base.MoreObjects;
       
      +import java.io.IOException;
       import java.io.Serializable;
       import java.util.Objects;
       
      @@ -153,6 +154,10 @@ public String getIdString() {
             return kind.prefix + rawId;
           }
       
      +    public void appendTo(Appendable sb) throws IOException {
      +      sb.append(kind.prefix).append(rawId);
      +    }
      +
           @Override
           public String toString() {
             return MoreObjects.toStringHelper(getClass())
      @@ -206,6 +211,10 @@ public String toString() {
           }
       
           protected abstract StateTag asKind(StateKind kind);
      +
      +    public void appendTo(Appendable sb) throws IOException {
      +      id.appendTo(sb);
      +    }
         }
       
         /**
      
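
A pattern this change enables, shown as a hedged sketch rather than code from the SDK: an implementation's existing string-key method can be expressed in terms of appendTo so the two representations cannot drift apart.

    // Sketch of a stringKey() implemented on top of the new appendTo contract.
    @Override
    public String stringKey() {
      StringBuilder sb = new StringBuilder();
      try {
        appendTo(sb);
      } catch (IOException e) {
        // Unreachable for StringBuilder; the exception exists for IO-backed Appendables.
        throw new AssertionError(e);
      }
      return sb.toString();
    }
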
      From 1f072e803be715dc54f143cfb2f8cf536f9d9131 Mon Sep 17 00:00:00 2001
      From: klk 
      Date: Thu, 17 Dec 2015 16:12:03 -0800
      Subject: [PATCH 1234/1541] Improve javadoc for Pipeline, PipelineResult
      
      ----Release Notes----
      
      []
      -------------
      Created by MOE: https://github.com/google/moe
      MOE_MIGRATED_REVID=110499179
      ---
       .../google/cloud/dataflow/sdk/Pipeline.java   | 113 ++++++++++--------
       .../cloud/dataflow/sdk/PipelineResult.java    |  25 ++--
       2 files changed, 75 insertions(+), 63 deletions(-)
      
      diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
      index f2e7d72804c7b..b166673e6e9c7 100644
      --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
      +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
      @@ -17,15 +17,18 @@
       package com.google.cloud.dataflow.sdk;
       
       import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
      +import com.google.cloud.dataflow.sdk.io.Read;
       import com.google.cloud.dataflow.sdk.options.PipelineOptions;
       import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
       import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
       import com.google.cloud.dataflow.sdk.runners.TransformHierarchy;
       import com.google.cloud.dataflow.sdk.runners.TransformTreeNode;
       import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
      +import com.google.cloud.dataflow.sdk.transforms.Create;
       import com.google.cloud.dataflow.sdk.transforms.PTransform;
       import com.google.cloud.dataflow.sdk.util.UserCodeException;
       import com.google.cloud.dataflow.sdk.values.PBegin;
      +import com.google.cloud.dataflow.sdk.values.PCollection;
       import com.google.cloud.dataflow.sdk.values.PInput;
       import com.google.cloud.dataflow.sdk.values.POutput;
       import com.google.cloud.dataflow.sdk.values.PValue;
      @@ -44,27 +47,26 @@
       import java.util.Set;
       
       /**
      - * A {@code Pipeline} manages a DAG of {@link PTransform}s, and the
      - * {@link com.google.cloud.dataflow.sdk.values.PCollection}s
      - * that the {@link PTransform}s consume and produce.
      + * A {@link Pipeline} manages a directed acyclic graph of {@link PTransform PTransforms}, and the
      + * {@link PCollection PCollections} that the {@link PTransform}s consume and produce.
        *
- * <p> After a {@code Pipeline} has been constructed, it can be executed,
- * using a default or an explicit {@link PipelineRunner}.
+ * <p>A {@link Pipeline} is initialized with a {@link PipelineRunner} that will later
+ * execute the {@link Pipeline}.
  *
- * <p> Multiple {@code Pipeline}s can be constructed and executed independently
- * and concurrently.
+ * <p>{@link Pipeline Pipelines} are independent, so they can be constructed and executed
+ * concurrently.
  *
- * <p> Each {@code Pipeline} is self-contained and isolated from any other
- * {@code Pipeline}. The {@link PValue PValues} that are inputs and outputs of each of a
- * {@code Pipeline}'s {@link PTransform PTransforms} are also owned by that {@code Pipeline}.
- * A {@code PValue} owned by one {@code Pipeline} can be read only by {@code PTransform}s
- * also owned by that {@code Pipeline}.
+ * <p>Each {@link Pipeline} is self-contained and isolated from any other
+ * {@link Pipeline}. The {@link PValue PValues} that are inputs and outputs of each of a
+ * {@link Pipeline Pipeline's} {@link PTransform PTransforms} are also owned by that
+ * {@link Pipeline}. A {@link PValue} owned by one {@link Pipeline} can be read only by
+ * {@link PTransform PTransforms} also owned by that {@link Pipeline}.
  *
- * <p> Here's a typical example of use:
+ * <p>Here is a typical example of use:
  * <pre> {@code
        * // Start by defining the options for the pipeline.
        * PipelineOptions options = PipelineOptionsFactory.create();
      - * // Then create the pipeline.
      + * // Then create the pipeline. The runner is determined by the options.
        * Pipeline p = Pipeline.create(options);
        *
        * // A root PTransform, like TextIO.Read or Create, gets added
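
Spelled out slightly further, the usage pattern sketched in the example above looks roughly like this; the input and output locations and the inline DoFn are illustrative placeholders, not code from this commit:

    PipelineOptions options = PipelineOptionsFactory.create();
    Pipeline p = Pipeline.create(options);

    p.apply(TextIO.Read.from("gs://my-bucket/input/*"))            // placeholder input
     .apply(ParDo.of(new DoFn<String, String>() {
       @Override
       public void processElement(ProcessContext c) {
         c.output(c.element().toUpperCase());                      // trivial per-element step
       }
     }))
     .apply(TextIO.Write.to("gs://my-bucket/output/result"));      // placeholder output

    p.run();  // executes on whichever PipelineRunner the options selected
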
      @@ -106,13 +108,14 @@ public class Pipeline {
         private static final Logger LOG = LoggerFactory.getLogger(Pipeline.class);
       
         /**
      -   * Thrown during pipeline execution, whenever user code within a pipeline throws an exception.
      +   * Thrown during execution of a {@link Pipeline}, whenever user code within that
      +   * {@link Pipeline} throws an exception.
          *
      -   * 

      The exception thrown during pipeline execution may be retrieved via {@link #getCause}. + *

      The original exception thrown by user code may be retrieved via {@link #getCause}. */ public static class PipelineExecutionException extends RuntimeException { /** - * Wraps {@code cause} into a {@code PipelineExecutionException}. + * Wraps {@code cause} into a {@link PipelineExecutionException}. */ public PipelineExecutionException(Throwable cause) { super(cause); @@ -135,16 +138,18 @@ public static Pipeline create(PipelineOptions options) { /** * Returns a {@link PBegin} owned by this Pipeline. This is useful - * as the input of a root PTransform such as {@code TextIO.Read} or - * {@link com.google.cloud.dataflow.sdk.transforms.Create}. + * as the input of a root PTransform such as {@link Read} or + * {@link Create}. */ public PBegin begin() { return PBegin.in(this); } /** - * Like {@link #apply(String, PTransform)} but defaulting to the name - * of the {@code PTransform}. + * Like {@link #apply(String, PTransform)} but the transform node in the {@link Pipeline} + * graph will be named according to {@link PTransform#getName}. + * + * @see #apply(String, PTransform) */ public OutputT apply( PTransform root) { @@ -152,11 +157,12 @@ public OutputT apply( } /** - * Starts using this pipeline with a root {@code PTransform} such as - * {@code TextIO.READ} or {@link com.google.cloud.dataflow.sdk.transforms.Create}. - * This specific call to {@code apply} is identified by the provided {@code name}. + * Adds a root {@link PTransform}, such as {@link Read} or {@link Create}, + * to this {@link Pipeline}. + * + *

      The node in the {@link Pipeline} graph will use the provided {@code name}. * This name is used in various places, including the monitoring UI, logging, - * and to stably identify this application node in the job graph. + * and to stably identify this node in the {@link Pipeline} graph upon update. * *

      Alias for {@code begin().apply(name, root)}. */ @@ -166,7 +172,7 @@ public OutputT apply( } /** - * Runs the Pipeline. + * Runs the {@link Pipeline} using its {@link PipelineRunner}. */ public PipelineResult run() { LOG.debug("Running {} via {}", this, runner); @@ -186,7 +192,7 @@ public PipelineResult run() { // Below here are operations that aren't normally called by users. /** - * Returns the {@link CoderRegistry} that this Pipeline uses. + * Returns the {@link CoderRegistry} that this {@link Pipeline} uses. */ public CoderRegistry getCoderRegistry() { if (coderRegistry == null) { @@ -197,7 +203,7 @@ public CoderRegistry getCoderRegistry() { } /** - * Sets the {@link CoderRegistry} that this Pipeline uses. + * Sets the {@link CoderRegistry} that this {@link Pipeline} uses. */ public void setCoderRegistry(CoderRegistry coderRegistry) { this.coderRegistry = coderRegistry; @@ -206,17 +212,17 @@ public void setCoderRegistry(CoderRegistry coderRegistry) { /** * A {@link PipelineVisitor} can be passed into * {@link Pipeline#traverseTopologically} to be called for each of the - * transforms and values in the Pipeline. + * transforms and values in the {@link Pipeline}. */ public interface PipelineVisitor { /** * Called for each composite transform after all topological predecessors have been visited - * but before any of the component transforms. + * but before any of its component transforms. */ public void enterCompositeTransform(TransformTreeNode node); /** - * Called for each composite transform after all of its component transforms and their ouputs + * Called for each composite transform after all of its component transforms and their outputs * have been visited. */ public void leaveCompositeTransform(TransformTreeNode node); @@ -235,14 +241,15 @@ public interface PipelineVisitor { } /** - * Invokes the PipelineVisitor's + * Invokes the {@link PipelineVisitor PipelineVisitor's} * {@link PipelineVisitor#visitTransform} and * {@link PipelineVisitor#visitValue} operations on each of this - * Pipeline's PTransforms and PValues, in forward + * {@link Pipeline Pipeline's} transform and value nodes, in forward * topological order. * - *

      Traversal of the pipeline causes PTransform and PValue instances to - * be marked as finished, at which point they may no longer be modified. + *

      Traversal of the {@link Pipeline} causes {@link PTransform PTransforms} and + * {@link PValue PValues} owned by the {@link Pipeline} to be marked as finished, + * at which point they may no longer be modified. * *

      Typically invoked by {@link PipelineRunner} subclasses. */ @@ -271,9 +278,11 @@ OutputT applyTransform(InputT input, * Applies the given {@code PTransform} to this input {@code InputT} and returns * its {@code OutputT}. This uses {@code name} to identify this specific application * of the transform. This name is used in various places, including the monitoring UI, - * logging, and to stably identify this application node in the job graph. + * logging, and to stably identify this application node in the {@link Pipeline} graph during + * update. * - *

      Called by {@link PInput} subclasses in their {@code apply} methods. + *

      Each {@link PInput} subclass that provides an {@code apply} method should delegate to + * this method to ensure proper registration with the {@link PipelineRunner}. */ public static OutputT applyTransform(String name, InputT input, @@ -312,7 +321,7 @@ public String toString() { } /** - * Applies a transformation to the given input. + * Applies a {@link PTransform} to the given {@link PInput}. * * @see Pipeline#apply */ @@ -386,16 +395,17 @@ OutputT applyInternal(String name, InputT input, } /** - * Verifies that the output of a PTransform is correctly defined. + * Verifies that the output of a {@link PTransform} is correctly configured in its + * {@link TransformTreeNode} in the {@link Pipeline} graph. * - *

      A non-composite transform must have all - * of its outputs registered as produced by the transform. + *

      A non-composite {@link PTransform} must have all + * of its outputs registered as produced by that {@link PTransform}. * - *

      A composite transform must have all of its outputs - * registered as produced by the contained primitive transforms. + *

      A composite {@link PTransform} must have all of its outputs + * registered as produced by the contained primitive {@link PTransform PTransforms}. * They have each had the above check performed already, when * they were applied, so the only possible failure state is - * that the composite transform has returned a primitive output. + * that the composite {@link PTransform} has returned a primitive output. */ private void verifyOutputState(POutput output, TransformTreeNode node) { if (!node.isCompositeNode()) { @@ -427,24 +437,23 @@ private void verifyOutputState(POutput output, TransformTreeNode node) { } /** - * Returns the configured pipeline runner. + * Returns the configured {@link PipelineRunner}. */ public PipelineRunner getRunner() { return runner; } /** - * Returns the configured pipeline options. + * Returns the configured {@link PipelineOptions}. */ public PipelineOptions getOptions() { return options; } /** - * Returns the fully qualified name of a transform for testing. - * - * @throws IllegalStateException if the transform has not been applied to the pipeline - * or was applied multiple times. + * @deprecated this method is no longer compatible with the design of {@link Pipeline}, + * as {@link PTransform PTransforms} can be applied multiple times, with different names + * each time. */ @Deprecated public String getFullNameForTesting(PTransform transform) { @@ -475,14 +484,14 @@ private String uniquifyInternal(String namePrefix, String origName) { } /** - * Builds a name from a /-delimited prefix and a name. + * Builds a name from a "/"-delimited prefix and a name. */ private String buildName(String namePrefix, String name) { return namePrefix.isEmpty() ? name : namePrefix + "/" + name; } /** - * Adds the given PValue to this Pipeline. + * Adds the given {@link PValue} to this {@link Pipeline}. * *

      For internal use only. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java index fece71b8486d0..6b9a36b728e63 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java @@ -21,7 +21,7 @@ import com.google.cloud.dataflow.sdk.transforms.Aggregator; /** - * Result of {@link com.google.cloud.dataflow.sdk.Pipeline#run()}. + * Result of {@link Pipeline#run()}. */ public interface PipelineResult { @@ -35,9 +35,10 @@ public interface PipelineResult { /** * Retrieves the current value of the provided {@link Aggregator}. * - * @param aggregator the Aggregator to retrieve values for - * @return the current values of the aggregator, which may be empty if there are no values yet - * @throws AggregatorRetrievalException if the aggregator values could not be retrieved + * @param aggregator the {@link Aggregator} to retrieve values for. + * @return the current values of the {@link Aggregator}, + * which may be empty if there are no values yet. + * @throws AggregatorRetrievalException if the {@link Aggregator} values could not be retrieved. */ AggregatorValues getAggregatorValues(Aggregator aggregator) throws AggregatorRetrievalException; @@ -46,18 +47,25 @@ AggregatorValues getAggregatorValues(Aggregator aggregator) /** Named constants for common values for the job state. */ public enum State { + /** The job state could not be obtained or was not specified. */ UNKNOWN(false, false), + /** The job has been paused, or has not yet started. */ STOPPED(false, false), + /** The job is currently running. */ RUNNING(false, false), + /** The job has successfully completed. */ DONE(true, false), + /** The job has failed. */ FAILED(true, false), + /** The job has been explicitly cancelled. */ CANCELLED(true, false), + /** The job has been updated. */ UPDATED(true, true); @@ -71,22 +79,17 @@ private State(boolean terminal, boolean hasReplacement) { } /** - * Returns if the job state can no longer complete work. - * - * @return if this JobState represents a terminal state. + * @return {@code true} if the job state can no longer complete work. */ public final boolean isTerminal() { return terminal; } /** - * Returns {@code true} if this job state indicates that a replacement job exists. + * @return {@code true} if this job state indicates that a replacement job exists. 
*/ public final boolean hasReplacementJob() { return hasReplacement; } - } - - } From afd9c2697c53ec581a15a66556b46511a9f2206e Mon Sep 17 00:00:00 2001 From: Kris Hildrum Date: Mon, 21 Dec 2015 14:52:58 -0800 Subject: [PATCH 1235/1541] fix typo --- .../java/com/google/cloud/dataflow/sdk/transforms/ParDo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 965d3cd77a26a..6a043505c9e32 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -149,7 +149,7 @@ * final PCollectionView maxWordLengthCutOffView = * maxWordLengthCutOff.apply(View.asSingleton()); * PCollection wordsBelowCutOff = - * words.apply(ParDo.withSideInput(maxWordLengthCutOffView) + * words.apply(ParDo.withSideInputs(maxWordLengthCutOffView) * .of(new DoFn() { * public void processElement(ProcessContext c) { * String word = c.element(); From 1ed2bffbecd3d640698fe33615fe9cd293cd8a90 Mon Sep 17 00:00:00 2001 From: bchambers Date: Fri, 18 Dec 2015 10:52:19 -0800 Subject: [PATCH 1236/1541] Remove jetty-jmx dependency ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110556252 --- sdk/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sdk/pom.xml b/sdk/pom.xml index 376f84a993034..bea0e986d11aa 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -616,12 +616,6 @@ 9.2.10.v20150310 - - org.eclipse.jetty - jetty-jmx - 9.2.10.v20150310 - - javax.servlet javax.servlet-api From 0d0894a35247af19ec1bc6d8d8b2fcc86087ed8d Mon Sep 17 00:00:00 2001 From: robertwb Date: Fri, 18 Dec 2015 11:24:59 -0800 Subject: [PATCH 1237/1541] Small javadoc fixes for DoFn and friends ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110559193 --- .../cloud/dataflow/sdk/transforms/DoFn.java | 42 +++++++++---------- .../sdk/transforms/DoFnWithContext.java | 6 +-- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java index 1cb1f9d094f81..f299954b11bd0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java @@ -62,15 +62,18 @@ * mechanism for accessing {@link ProcessContext#window()} without the need * to implement {@link RequiresWindowAccess}. * + *

      See also {@link #processElement} for details on implementing the transformation + * from {@code InputT} to {@code OutputT}. + * * @param the type of the (main) input elements * @param the type of the (main) output elements - * - * @see #processElement for details on implementing the transformation - * from {@code InputT} to {@code OutputT}. */ public abstract class DoFn implements Serializable { - /** Information accessible to all methods in this {@code DoFn}. */ + /** + * Information accessible to all methods in this {@code DoFn}. + * Used primarily to output elements. + */ public abstract class Context { /** @@ -89,11 +92,11 @@ public abstract class Context { * by the Dataflow runtime or later steps in the pipeline, or used in * other unspecified ways. * - *

      If invoked from {@link DoFn#processElement}, the output + *

      If invoked from {@link DoFn#processElement processElement}, the output * element will have the same timestamp and be in the same windows - * as the input element passed to {@link DoFn#processElement}). + * as the input element passed to {@link DoFn#processElement processElement}. * - *

      If invoked from {@link #startBundle} or {@link #finishBundle}, + *

      If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -110,12 +113,12 @@ public abstract class Context { *

      Once passed to {@code outputWithTimestamp} the element should not be * modified in any way. * - *

      If invoked from {@link DoFn#processElement}), the timestamp + *

      If invoked from {@link DoFn#processElement processElement}, the timestamp * must not be older than the input element's timestamp minus - * {@link DoFn#getAllowedTimestampSkew}. The output element will + * {@link DoFn#getAllowedTimestampSkew getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

      If invoked from {@link #startBundle} or {@link #finishBundle}, + *

      If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -132,15 +135,15 @@ public abstract class Context { *

      Once passed to {@code sideOutput} the element should not be modified * in any way. * - *

      The caller of {@code ParDo} uses {@link ParDo#withOutputTags} to + *

      The caller of {@code ParDo} uses {@link ParDo#withOutputTags withOutputTags} to * specify the tags of side outputs that it consumes. Non-consumed side * outputs, e.g., outputs for monitoring purposes only, don't necessarily * need to be specified. * *

      The output element will have the same timestamp and be in the same - * windows as the input element passed to {@link DoFn#processElement}). + * windows as the input element passed to {@link DoFn#processElement processElement}. * - *

      If invoked from {@link #startBundle} or {@link #finishBundle}, + *

      If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -148,8 +151,6 @@ public abstract class Context { * to access any information about the input element. The output element * will have a timestamp of negative infinity. * - * @throws IllegalArgumentException if the number of outputs exceeds - * the limit of 1,000 outputs per DoFn * @see ParDo#withOutputTags */ public abstract void sideOutput(TupleTag tag, T output); @@ -161,12 +162,12 @@ public abstract class Context { *

      Once passed to {@code sideOutputWithTimestamp} the element should not be * modified in any way. * - *

      If invoked from {@link DoFn#processElement}), the timestamp + *

      If invoked from {@link DoFn#processElement processElement}, the timestamp * must not be older than the input element's timestamp minus - * {@link DoFn#getAllowedTimestampSkew}. The output element will + * {@link DoFn#getAllowedTimestampSkew getAllowedTimestampSkew}. The output element will * be in the same windows as the input element. * - *

      If invoked from {@link #startBundle} or {@link #finishBundle}, + *

      If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle}, * this will attempt to use the * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} * of the input {@code PCollection} to determine what windows the element @@ -174,8 +175,6 @@ public abstract class Context { * to access any information about the input element except for the * timestamp. * - * @throws IllegalArgumentException if the number of outputs exceeds - * the limit of 1,000 outputs per DoFn * @see ParDo#withOutputTags */ public abstract void sideOutputWithTimestamp( @@ -344,8 +343,7 @@ public void startBundle(Context c) throws Exception { public abstract void processElement(ProcessContext c) throws Exception; /** - * Finishes processing this batch of elements. This {@code DoFn} - * instance will be thrown away after this operation returns. + * Finishes processing this batch of elements. * *

      By default, does nothing. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java index 36f2c43b86ffd..f2f5ba7743e27 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java @@ -70,7 +70,7 @@ * PCollection lines = ... ; * PCollection words = * lines.apply(ParDo.of(new DoFnWithContext() { - * {@literal @}ProcessElement + * @ProcessElement * public void processElement(ProcessContext c, BoundedWindow window) { * * }})); @@ -159,8 +159,6 @@ public abstract class Context { * to access any information about the input element. The output element * will have a timestamp of negative infinity. * - * @throws IllegalArgumentException if the number of outputs exceeds - * the limit of 1,000 outputs per DoFn * @see ParDo#withOutputTags */ public abstract void sideOutput(TupleTag tag, T output); @@ -185,8 +183,6 @@ public abstract class Context { * to access any information about the input element except for the * timestamp. * - * @throws IllegalArgumentException if the number of outputs exceeds - * the limit of 1,000 outputs per DoFn * @see ParDo#withOutputTags */ public abstract void sideOutputWithTimestamp( From cfd1c672223a8e13e3f62075b8c5a946fd633202 Mon Sep 17 00:00:00 2001 From: klk Date: Fri, 18 Dec 2015 11:31:58 -0800 Subject: [PATCH 1238/1541] Add Proto2Coder as universal fallback ----Release Notes---- - Any classes that are protocol buffers (v2) Message subclasses will have their coders automatically provided. [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110559835 --- .../dataflow/sdk/coders/CoderRegistry.java | 3 +- .../dataflow/sdk/coders/Proto2Coder.java | 34 ++++++++++++++++++- .../sdk/coders/CoderRegistryTest.java | 8 +++++ .../dataflow/sdk/coders/Proto2CoderTest.java | 26 ++++++++++++-- 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java index 082344c113a9d..82c3a4980e140 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -80,7 +80,8 @@ public class CoderRegistry implements CoderProvider { private static final Logger LOG = LoggerFactory.getLogger(CoderRegistry.class); public CoderRegistry() { - setFallbackCoderProvider(SerializableCoder.PROVIDER); + setFallbackCoderProvider(CoderProviders.firstOf(Proto2Coder.coderProvider(), + SerializableCoder.PROVIDER)); } /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java index 9305fbeb4ae2a..a012cc36cff22 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java @@ -17,6 +17,7 @@ import com.google.cloud.dataflow.sdk.util.CloudObject; import com.google.cloud.dataflow.sdk.util.Structs; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; @@ -87,11 +88,42 @@ private Proto2Coder(Class protoMessageClass, List> extensionHostClas 
this.extensionHostClasses = extensionHostClasses; } + private static final CoderProvider PROVIDER = new CoderProvider() { + @Override + public Coder getCoder(TypeDescriptor type) throws CannotProvideCoderException { + if (type.isSubtypeOf(new TypeDescriptor() {})) { + @SuppressWarnings("unchecked") + TypeDescriptor messageType = (TypeDescriptor) type; + @SuppressWarnings("unchecked") + Coder coder = (Coder) Proto2Coder.of(messageType); + return coder; + } else { + throw new CannotProvideCoderException( + String.format("Cannot provide Proto2Coder because %s " + + "is not a subclass of protocol buffer Messsage", + type)); + } + } + }; + + public static CoderProvider coderProvider() { + return PROVIDER; + } + /** * Returns a {@code Proto2Coder} for the given Protobuf message class. */ public static Proto2Coder of(Class protoMessageClass) { - return new Proto2Coder<>(protoMessageClass, Collections.>emptyList()); + return new Proto2Coder(protoMessageClass, Collections.>emptyList()); + } + + /** + * Returns a {@code Proto2Coder} for the given Protobuf message class. + */ + public static Proto2Coder of(TypeDescriptor protoMessageType) { + @SuppressWarnings("unchecked") + Class protoMessageClass = (Class) protoMessageType.getRawType(); + return of(protoMessageClass); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java index 8ad44117721e3..7fd0d22ea5602 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java @@ -81,6 +81,14 @@ public void testSerializableFallbackCoderProvider() throws Exception { assertEquals(serializableCoder, SerializableCoder.of(SerializableClass.class)); } + @Test + public void testProto2CoderFallbackCoderProvider() throws Exception { + CoderRegistry registry = getStandardRegistry(); + Coder coder = + registry.getDefaultCoder(Proto2CoderTestMessages.MessageA.class); + assertEquals(coder, Proto2Coder.of(new TypeDescriptor() {})); + } + @Test public void testAvroFallbackCoderProvider() throws Exception { CoderRegistry registry = getStandardRegistry(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java index f845e598aa48c..f4c355715a470 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/Proto2CoderTest.java @@ -16,11 +16,14 @@ package com.google.cloud.dataflow.sdk.coders; +import static org.junit.Assert.assertEquals; + import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageA; import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageB; import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageC; import com.google.cloud.dataflow.sdk.testing.CoderProperties; import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.values.TypeDescriptor; import com.google.common.collect.ImmutableList; import org.junit.Rule; @@ -35,6 +38,26 @@ @RunWith(JUnit4.class) public class Proto2CoderTest { + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void testFactoryMethodAgreement() throws Exception { + assertEquals( + Proto2Coder.of(new TypeDescriptor() {}), + Proto2Coder.of(MessageA.class)); + + assertEquals( + 
Proto2Coder.of(new TypeDescriptor() {}), + Proto2Coder.coderProvider().getCoder(new TypeDescriptor() {})); + } + + @Test + public void testProviderCannotProvideCoder() throws Exception { + thrown.expect(CannotProvideCoderException.class); + Proto2Coder.coderProvider().getCoder(new TypeDescriptor() {}); + } + @Test public void testCoderEncodeDecodeEqual() throws Exception { MessageA value = MessageA.newBuilder() @@ -111,9 +134,6 @@ public void testEncodingId() throws Exception { CoderProperties.coderHasEncodingId(coder, MessageC.class.getName()); } - @Rule - public ExpectedException thrown = ExpectedException.none(); - @Test public void encodeNullThrowsCoderException() throws Exception { thrown.expect(CoderException.class); From af0367ca583a4ec85740218299a21774a0afa3ee Mon Sep 17 00:00:00 2001 From: bchambers Date: Fri, 18 Dec 2015 11:37:55 -0800 Subject: [PATCH 1239/1541] Add dependency on jetty-servlet ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110560293 --- sdk/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdk/pom.xml b/sdk/pom.xml index bea0e986d11aa..5f6d714a8e159 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -616,6 +616,12 @@ 9.2.10.v20150310 + + org.eclipse.jetty + jetty-servlet + 9.2.10.v20150310 + + javax.servlet javax.servlet-api From 4057defd085e8f4f9110e44a86dfb297ab3704a2 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Fri, 18 Dec 2015 17:29:38 -0800 Subject: [PATCH 1240/1541] CountingSource: bounded/unbounded source of longs These sources are useful for testing and for data generation. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110585645 --- .../cloud/dataflow/sdk/io/CountingSource.java | 386 ++++++++++++++++++ .../dataflow/sdk/io/CountingSourceTest.java | 216 ++++++++++ 2 files changed, 602 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/CountingSourceTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java new file mode 100644 index 0000000000000..2938534168abd --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java @@ -0,0 +1,386 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.io; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.coders.VarLongCoder; +import com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.common.collect.ImmutableList; + +import org.joda.time.Instant; + +import java.io.IOException; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * A source that produces longs. When used as a {@link BoundedSource}, {@link CountingSource} + * starts at {@code 0} and counts up to a specified maximum. When used as an + * {@link UnboundedSource}, it counts up to {@link Long#MAX_VALUE} and then never produces more + * output. (In practice, this limit should never be reached.) + * + *

      The bounded {@link CountingSource} is implemented based on {@link OffsetBasedSource} and + * {@link OffsetBasedSource.OffsetBasedReader}, so it performs efficient initial splitting and it + * supports dynamic work rebalancing. + * + *

      To produce a bounded {@code PCollection}, use {@link CountingSource#upTo(long)}: + * + *

      {@code
      + * Pipeline p = ...
+ * BoundedSource<Long> source = CountingSource.upTo(1000);
+ * PCollection<Long> bounded = p.apply(Read.from(source));
      + * }
      + * + *

      To produce an unbounded {@code PCollection}, use {@link CountingSource#unbounded} or + * {@link CountingSource#unboundedWithTimestampFn}: + * + *

      {@code
      + * Pipeline p = ...
      + *
      + * // To create an unbounded source that uses processing time as the element timestamp.
+ * UnboundedSource<Long, CounterMark> source = CountingSource.unbounded();
+ * // Or, to create an unbounded source that uses a provided function to set the element timestamp.
+ * UnboundedSource<Long, CounterMark> source = CountingSource.unboundedWithTimestampFn(someFn);
+ *
+ * PCollection<Long> unbounded = p.apply(Read.from(source));
      + * }
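
When the unbounded form is used in tests, it is typically capped again through Read's withMaxNumRecords, exactly as CountingSourceTest does further down in this patch; a minimal sketch:

    // Mirrors the test usage below: read a bounded prefix of the unbounded counting source.
    Pipeline p = TestPipeline.create();
    PCollection<Long> firstThousand =
        p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(1000));
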
      + */ +public class CountingSource { + /** + * Creates a {@link BoundedSource} that will produce the specified number of elements, + * from {@code 0} to {@code numElements - 1}. + */ + public static BoundedSource upTo(long numElements) { + checkArgument(numElements > 0, "numElements (%s) must be greater than 0", numElements); + return new BoundedCountingSource(0, numElements); + } + + /** + * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to + * {@link Long#MAX_VALUE}. + * + *

      After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this + * limit should never be reached.) + * + *

      Elements in the resulting {@link PCollection PCollection<Long>} will have timestamps + * corresponding to processing time at element generation, provided by {@link Instant#now}. + */ + public static UnboundedSource unbounded() { + return unboundedWithTimestampFn(new NowTimestampFn()); + } + + /** + * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to + * {@link Long#MAX_VALUE}, with element timestamps supplied by the specified function. + * + *

      After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this + * limit should never be reached.) + * + *

      Note that the timestamps produced by {@code timestampFn} may not decrease. + */ + public static UnboundedSource unboundedWithTimestampFn( + SerializableFunction timestampFn) { + return new UnboundedCountingSource(0, 1, timestampFn); + } + + ///////////////////////////////////////////////////////////////////////////////////////////// + + /** Prevent instantiation. */ + private CountingSource() {} + + + /** + * A function that returns {@link Instant#now} as the timestamp for each generated element. + */ + private static class NowTimestampFn implements SerializableFunction { + @Override + public Instant apply(Long input) { + return Instant.now(); + } + } + + /** + * An implementation of {@link CountingSource} that produces a bounded {@link PCollection}. + * It is implemented on top of {@link OffsetBasedSource} (with associated reader + * {@link BoundedCountingReader}) and performs efficient initial splitting and supports dynamic + * work rebalancing. + */ + private static class BoundedCountingSource extends OffsetBasedSource { + /** + * Creates a {@link BoundedCountingSource} that generates the numbers in the specified + * {@code [start, end)} range. + */ + public BoundedCountingSource(long start, long end) { + super(start, end, 1 /* can be split every 1 offset */); + } + + //////////////////////////////////////////////////////////////////////////////////////////// + + @Override + public long getBytesPerOffset() { + return 8; + } + + @Override + public long getMaxEndOffset(PipelineOptions options) throws Exception { + return getEndOffset(); + } + + @Override + public OffsetBasedSource createSourceForSubrange(long start, long end) { + return new BoundedCountingSource(start, end); + } + + @Override + public boolean producesSortedKeys(PipelineOptions options) throws Exception { + return true; + } + + @Override + public com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader createReader( + PipelineOptions options) throws IOException { + return new BoundedCountingReader(this); + } + + @Override + public Coder getDefaultOutputCoder() { + return VarLongCoder.of(); + } + } + + /** + * The reader associated with {@link BoundedCountingSource}. + * + * @see BoundedCountingSource + */ + private static class BoundedCountingReader extends OffsetBasedSource.OffsetBasedReader { + private long current; + + public BoundedCountingReader(OffsetBasedSource source) { + super(source); + } + + @Override + protected long getCurrentOffset() throws NoSuchElementException { + return current; + } + + @Override + public synchronized BoundedCountingSource getCurrentSource() { + return (BoundedCountingSource) super.getCurrentSource(); + } + + @Override + public Long getCurrent() throws NoSuchElementException { + return current; + } + + @Override + protected boolean startImpl() throws IOException { + current = getCurrentSource().getStartOffset(); + return true; + } + + @Override + protected boolean advanceImpl() throws IOException { + current++; + return true; + } + + @Override + public void close() throws IOException {} + } + + /** + * An implementation of {@link CountingSource} that produces an unbounded {@link PCollection}. + */ + private static class UnboundedCountingSource extends UnboundedSource { + /** The first number (>= 0) generated by this {@link UnboundedCountingSource}. */ + private final long start; + /** The interval between numbers generated by this {@link UnboundedCountingSource}. */ + private final long stride; + /** The function used to produce timestamps for the generated elements. 
*/ + private final SerializableFunction timestampFn; + + /** + * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to + * {@link Long#MAX_VALUE}, with element timestamps supplied by the specified function. + * + *

      After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this + * limit should never be reached.) + * + *

      Note that the timestamps produced by {@code timestampFn} may not decrease. + */ + public UnboundedCountingSource( + long start, long stride, SerializableFunction timestampFn) { + this.start = start; + this.stride = stride; + this.timestampFn = timestampFn; + } + + /** + * Splits an unbounded source {@code desiredNumSplits} ways by giving each split every + * {@code desiredNumSplits}th element that this {@link UnboundedCountingSource} + * produces. + * + *

      E.g., if a source produces all even numbers {@code [0, 2, 4, 6, 8, ...)} and we want to + * split into 3 new sources, then the new sources will produce numbers that are 6 apart and + * are offset at the start by the original stride: {@code [0, 6, 12, ...)}, + * {@code [2, 8, 14, ...)}, and {@code [4, 10, 16, ...)}. + */ + @Override + public List> generateInitialSplits( + int desiredNumSplits, PipelineOptions options) throws Exception { + // Using Javadoc example, stride 2 with 3 splits becomes stride 6. + long newStride = stride * desiredNumSplits; + + ImmutableList.Builder splits = ImmutableList.builder(); + for (int i = 0; i < desiredNumSplits; ++i) { + // Starts offset by the original stride. Using Javadoc example, this generates starts of + // 0, 2, and 4. + splits.add(new UnboundedCountingSource(start + i * stride, newStride, timestampFn)); + } + return splits.build(); + } + + @Override + public UnboundedReader createReader( + PipelineOptions options, CounterMark checkpointMark) { + return new UnboundedCountingReader(this, checkpointMark); + } + + @Override + public Coder getCheckpointMarkCoder() { + return AvroCoder.of(CountingSource.CounterMark.class); + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return VarLongCoder.of(); + } + } + + /** + * The reader associated with {@link UnboundedCountingSource}. + * + * @see UnboundedCountingSource + */ + private static class UnboundedCountingReader extends UnboundedReader { + private UnboundedCountingSource source; + private long current; + private Instant currentTimestamp; + + public UnboundedCountingReader(UnboundedCountingSource source, CounterMark mark) { + this.source = source; + if (mark == null) { + // Because we have not emitted an element yet, and start() calls advance, we need to + // "un-advance" so that start() produces the correct output. + this.current = source.start - source.stride; + } else { + this.current = mark.getLastEmitted(); + } + } + + @Override + public boolean start() throws IOException { + return advance(); + } + + @Override + public boolean advance() throws IOException { + // Overflow-safe check that (current + source.stride) <= LONG.MAX_VALUE. Else, stop producing. + if (Long.MAX_VALUE - source.stride < current) { + return false; + } + current += source.stride; + currentTimestamp = source.timestampFn.apply(current); + return true; + } + + @Override + public Instant getWatermark() { + return source.timestampFn.apply(current); + } + + @Override + public CounterMark getCheckpointMark() { + return new CounterMark(current); + } + + @Override + public UnboundedSource getCurrentSource() { + return source; + } + + @Override + public Long getCurrent() throws NoSuchElementException { + return current; + } + + @Override + public Instant getCurrentTimestamp() throws NoSuchElementException { + return currentTimestamp; + } + + @Override + public void close() throws IOException {} + } + + /** + * The checkpoint for an unbounded {@link CountingSource} is simply the last value produced. The + * associated source object encapsulates the information needed to produce the next value. + */ + @DefaultCoder(AvroCoder.class) + public static class CounterMark implements UnboundedSource.CheckpointMark { + /** The last value emitted. */ + private final long lastEmitted; + + /** + * Creates a checkpoint mark reflecting the last emitted value. 
+ */ + public CounterMark(long lastEmitted) { + this.lastEmitted = lastEmitted; + } + + /** + * Returns the last value emitted by the reader. + */ + public long getLastEmitted() { + return lastEmitted; + } + + ///////////////////////////////////////////////////////////////////////////////////// + + @SuppressWarnings("unused") // For AvroCoder + private CounterMark() { + this.lastEmitted = 0L; + } + + @Override + public void finalizeCheckpoint() throws IOException {} + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/CountingSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/CountingSourceTest.java new file mode 100644 index 0000000000000..178287660066c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/CountingSourceTest.java @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.io.CountingSource.CounterMark; +import com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader; +import com.google.cloud.dataflow.sdk.testing.DataflowAssert; +import com.google.cloud.dataflow.sdk.testing.RunnableOnService; +import com.google.cloud.dataflow.sdk.testing.TestPipeline; +import com.google.cloud.dataflow.sdk.transforms.Count; +import com.google.cloud.dataflow.sdk.transforms.DoFn; +import com.google.cloud.dataflow.sdk.transforms.Flatten; +import com.google.cloud.dataflow.sdk.transforms.Max; +import com.google.cloud.dataflow.sdk.transforms.Min; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; +import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; +import com.google.cloud.dataflow.sdk.util.CoderUtils; +import com.google.cloud.dataflow.sdk.values.PCollection; +import com.google.cloud.dataflow.sdk.values.PCollectionList; + +import org.joda.time.Instant; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.List; + +/** + * Tests of {@link CountingSource}. 
+ */ +@RunWith(JUnit4.class) +public class CountingSourceTest { + + public static void addCountingAsserts(PCollection input, long numElements) { + // Count == numElements + DataflowAssert + .thatSingleton(input.apply("Count", Count.globally())) + .isEqualTo(numElements); + // Unique count == numElements + DataflowAssert + .thatSingleton(input.apply(RemoveDuplicates.create()) + .apply("UniqueCount", Count.globally())) + .isEqualTo(numElements); + // Min == 0 + DataflowAssert + .thatSingleton(input.apply("Min", Min.globally())) + .isEqualTo(0L); + // Max == numElements-1 + DataflowAssert + .thatSingleton(input.apply("Max", Max.globally())) + .isEqualTo(numElements - 1); + } + + @Test + @Category(RunnableOnService.class) + public void testBoundedSource() { + Pipeline p = TestPipeline.create(); + long numElements = 1000; + PCollection input = p.apply(Read.from(CountingSource.upTo(numElements))); + + addCountingAsserts(input, numElements); + p.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testBoundedSourceSplits() throws Exception { + Pipeline p = TestPipeline.create(); + long numElements = 1000; + long numSplits = 10; + long splitSizeBytes = numElements * 8 / numSplits; // 8 bytes per long element. + + BoundedSource initial = CountingSource.upTo(numElements); + List> splits = + initial.splitIntoBundles(splitSizeBytes, p.getOptions()); + assertEquals("Expected exact splitting", numSplits, splits.size()); + + // Assemble all the splits into one flattened PCollection, also verify their sizes. + PCollectionList pcollections = PCollectionList.empty(p); + for (int i = 0; i < splits.size(); ++i) { + BoundedSource split = splits.get(i); + pcollections = pcollections.and(p.apply("split" + i, Read.from(split))); + assertEquals("Expected even splitting", + splitSizeBytes, split.getEstimatedSizeBytes(p.getOptions())); + } + PCollection input = pcollections.apply(Flatten.pCollections()); + + addCountingAsserts(input, numElements); + p.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testUnboundedSource() { + Pipeline p = TestPipeline.create(); + long numElements = 1000; + + PCollection input = p + .apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(numElements)); + + addCountingAsserts(input, numElements); + p.run(); + } + + private static class ElementValueDiff extends DoFn { + @Override + public void processElement(ProcessContext c) throws Exception { + c.output(c.element() - c.timestamp().getMillis()); + } + } + + @Test + @Category(RunnableOnService.class) + public void testUnboundedSourceTimestamps() { + Pipeline p = TestPipeline.create(); + long numElements = 1000; + + PCollection input = p.apply( + Read.from(CountingSource.unboundedWithTimestampFn(new ValueAsTimestampFn())) + .withMaxNumRecords(numElements)); + addCountingAsserts(input, numElements); + + PCollection diffs = input + .apply("TimestampDiff", ParDo.of(new ElementValueDiff())) + .apply("RemoveDuplicateTimestamps", RemoveDuplicates.create()); + // This assert also confirms that diffs only has one unique value. 
+ DataflowAssert.thatSingleton(diffs).equals(0L); + + p.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testUnboundedSourceSplits() throws Exception { + Pipeline p = TestPipeline.create(); + long numElements = 1000; + int numSplits = 10; + + UnboundedSource initial = CountingSource.unbounded(); + List> splits = + initial.generateInitialSplits(numSplits, p.getOptions()); + assertEquals("Expected exact splitting", numSplits, splits.size()); + + long elementsPerSplit = numElements / numSplits; + assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits); + PCollectionList pcollections = PCollectionList.empty(p); + for (int i = 0; i < splits.size(); ++i) { + pcollections = pcollections.and( + p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))); + } + PCollection input = pcollections.apply(Flatten.pCollections()); + + addCountingAsserts(input, numElements); + p.run(); + } + + /** + * A timestamp function that uses the given value as the timestamp. Because the input values will + * not wrap, this function is non-decreasing and meets the timestamp function criteria laid out + * in {@link CountingSource#unboundedWithTimestampFn(SerializableFunction)}. + */ + private static class ValueAsTimestampFn implements SerializableFunction { + @Override + public Instant apply(Long input) { + return new Instant(input); + } + } + + @Test + public void testUnboundedSourceCheckpointMark() throws Exception { + UnboundedSource source = + CountingSource.unboundedWithTimestampFn(new ValueAsTimestampFn()); + UnboundedReader reader = source.createReader(null, null); + final long numToSkip = 3; + assertTrue(reader.start()); + + // Advance the source numToSkip elements and manually save state. + for (long l = 0; l < numToSkip; ++l) { + reader.advance(); + } + + // Confirm that we get the expected element in sequence before checkpointing. + assertEquals(numToSkip, (long) reader.getCurrent()); + assertEquals(numToSkip, reader.getCurrentTimestamp().getMillis()); + + // Checkpoint and restart, and confirm that the source continues correctly. + CounterMark mark = CoderUtils.clone( + source.getCheckpointMarkCoder(), (CounterMark) reader.getCheckpointMark()); + reader = source.createReader(null, mark); + assertTrue(reader.start()); + + // Confirm that we get the next element in sequence. 
+ assertEquals(numToSkip + 1, (long) reader.getCurrent()); + assertEquals(numToSkip + 1, reader.getCurrentTimestamp().getMillis()); + } +} From 325471571f0b3cee74b3a80b6ec583b34f752f00 Mon Sep 17 00:00:00 2001 From: lcwik Date: Fri, 18 Dec 2015 20:43:31 -0800 Subject: [PATCH 1241/1541] Have Dataflow internally depend on Guava 19.0 ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110591070 --- pom.xml | 2 +- sdk/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 734eaf8775291..bd43b277cea8f 100644 --- a/pom.xml +++ b/pom.xml @@ -72,7 +72,7 @@ v1b3-rev13-1.21.0 v1beta2-rev1-3.0.2 1.21.0 - 18.0 + 19.0 1.3 2.4.2 2.4 diff --git a/sdk/pom.xml b/sdk/pom.xml index 5f6d714a8e159..002b2d8e4f727 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -245,7 +245,7 @@ ${basedir}/../javadoc/datastore-docs - http://docs.guava-libraries.googlecode.com/git-history/release18/javadoc/ + http://docs.guava-libraries.googlecode.com/git-history/release19/javadoc/ ${basedir}/../javadoc/guava-docs From e20747f09654e7a97b67034dbc015551e178e648 Mon Sep 17 00:00:00 2001 From: lcwik Date: Sat, 19 Dec 2015 14:20:15 -0800 Subject: [PATCH 1242/1541] Add a scalable bloom filter implementation ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110616150 --- .../sdk/util/ScalableBloomFilter.java | 331 ++++++++++++++++++ .../sdk/util/ScalableBloomFilterTest.java | 169 +++++++++ 2 files changed, 500 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilter.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilterTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilter.java new file mode 100644 index 0000000000000..b509dcfa28b94 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilter.java @@ -0,0 +1,331 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.dataflow.sdk.util; + +import static com.google.common.base.Preconditions.checkArgument; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.hash.BloomFilter; +import com.google.common.hash.Funnel; +import com.google.common.hash.PrimitiveSink; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A Bloom filter implementation with an expected false positive probability of {@code 0.000001} + * which grows dynamically with the number of insertions. For less than {@code 2^20} insertions + * which would modify a Bloom filter, we brute force all the Bloom filter combinations in powers of + * {@code 2} to only produce a scalable Bloom filter with one slice. + * + *

Otherwise, we use an implementation of + * Scalable Bloom Filters + * by Paulo Sergio Almeida, Carlos Baquero, Nuno Preguica, David Hutchison. Our implementation + * has an effective false positive probability of {@code 0.000001}, given that we use a ratio of + * {@code 0.9} and a scaling factor of {@code 2}. + */ +public class ScalableBloomFilter implements Serializable { + /** + * A {@link Coder} for scalable Bloom filters. The encoded format is: + *

        + *
      • var int encoding of number of Bloom filter slices + *
      • N Bloom filter slices + *
      + * + *

      The encoded form of each Bloom filter slice is: + *

        + *
      • 1 signed byte for the strategy + *
      • 1 unsigned byte for the number of hash functions + *
      • 1 big endian int, the number of longs in our bitset + *
      • N big endian longs of our bitset + *
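As a rough usage sketch (illustrative only; the key bytes and the enclosing method are made up, while {@code CoderUtils} is the SDK helper also exercised in the accompanying test), a filter can be built, encoded with this coder, and queried after decoding:

  static void bloomFilterRoundTrip() throws Exception {
    // Requires java.nio.charset.StandardCharsets and the SDK's CoderUtils.
    byte[] key = "example-key".getBytes(StandardCharsets.UTF_8);

    // Build a filter containing a single element.
    ScalableBloomFilter.Builder builder = ScalableBloomFilter.builder();
    builder.put(key, 0, key.length);
    ScalableBloomFilter filter = builder.build();

    // Round-trip the filter through the coder and query the decoded copy.
    byte[] encoded = CoderUtils.encodeToByteArray(ScalableBloomFilterCoder.of(), filter);
    ScalableBloomFilter decoded =
        CoderUtils.decodeFromByteArray(ScalableBloomFilterCoder.of(), encoded);
    boolean present = decoded.mightContain(key, 0, key.length);  // always true: no false negatives
  }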
      + */ + public static class ScalableBloomFilterCoder extends AtomicCoder { + private static final ScalableBloomFilterCoder INSTANCE = new ScalableBloomFilterCoder(); + + @JsonCreator + public static ScalableBloomFilterCoder of() { + return INSTANCE; + } + + @Override + public void encode(ScalableBloomFilter value, OutputStream outStream, Coder.Context context) + throws CoderException, IOException { + VarInt.encode(value.bloomFilterSlices.size(), outStream); + for (BloomFilter bloomFilter : value.bloomFilterSlices) { + bloomFilter.writeTo(outStream); + } + } + + @Override + public ScalableBloomFilter decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + int numberOfBloomFilters = VarInt.decodeInt(inStream); + List> bloomFilters = new ArrayList<>(numberOfBloomFilters); + for (int i = 0; i < numberOfBloomFilters; ++i) { + bloomFilters.add(BloomFilter.readFrom(inStream, ByteBufferFunnel.INSTANCE)); + } + return new ScalableBloomFilter(bloomFilters); + } + + @Override + public boolean consistentWithEquals() { + return true; + } + } + + private final List> bloomFilterSlices; + private ScalableBloomFilter(List> bloomFilters) { + this.bloomFilterSlices = bloomFilters; + } + + /** + * Returns false if the Bloom filter definitely does not contain the byte + * representation of an element contained in {@code buf} from {@code [offset, offset + length)}. + */ + public boolean mightContain(byte[] buf, int offset, int length) { + ByteBuffer byteBuffer = ByteBuffer.wrap(buf, offset, length); + return mightContain(byteBuffer); + } + + /** + * Returns false if the Bloom filter definitely does not contain the byte + * representation of an element contained in {@code byteBuffer} from {@code [position, limit)}. + */ + public boolean mightContain(ByteBuffer byteBuffer) { + for (int i = bloomFilterSlices.size() - 1; i >= 0; i--) { + if (bloomFilterSlices.get(i).mightContain(byteBuffer)) { + return true; + } + } + return false; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (!(other instanceof ScalableBloomFilter)) { + return false; + } + ScalableBloomFilter scalableBloomFilter = (ScalableBloomFilter) other; + if (bloomFilterSlices.size() != scalableBloomFilter.bloomFilterSlices.size()) { + return false; + } + for (int i = 0; i < bloomFilterSlices.size(); ++i) { + if (!bloomFilterSlices.get(i).equals(scalableBloomFilter.bloomFilterSlices.get(i))) { + return false; + } + } + return true; + } + + @Override + public int hashCode() { + return bloomFilterSlices.hashCode(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(ScalableBloomFilter.class) + .add("bloomFilterSlices", bloomFilterSlices) + .toString(); + } + + @VisibleForTesting + int numberOfBloomFilterSlices() { + return bloomFilterSlices.size(); + } + + /** + * Returns a scalable Bloom filter builder allowing one to construct a Bloom filter + * with an expected false positive probability of {@code 0.000001} irrespective + * of the number of elements inserted. + */ + public static Builder builder() { + return builder(Builder.MAX_INSERTIONS_FOR_ADD_TO_ALL_MODE_LOG_2); + } + + @VisibleForTesting + static Builder builder(int maxInsertionsForAddToAllModeLog2) { + return new Builder(maxInsertionsForAddToAllModeLog2); + } + + /** + * A scalable Bloom filter builder which during the build process will attempt to + * create a Bloom filter no larger than twice the required size for small Bloom filters. 
+ * For large Bloom filters, we create a list of Bloom filters which are successively twice as + * large as the previous which we insert elements into. + * + *

      This scalable Bloom filter builder uses 8mb of memory per instance to start when + * fewer than {@code 2^20} elements have been inserted. Afterwards, it increases in space usage + * by a factor of {@code 2.2} for every doubling in the number of unique insertions. + */ + public static class Builder { + private static final long MAX_ELEMENTS = 1L << 62; + private static final int MAX_INSERTIONS_FOR_ADD_TO_ALL_MODE_LOG_2 = 20; + private static final double DEFAULT_FALSE_POSITIVE_PROBABILITY = 0.000001; + private static final double RATIO = 0.9; + + private enum Mode { + ADD_TO_ALL, ADD_TO_LAST + } + + private final List> bloomFilters; + private Mode mode; + private long numberOfInsertions; + + private Builder(int maxInsertionsForAddToAllModeLog2) { + checkArgument(maxInsertionsForAddToAllModeLog2 < Long.SIZE - 1, + "%s does not support an initial size with more than 2^63 elements.", + ScalableBloomFilter.class.getSimpleName()); + this.bloomFilters = new ArrayList<>(); + this.mode = Mode.ADD_TO_ALL; + // 1, 2, 4, 8, 16, 32, ... + for (int i = 0; i <= maxInsertionsForAddToAllModeLog2; ++i) { + bloomFilters.add(BloomFilter.create( + ByteBufferFunnel.INSTANCE, + 1 << i, + DEFAULT_FALSE_POSITIVE_PROBABILITY)); + } + } + + /** + * Returns true if the Bloom filter was modified by inserting the byte + * representation of an element contained in {@code buf} from {@code [offset, offset + length)}. + */ + public boolean put(final byte[] buf, final int off, final int len) { + ByteBuffer buffer = ByteBuffer.wrap(buf, off, len); + return put(buffer); + } + + /** + * Returns true if the Bloom filter was modified by inserting the byte + * representation of an element contained in {@code byteBuffer} from {@code [position, limit)}. + */ + public boolean put(final ByteBuffer byteBuffer) { + // Check to see if we gain any information by adding this element. + switch (mode) { + case ADD_TO_ALL: + if (bloomFilters.get(bloomFilters.size() - 1).mightContain(byteBuffer)) { + // We do not gain any information by adding this element + return false; + } + break; + case ADD_TO_LAST: + for (int i = bloomFilters.size() - 1; i >= 0; i--) { + if (bloomFilters.get(i).mightContain(byteBuffer)) { + // One of the Bloom filters already considers that this element exists so skip + // adding it. + return false; + } + } + break; + default: + throw new IllegalStateException("Unknown builder mode: " + mode); + } + + // We now need to add the element to the appropriate Bloom filter(s) depending on the mode. + switch (mode) { + case ADD_TO_ALL: + int bloomFilterToStartWith = + Long.SIZE - Long.numberOfLeadingZeros(numberOfInsertions); + // If we were to attempt to add to a non-existent Bloom filter, we need to + // swap to the other mode. + if (bloomFilterToStartWith == bloomFilters.size()) { + BloomFilter last = bloomFilters.get(bloomFilters.size() - 1); + bloomFilters.clear(); + bloomFilters.add(last); + mode = Mode.ADD_TO_LAST; + addToLast(byteBuffer); + } else { + for (int i = bloomFilterToStartWith; i < bloomFilters.size(); ++i) { + bloomFilters.get(i).put(byteBuffer); + } + } + break; + case ADD_TO_LAST: + addToLast(byteBuffer); + break; + default: + throw new IllegalStateException("Unknown builder mode: " + mode); + } + numberOfInsertions += 1; + return true; + } + + /** + * Returns a scalable Bloom filter with the elements that were added. 
+ */ + public ScalableBloomFilter build() { + switch (mode) { + case ADD_TO_ALL: + int bloomFilterToUse = Long.SIZE - Long.numberOfLeadingZeros(numberOfInsertions); + if (Long.bitCount(numberOfInsertions) == 1) { + bloomFilterToUse -= 1; + } + return new ScalableBloomFilter(Arrays.asList(bloomFilters.get(bloomFilterToUse))); + case ADD_TO_LAST: + return new ScalableBloomFilter(bloomFilters); + default: + throw new IllegalStateException("Unknown builder mode: " + mode); + } + } + + private void addToLast(ByteBuffer byteBuffer) { + // If we are a power of 2, we have hit the number of expected insertions + // for the last Bloom filter and we have to add a new one. + if (Long.bitCount(numberOfInsertions) == 1) { + checkArgument(numberOfInsertions <= MAX_ELEMENTS, + "%s does not support Bloom filter slices with more than 2^63 elements.", + ScalableBloomFilter.class); + bloomFilters.add(BloomFilter.create( + ByteBufferFunnel.INSTANCE, + numberOfInsertions, + DEFAULT_FALSE_POSITIVE_PROBABILITY * Math.pow(RATIO, bloomFilters.size()))); + } + BloomFilter last = bloomFilters.get(bloomFilters.size() - 1); + last.put(byteBuffer); + } + } + + /** + * Writes {@link ByteBuffer}s to {@link PrimitiveSink}s and meant to be used + * with Guava's {@link BloomFilter} API. This {@link Funnel} does not modify the + * underlying byte buffer and assumes that {@code ByteBuffer#array} returns the backing data. + */ + private static class ByteBufferFunnel implements Funnel { + private static final ByteBufferFunnel INSTANCE = new ByteBufferFunnel(); + @Override + public void funnel(ByteBuffer from, PrimitiveSink into) { + into.putBytes(from.array(), from.position(), from.remaining()); + } + } +} + diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilterTest.java new file mode 100644 index 0000000000000..ad72e2914726c --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ScalableBloomFilterTest.java @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.dataflow.sdk.testing.CoderProperties; +import com.google.cloud.dataflow.sdk.util.ScalableBloomFilter.Builder; +import com.google.cloud.dataflow.sdk.util.ScalableBloomFilter.ScalableBloomFilterCoder; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.nio.ByteBuffer; + +/** + * Tests for {@link ScalableBloomFilter}. 
+ */ +@RunWith(JUnit4.class) +public class ScalableBloomFilterTest { + private static final ByteBuffer BUFFER = ByteBuffer.wrap(new byte[]{ 0x01, 0x02 }); + + @Test + public void testBuilderModeAddAll() throws Exception { + Builder builder = ScalableBloomFilter.builder(); + assertTrue("Expected Bloom filter to have been modified.", builder.put(BUFFER)); + + // Re-adding should skip and not record the insertion. + assertFalse("Expected Bloom filter to not have been modified.", builder.put(BUFFER)); + + // Verify insertion + int maxValue = insertAndVerifyContents(builder, 31); + + // Verify that we only have one bloom filter instead of many since the number of insertions + // is small. + ScalableBloomFilter bloomFilter = builder.build(); + assertEquals(1, bloomFilter.numberOfBloomFilterSlices()); + verifyCoder(builder.build(), maxValue); + } + + @Test + public void testBuilderModeAddAllModeAtThreshold() throws Exception { + // Use a builder where the insertion threshold to swap to add to last mode is 2^4 elements. + Builder builder = ScalableBloomFilter.builder(4); + + // Verify insertion + int maxValue = insertAndVerifyContents(builder, 16); + + ScalableBloomFilter bloomFilter = builder.build(); + // Verify at the threshold we have only built a single Bloom filter slice. + assertEquals(1, bloomFilter.numberOfBloomFilterSlices()); + + verifyCoder(bloomFilter, maxValue); + } + + @Test + public void testBuilderModeAddAllModeAtThresholdPlusOne() throws Exception { + // Use a builder where the insertion threshold to swap to add to last mode is 2^4 elements. + Builder builder = ScalableBloomFilter.builder(4); + + // Verify insertion + int maxValue = insertAndVerifyContents(builder, 17); + + ScalableBloomFilter bloomFilter = builder.build(); + // Verify that at one over the threshold, we created two Bloom filter slices. + assertEquals(2, bloomFilter.numberOfBloomFilterSlices()); + + verifyCoder(bloomFilter, maxValue); + } + + @Test + public void testBuilderModeAddLastMode() throws Exception { + // Use a builder where the insertion threshold to swap to add to last mode is 2^4 elements. + Builder builder = ScalableBloomFilter.builder(4); + + // Verify insertion + int maxValue = insertAndVerifyContents(builder, (int) Math.pow(2, 12) - 16); + + ScalableBloomFilter bloomFilter = builder.build(); + // Verify that we swapped to the scalable mode. + // This is 9 because we inserted 16 elements swapping us into the add to all mode. + // Then at every power of 2 (e.g. 32, 64, 128, ..) we add another filter. + // Thus we have a filter for every power of 2 from 2^4 to 2^12 giving us 9 filters. 
+ assertEquals(9, bloomFilter.numberOfBloomFilterSlices()); + + verifyCoder(bloomFilter, maxValue); + } + + @Test + public void testScalableBloomFilterCoder() throws Exception { + Builder builderA = ScalableBloomFilter.builder(); + builderA.put(BUFFER); + ScalableBloomFilter filterA = builderA.build(); + Builder builderB = ScalableBloomFilter.builder(); + builderB.put(BUFFER); + ScalableBloomFilter filterB = builderB.build(); + + CoderProperties.coderDecodeEncodeEqual(ScalableBloomFilterCoder.of(), filterA); + CoderProperties.coderDeterministic(ScalableBloomFilterCoder.of(), filterA, filterB); + CoderProperties.coderConsistentWithEquals(ScalableBloomFilterCoder.of(), filterA, filterB); + CoderProperties.coderSerializable(ScalableBloomFilterCoder.of()); + CoderProperties.structuralValueConsistentWithEquals( + ScalableBloomFilterCoder.of(), filterA, filterB); + } + + /** + * Inserts elements {@code 0, 1, ...} until the internal bloom filters have + * been modified {@code maxNumberOfInsertions} times. Returns the largest value inserted. + */ + private int insertAndVerifyContents(Builder builder, int maxNumberOfInsertions) { + ByteBuffer byteBuffer = ByteBuffer.allocate(4); + int value = -1; + while (maxNumberOfInsertions > 0) { + value += 1; + byteBuffer.clear(); + byteBuffer.putInt(value); + byteBuffer.rewind(); + if (builder.put(byteBuffer)) { + maxNumberOfInsertions -= 1; + } + } + + verifyContents(builder.build(), value); + return value; + } + + /** + * Verifies that the bloom filter contains all the values from {@code [0, 1, ..., maxValue]}. + */ + private void verifyContents(ScalableBloomFilter bloomFilter, int maxValue) { + ByteBuffer byteBuffer = ByteBuffer.allocate(4); + // Verify that all the values exist + for (int i = 0; i <= maxValue; ++i) { + byteBuffer.clear(); + byteBuffer.putInt(i); + byteBuffer.rewind(); + assertTrue(bloomFilter.mightContain(byteBuffer)); + } + } + + /** + * Verifies that the coder correctly encodes and decodes and that all the values + * {@code [0, 1, 2, ..., maxValue]} are contained within the decoded bloom filter. 
+ */ + private void verifyCoder(ScalableBloomFilter bloomFilter, int maxValue) throws Exception { + byte[] encodedValue = + CoderUtils.encodeToByteArray(ScalableBloomFilterCoder.of(), bloomFilter); + ScalableBloomFilter decoded = + CoderUtils.decodeFromByteArray(ScalableBloomFilterCoder.of(), encodedValue); + verifyContents(decoded, maxValue); + } +} + From f15d459d0757d63c2e3274a45d92e1bb8ffdef91 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 21 Dec 2015 09:29:50 -0800 Subject: [PATCH 1243/1541] Touch up ParDo javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110694633 --- .../cloud/dataflow/sdk/transforms/ParDo.java | 484 +++++++++--------- 1 file changed, 249 insertions(+), 235 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 6a043505c9e32..245dae01a7123 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -52,40 +52,52 @@ import javax.annotation.Nullable; /** - * {@code ParDo} is the core element-wise transform in Google Cloud - * Dataflow, invoking a user-specified function (from {@code I} to - * {@code Output}) on each of the elements of the input - * {@code PCollection} to produce zero or more output elements, all - * of which are collected into the output {@code PCollection}. + * {@link ParDo} is the core element-wise transform in Google Cloud + * Dataflow, invoking a user-specified function on each of the elements of the input + * {@link PCollection} to produce zero or more output elements, all + * of which are collected into the output {@link PCollection}. * *

      Elements are processed independently, and possibly in parallel across * distributed cloud resources. * - *

      The {@code ParDo} processing style is similar to what happens inside + *

      The {@link ParDo} processing style is similar to what happens inside * the "Mapper" or "Reducer" class of a MapReduce-style algorithm. * - *

      {@code DoFn}s

      + *

      {@link DoFn DoFns}

      * *

      The function to use to process each element is specified by a - * {@link DoFn}. + * {@link DoFn DoFn<InputT, OutputT>}, primarily via its + * {@link DoFn#processElement processElement} method. The {@link DoFn} may also + * override the default implementations of {@link DoFn#startBundle startBundle} + * and {@link DoFn#finishBundle finishBundle}. * - *

      Conceptually, when a {@code ParDo} transform is executed, the - * elements of the input {@code PCollection} are first divided up - * into some number of "batches". These are farmed off to distributed - * worker machines (or run locally, if using the - * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner}). - * For each batch of input elements, a fresh instance of the argument - * {@code DoFn} is created on a worker, then the {@code DoFn}'s - * optional {@link DoFn#startBundle} method is called to initialize it, - * then the {@code DoFn}'s required {@link DoFn#processElement} method - * is called on each of the input elements in the batch, then the - * {@code DoFn}'s optional {@link DoFn#finishBundle} method is called - * to complete its work, and finally the {@code DoFn} instance is - * thrown away. Each of the calls to any of the {@code DoFn}'s - * methods can produce zero or more output elements, which are - * collected together into a batch of output elements. All of the - * batches of output elements from all of the {@code DoFn} instances - * are "flattened" together into the output {@code PCollection}. + *

Conceptually, when a {@link ParDo} transform is executed, the + * elements of the input {@link PCollection} are first divided up + * into some number of "bundles". These are farmed off to distributed + * worker machines (or run locally, if using the {@link DirectPipelineRunner}). + * For each bundle of input elements, processing proceeds as follows: + * + *

        + *
1. A fresh instance of the argument {@link DoFn} is created on a worker. This may + * be through deserialization or other means. If the {@link DoFn} subclass + * does not override {@link DoFn#startBundle startBundle} or + * {@link DoFn#finishBundle finishBundle} then this may be optimized since + * it cannot observe the start and end of a bundle. + *
2. The {@link DoFn DoFn's} {@link DoFn#startBundle} method is called to + * initialize it. If this method is not overridden, the call may be optimized + * away. + *
3. The {@link DoFn DoFn's} {@link DoFn#processElement} method + * is called on each of the input elements in the bundle. + *
4. The {@link DoFn DoFn's} {@link DoFn#finishBundle} method is called + * to complete its work. After {@link DoFn#finishBundle} is called, the + * framework will never again invoke any of these three processing methods. + * If this method is not overridden, this call may be optimized away. + *
+ * + * Each of the calls to any of the {@link DoFn DoFn's} processing + * methods can produce zero or more output elements. All of the + * output elements from all of the {@link DoFn} instances + * are included in the output {@link PCollection}. + * *

      For example: * @@ -109,18 +121,20 @@ * }

      * *

      Each output element has the same timestamp and is in the same windows - * as its corresponding input element, and the output {@code PCollection} - * has the same - * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn} - * associated with it as the input. + * as its corresponding input element, and the output {@code PCollection} + * has the same {@link WindowFn} associated with it as the input. + * + *

      Naming {@link ParDo ParDo} transforms

      * - *

      Naming {@code ParDo}s

      + *

The name of a transform is used to provide a name for any node in the + * {@link Pipeline} graph resulting from application of the transform. + * It is best practice to provide a name at the time of application, + * via {@link PCollection#apply(String, PTransform)}. Otherwise, + * a unique name - which may not be stable across pipeline revisions - + * will be generated, based on the transform name. * - *
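For instance (an illustrative sketch; {@code ComputeWordLengthFn} is a hypothetical {@code DoFn} and {@code words} a {@code PCollection<String>}), a stable name can be supplied at application time:

  PCollection<Integer> wordLengths =
      words.apply("ComputeWordLengths",            // explicit, stable node name
          ParDo.of(new ComputeWordLengthFn()));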

      A {@code ParDo} transform can be given a name using - * {@link #named}. While the system will automatically provide a name - * if none is specified explicitly, it is still a good practice to - * provide an explicit name, since that will probably make monitoring - * output more readable. For example: + *

      If a {@link ParDo} is applied exactly once inlined, then + * it can be given a name via {@link #named}. For example: * *

       {@code
        * PCollection words =
      @@ -133,14 +147,14 @@
        *
        * 

      Side Inputs

      * - *

      While a {@code ParDo} iterates over a single "main input" - * {@code PCollection}, it can take additional "side input" - * {@code PCollectionView}s. These side input - * {@code PCollectionView}s express styles of accessing - * {@code PCollection}s computed by earlier pipeline operations, - * passed in to the {@code ParDo} transform using + *

      While a {@link ParDo} processes elements from a single "main input" + * {@link PCollection}, it can take additional "side input" + * {@link PCollectionView PCollectionViews}. These side input + * {@link PCollectionView PCollectionViews} express styles of accessing + * {@link PCollection PCollections} computed by earlier pipeline operations, + * passed in to the {@link ParDo} transform using * {@link #withSideInputs}, and their contents accessible to each of - * the {@code DoFn} operations via {@link DoFn.ProcessContext#sideInput sideInput}. + * the {@link DoFn} operations via {@link DoFn.ProcessContext#sideInput sideInput}. * For example: * *

       {@code
      @@ -162,18 +176,18 @@
        *
        * 

      Side Outputs

      * - *

      Optionally, a {@code ParDo} transform can produce multiple - * output {@code PCollection}s, both a "main output" + *

      Optionally, a {@link ParDo} transform can produce multiple + * output {@link PCollection PCollections}, both a "main output" * {@code PCollection} plus any number of "side output" - * {@code PCollection}s, each keyed by a distinct {@link TupleTag}, - * and bundled in a {@link PCollectionTuple}. The {@code TupleTag}s - * to be used for the output {@code PCollectionTuple} is specified by - * invoking {@link #withOutputTags}. Unconsumed side outputs does not - * necessarily need to be explicity specified, even if the {@code DoFn} - * generates them. Within the {@code DoFn}, an element is added to the - * main output {@code PCollection} as normal, using + * {@link PCollection PCollections}, each keyed by a distinct {@link TupleTag}, + * and bundled in a {@link PCollectionTuple}. The {@link TupleTag TupleTags} + * to be used for the output {@link PCollectionTuple} are specified by + * invoking {@link #withOutputTags}. Unconsumed side outputs do not + * necessarily need to be explicitly specified, even if the {@link DoFn} + * generates them. Within the {@link DoFn}, an element is added to the + * main output {@link PCollection} as normal, using * {@link DoFn.Context#output}, while an element is added to a side output - * {@code PCollection} using {@link DoFn.Context#sideOutput}. For example: + * {@link PCollection} using {@link DoFn.Context#sideOutput}. For example: * *

       {@code
        * PCollection words = ...;
      @@ -229,46 +243,46 @@
        *
        * 

      Properties May Be Specified In Any Order

      * - *

      Several properties can be specified for a {@code ParDo} - * {@code PTransform}, including name, side inputs, side output tags, - * and {@code DoFn} to invoke. Only the {@code DoFn} is required; the + *

      Several properties can be specified for a {@link ParDo} + * {@link PTransform}, including name, side inputs, side output tags, + * and {@link DoFn} to invoke. Only the {@link DoFn} is required; the * name is encouraged but not required, and side inputs and side - * output tags are only specified when they're needed. These + * output tags are only specified when they're needed. These * properties can be specified in any order, as long as they're - * specified before the {@code ParDo} {@code PTransform} is applied. + * specified before the {@link ParDo} {@link PTransform} is applied. * *

      The approach used to allow these properties to be specified in * any order, with some properties omitted, is to have each of the * property "setter" methods defined as static factory methods on - * {@code ParDo} itself, which return an instance of either - * {@link ParDo.Unbound ParDo.Unbound} or - * {@link ParDo.Bound ParDo.Bound} nested classes, each of which offer + * {@link ParDo} itself, which return an instance of either + * {@link ParDo.Unbound} or + * {@link ParDo.Bound} nested classes, each of which offer * property setter instance methods to enable setting additional - * properties. {@code ParDo.Bound} is used for {@code ParDo} - * transforms whose {@code DoFn} is specified and whose input and - * output static types have been bound. {@code ParDo.Unbound} is used - * for {@code ParDo} transforms that have not yet had their - * {@code DoFn} specified. Only {@code ParDo.Bound} instances can be + * properties. {@link ParDo.Bound} is used for {@link ParDo} + * transforms whose {@link DoFn} is specified and whose input and + * output static types have been bound. {@link ParDo.Unbound ParDo.Unbound} is used + * for {@link ParDo} transforms that have not yet had their + * {@link DoFn} specified. Only {@link ParDo.Bound} instances can be * applied. * *

      Another benefit of this approach is that it reduces the number - * of type parameters that need to be specified manually. In - * particular, the input and output types of the {@code ParDo} - * {@code PTransform} are inferred automatically from the type - * parameters of the {@code DoFn} argument passed to {@link ParDo#of}. + * of type parameters that need to be specified manually. In + * particular, the input and output types of the {@link ParDo} + * {@link PTransform} are inferred automatically from the type + * parameters of the {@link DoFn} argument passed to {@link ParDo#of}. * *

      Output Coders

      * - *

      By default, the {@code Coder} of the - * elements of the main output {@code PCollection} is inferred from the - * concrete type of the {@code DoFn}'s output type {@code Output}. + *

      By default, the {@link Coder Coder<OutputT>} for the + * elements of the main output {@link PCollection PCollection<OutputT>} is + * inferred from the concrete type of the {@link DoFn DoFn<InputT, OutputT>}. * - *

      By default, the {@code Coder} of the elements of a side output - * {@code PCollection} is inferred from the concrete type of the - * corresponding {@code TupleTag}'s type {@code X}. To be - * successful, the {@code TupleTag} should be created as an instance + *

      By default, the {@link Coder Coder<SideOutputT>} for the elements of + * a side output {@link PCollection PCollection<SideOutputT>} is inferred + * from the concrete type of the corresponding {@link TupleTag TupleTag<SideOutputT>}. + * To be successful, the {@link TupleTag} should be created as an instance * of a trivial anonymous subclass, with {@code {}} suffixed to the - * constructor call. Such uses block Java's generic type parameter + * constructor call. Such uses block Java's generic type parameter * inference, so the {@code } argument must be provided explicitly. * For example: *

       {@code
      @@ -281,96 +295,96 @@
        * This style of {@code TupleTag} instantiation is used in the example of
        * multiple side outputs, above.
        *
      - * 

      Serializability of {@code DoFn}s

      + *

      Serializability of {@link DoFn DoFns}

      * - *

      A {@code DoFn} passed to a {@code ParDo} transform must be - * {@code Serializable}. This allows the {@code DoFn} instance + *

A {@link DoFn} passed to a {@link ParDo} transform must be + * {@link Serializable}. This allows the {@link DoFn} instance * created in this "main program" to be sent (in serialized form) to - * remote worker machines and reconstituted for each batch of elements - * of the input {@code PCollection} being processed. A {@code DoFn} + * remote worker machines and reconstituted for each bundle of elements + * of the input {@link PCollection} being processed. A {@link DoFn} * can have instance variable state, and non-transient instance * variable state will be serialized in the main program and then - * deserialized on remote worker machines for each batch of elements + * deserialized on remote worker machines for each bundle of elements * to process. * - *

      To aid in ensuring that {@code DoFn}s are properly - * {@code Serializable}, even local execution using the + *

      To aid in ensuring that {@link DoFn DoFns} are properly + * {@link Serializable}, even local execution using the * {@link DirectPipelineRunner} will serialize and then deserialize - * {@code DoFn}s before executing them on a batch. + * {@link DoFn DoFns} before executing them on a bundle. * - *

      {@code DoFn}s expressed as anonymous inner classes can be + *

      {@link DoFn DoFns} expressed as anonymous inner classes can be * convenient, but due to a quirk in Java's rules for serializability, * non-static inner or nested classes (including anonymous inner * classes) automatically capture their enclosing class's instance in - * their serialized state. This can lead to including much more than - * intended in the serialized state of a {@code DoFn}, or even things - * that aren't {@code Serializable}. + * their serialized state. This can lead to including much more than + * intended in the serialized state of a {@link DoFn}, or even things + * that aren't {@link Serializable}. * *

      There are two ways to avoid unintended serialized state in a - * {@code DoFn}: + * {@link DoFn}: * *

        * - *
      • Define the {@code DoFn} as a named, static class. + *
      • Define the {@link DoFn} as a named, static class. * - *
      • Define the {@code DoFn} as an anonymous inner class inside of + *
      • Define the {@link DoFn} as an anonymous inner class inside of * a static method. * *
      * - *

      Both these approaches ensure that there is no implicit enclosing - * class instance serialized along with the {@code DoFn} instance. + *

      Both of these approaches ensure that there is no implicit enclosing + * instance serialized along with the {@link DoFn} instance. * *
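For example, a minimal sketch of the first approach (class and field names here are hypothetical): a named, static nested {@code DoFn} serializes only its own fields and carries no hidden reference to an enclosing instance:

  public class MyPipelineCode {
    // Named, static nested DoFn: serialization captures only this class's own state.
    static class StringLengthFn extends DoFn<String, Integer> {
      @Override
      public void processElement(ProcessContext c) {
        c.output(c.element().length());
      }
    }
    // By contrast, an anonymous DoFn declared in an instance method would implicitly
    // capture the enclosing MyPipelineCode instance in its serialized form.
  }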

      Prior to Java 8, any local variables of the enclosing * method referenced from within an anonymous inner class need to be - * marked as {@code final}. If defining the {@code DoFn} as a named + * marked as {@code final}. If defining the {@link DoFn} as a named * static class, such variables would be passed as explicit * constructor arguments and stored in explicit instance variables. * *

      There are three main ways to initialize the state of a - * {@code DoFn} instance processing a batch: + * {@link DoFn} instance processing a bundle: * *

        * - *
      • Define instance variable state (including implicit instance + *
      • Define instance variable state (including implicit instance * variables holding final variables captured by an anonymous inner - * class), initialized by the {@code DoFn}'s constructor (which is - * implicit for an anonymous inner class). This state will be + * class), initialized by the {@link DoFn}'s constructor (which is + * implicit for an anonymous inner class). This state will be * automatically serialized and then deserialized in the {@code DoFn} - * instance created for each batch. This method is good for state + * instance created for each bundle. This method is good for state * known when the original {@code DoFn} is created in the main * program, if it's not overly large. * - *
      • Compute the state as a singleton {@code PCollection} and pass it - * in as a side input to the {@code DoFn}. This is good if the state + *
      • Compute the state as a singleton {@link PCollection} and pass it + * in as a side input to the {@link DoFn}. This is good if the state * needs to be computed by the pipeline, or if the state is very large * and so is best read from file(s) rather than sent as part of the * {@code DoFn}'s serialized state. * - *
      • Initialize the state in each {@code DoFn} instance, in - * {@link DoFn#startBundle}. This is good if the initialization + *
      • Initialize the state in each {@link DoFn} instance, in + * {@link DoFn#startBundle}. This is good if the initialization * doesn't depend on any information known only by the main program or * computed by earlier pipeline operations, but is the same for all - * instances of this {@code DoFn} for all program executions, say + * instances of this {@link DoFn} for all program executions, say * setting up empty caches or initializing constant data. * *
      * *
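A short sketch of the third option (names and the lookup are hypothetical): state that is identical for every execution can simply be rebuilt in {@link DoFn#startBundle}, so nothing extra needs to be serialized with the {@code DoFn}:

  // Requires java.util.HashMap and java.util.Map.
  static class CachedLookupFn extends DoFn<String, String> {
    // Rebuilt at the start of every bundle; transient so it is never serialized.
    private transient Map<String, String> cache;

    @Override
    public void startBundle(Context c) {
      cache = new HashMap<>();
    }

    @Override
    public void processElement(ProcessContext c) {
      String key = c.element();
      String value = cache.get(key);
      if (value == null) {
        value = key.toUpperCase();  // stand-in for a more expensive computation
        cache.put(key, value);
      }
      c.output(value);
    }
  }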

      No Global Shared State

      * - *

      {@code ParDo} operations are intended to be able to run in - * parallel across multiple worker machines. This precludes easy - * sharing and updating mutable state across those machines. There is + *

      {@link ParDo} operations are intended to be able to run in + * parallel across multiple worker machines. This precludes easy + * sharing and updating mutable state across those machines. There is * no support in the Google Cloud Dataflow system for communicating * and synchronizing updates to shared state across worker machines, * so programs should not access any mutable static variable state in - * their {@code DoFn}, without understanding that the Java processes + * their {@link DoFn}, without understanding that the Java processes * for the main program and workers will each have its own independent * copy of such state, and there won't be any automatic copying of - * that state across Java processes. All information should be - * communicated to {@code DoFn} instances via main and side inputs and + * that state across Java processes. All information should be + * communicated to {@link DoFn} instances via main and side inputs and * serialized state, and all output should be communicated from a - * {@code DoFn} instance via main and side outputs, in the absence of + * {@link DoFn} instance via main and side outputs, in the absence of * external communication mechanisms written by user code. * *

      Fault Tolerance

      @@ -378,53 +392,53 @@ *

In a distributed system, things can fail: machines can crash, * machines can be unable to communicate across the network, etc. * While individual failures are rare, the larger the job, the greater - * the chance that something, somewhere, will fail. The Google Cloud + * the chance that something, somewhere, will fail. The Google Cloud * Dataflow service strives to mask such failures automatically, - * principally by retrying failed {@code DoFn} batches. This means - * that a {@code DoFn} instance might process a batch partially, then + * principally by retrying failed {@link DoFn} bundles. This means + * that a {@code DoFn} instance might process a bundle partially, then * crash for some reason, then be rerun (often on a different worker - * machine) on that same batch and on the same elements as before. - * Sometimes two or more {@code DoFn} instances will be running on the - * same batch simultaneously, with the system taking the results of - * the first instance to complete successfully. Consequently, the - * code in a {@code DoFn} needs to be written such that these + * machine) on that same bundle and on the same elements as before. + * Sometimes two or more {@link DoFn} instances will be running on the + * same bundle simultaneously, with the system taking the results of + * the first instance to complete successfully. Consequently, the + * code in a {@link DoFn} needs to be written such that these * duplicate (sequential or concurrent) executions do not cause - * problems. If the outputs of a {@code DoFn} are a pure function of - * its inputs, then this requirement is satisfied. However, if a - * {@code DoFn}'s execution has external side-effects, say performing - * updates to external HTTP services, then the {@code DoFn}'s code + * problems. If the outputs of a {@link DoFn} are a pure function of + * its inputs, then this requirement is satisfied. However, if a + * {@link DoFn DoFn's} execution has external side-effects, such as performing + * updates to external HTTP services, then the {@link DoFn DoFn's} code * needs to take care to ensure that those updates are idempotent and - * that concurrent updates are acceptable. This property can be + * that concurrent updates are acceptable. This property can be * difficult to achieve, so it is advisable to strive to keep - * {@code DoFn}s as pure functions as much as possible. + * {@link DoFn DoFns} as pure functions as much as possible. * *

      Optimization

      * *

      The Google Cloud Dataflow service automatically optimizes a - * pipeline before it is executed. A key optimization, fusion, - * relates to ParDo operations. If one ParDo operation produces a - * PCollection that is then consumed as the main input of another - * ParDo operation, the two ParDo operations will be fused + * pipeline before it is executed. A key optimization, fusion, + * relates to {@link ParDo} operations. If one {@link ParDo} operation produces a + * {@link PCollection} that is then consumed as the main input of another + * {@link ParDo} operation, the two {@link ParDo} operations will be fused * together into a single ParDo operation and run in a single pass; - * this is "producer-consumer fusion". Similarly, if - * two or more ParDo operations have the same PCollection main input, - * they will be fused into a single ParDo that makes just one pass - * over the input PCollection; this is "sibling fusion". + * this is "producer-consumer fusion". Similarly, if + * two or more ParDo operations have the same {@link PCollection} main input, + * they will be fused into a single {@link ParDo} that makes just one pass + * over the input {@link PCollection}; this is "sibling fusion". * *

      If after fusion there are no more unfused references to a - * PCollection (e.g., one between a producer ParDo and a consumer - * ParDo), the PCollection itself is "fused away" and won't ever be + * {@link PCollection} (e.g., one between a producer ParDo and a consumer + * {@link ParDo}), the {@link PCollection} itself is "fused away" and won't ever be * written to disk, saving all the I/O and space expense of * constructing it. * *

      The Google Cloud Dataflow service applies fusion as much as - * possible, greatly reducing the cost of executing pipelines. As a - * result, it is essentially "free" to write ParDo operations in a - * very modular, composable style, each ParDo operation doing one - * clear task, and stringing together sequences of ParDo operations to - * get the desired overall effect. Such programs can be easier to + * possible, greatly reducing the cost of executing pipelines. As a + * result, it is essentially "free" to write {@link ParDo} operations in a + * very modular, composable style, each {@link ParDo} operation doing one + * clear task, and stringing together sequences of {@link ParDo} operations to + * get the desired overall effect. Such programs can be easier to * understand, easier to unit-test, easier to extend and evolve, and - * easier to reuse in new programs. The predefined library of + * easier to reuse in new programs. The predefined library of * PTransforms that come with Google Cloud Dataflow makes heavy use of * this modular, composable style, trusting to the Google Cloud * Dataflow service's optimizer to "flatten out" all the compositions @@ -436,44 +450,44 @@ public class ParDo { /** - * Creates a {@code ParDo} {@code PTransform} with the given name. + * Creates a {@link ParDo} {@link PTransform} with the given name. * - *

      See the discussion of Naming above for more explanation. + *

      See the discussion of naming above for more explanation. * - *

      The resulting {@code PTransform} is incomplete, and its - * input/output types are not yet bound. Use + *

      The resulting {@link PTransform} is incomplete, and its + * input/output types are not yet bound. Use * {@link ParDo.Unbound#of} to specify the {@link DoFn} to * invoke, which will also bind the input/output types of this - * {@code PTransform}. + * {@link PTransform}. */ public static Unbound named(String name) { return new Unbound().named(name); } /** - * Creates a {@code ParDo} {@code PTransform} with the given + * Creates a {@link ParDo} {@link PTransform} with the given * side inputs. * - *

      Side inputs are {@link PCollectionView}s, whose contents are + *

      Side inputs are {@link PCollectionView PCollectionViews}, whose contents are * computed during pipeline execution and then made accessible to - * {@code DoFn} code via {@link DoFn.ProcessContext#sideInput sideInput}. Each - * invocation of the {@code DoFn} receives the same values for these + * {@link DoFn} code via {@link DoFn.ProcessContext#sideInput sideInput}. Each + * invocation of the {@link DoFn} receives the same values for these * side inputs. * *

      See the discussion of Side Inputs above for more explanation. * - *

      The resulting {@code PTransform} is incomplete, and its - * input/output types are not yet bound. Use + *

      The resulting {@link PTransform} is incomplete, and its + * input/output types are not yet bound. Use * {@link ParDo.Unbound#of} to specify the {@link DoFn} to * invoke, which will also bind the input/output types of this - * {@code PTransform}. + * {@link PTransform}. */ public static Unbound withSideInputs(PCollectionView... sideInputs) { return new Unbound().withSideInputs(sideInputs); } /** - * Creates a {@code ParDo} with the given side inputs. + * Creates a {@link ParDo} with the given side inputs. * *

      Side inputs are {@link PCollectionView}s, whose contents are * computed during pipeline execution and then made accessible to @@ -481,11 +495,11 @@ public static Unbound withSideInputs(PCollectionView... sideInputs) { * *

      See the discussion of Side Inputs above for more explanation. * - *

      The resulting {@code PTransform} is incomplete, and its - * input/output types are not yet bound. Use + *

      The resulting {@link PTransform} is incomplete, and its + * input/output types are not yet bound. Use * {@link ParDo.Unbound#of} to specify the {@link DoFn} to * invoke, which will also bind the input/output types of this - * {@code PTransform}. + * {@link PTransform}. */ public static Unbound withSideInputs( Iterable> sideInputs) { @@ -493,18 +507,18 @@ public static Unbound withSideInputs( } /** - * Creates a multi-output {@code ParDo} {@code PTransform} whose + * Creates a multi-output {@link ParDo} {@link PTransform} whose * output {@link PCollection}s will be referenced using the given main * output and side output tags. * - *

      {@link TupleTag}s are used to name (with its static element + *

      {@link TupleTag TupleTags} are used to name (with its static element * type {@code T}) each main and side output {@code PCollection}. - * This {@code PTransform}'s {@link DoFn} emits elements to the main - * output {@code PCollection} as normal, using - * {@link DoFn.Context#output}. The {@code DoFn} emits elements to + * This {@link PTransform PTransform's} {@link DoFn} emits elements to the main + * output {@link PCollection} as normal, using + * {@link DoFn.Context#output}. The {@link DoFn} emits elements to * a side output {@code PCollection} using * {@link DoFn.Context#sideOutput}, passing that side output's tag - * as an argument. The result of invoking this {@code PTransform} + * as an argument. The result of invoking this {@link PTransform} * will be a {@link PCollectionTuple}, and any of the the main and * side output {@code PCollection}s can be retrieved from it via * {@link PCollectionTuple#get}, passing the output's tag as an @@ -512,10 +526,10 @@ public static Unbound withSideInputs( * *

      See the discussion of Side Outputs above for more explanation. * - *

      The resulting {@code PTransform} is incomplete, and its input - * type is not yet bound. Use {@link ParDo.UnboundMulti#of} + *

      The resulting {@link PTransform} is incomplete, and its input + * type is not yet bound. Use {@link ParDo.UnboundMulti#of} * to specify the {@link DoFn} to invoke, which will also bind the - * input type of this {@code PTransform}. + * input type of this {@link PTransform}. */ public static UnboundMulti withOutputTags( TupleTag mainOutputTag, @@ -524,13 +538,13 @@ public static UnboundMulti withOutputTags( } /** - * Creates a {@code ParDo} {@code PTransform} that will invoke the + * Creates a {@link ParDo} {@link PTransform} that will invoke the * given {@link DoFn} function. * - *

      The resulting {@code PTransform}'s types have been bound, with the + *

      The resulting {@link PTransform PTransform's} types have been bound, with the * input being a {@code PCollection} and the output a * {@code PCollection}, inferred from the types of the argument - * {@code DoFn}. It is ready to be applied, or further + * {@code DoFn}. It is ready to be applied, or further * properties can be set on it first. */ public static Bound of(DoFn fn) { @@ -543,13 +557,13 @@ public static Bound of(DoFn } /** - * Creates a {@code ParDo} {@code PTransform} that will invoke the + * Creates a {@link ParDo} {@link PTransform} that will invoke the * given {@link DoFnWithContext} function. * - *

      The resulting {@code PTransform}'s types have been bound, with the + *

      The resulting {@link PTransform PTransform's} types have been bound, with the * input being a {@code PCollection} and the output a * {@code PCollection}, inferred from the types of the argument - * {@code DoFn}. It is ready to be applied, or further + * {@code DoFn}. It is ready to be applied, or further * properties can be set on it first. * *

      {@link DoFnWithContext} is an experimental alternative to @@ -561,11 +575,11 @@ public static Bound of(DoFnWithContextBefore being applied, {@link ParDo.Unbound#of} must be * invoked to specify the {@link DoFn} to invoke, which will also - * bind the input/output types of this {@code PTransform}. + * bind the input/output types of this {@link PTransform}. */ public static class Unbound { private final String name; @@ -581,18 +595,18 @@ public static class Unbound { } /** - * Returns a new {@code ParDo} transform that's like this - * transform but with the specified name. Does not modify this - * transform. The resulting transform is still incomplete. + * Returns a new {@link ParDo} transform that's like this + * transform but with the specified name. Does not modify this + * transform. The resulting transform is still incomplete. * - *

      See the discussion of Naming above for more explanation. + *

      See the discussion of naming above for more explanation. */ public Unbound named(String name) { return new Unbound(name, sideInputs); } /** - * Returns a new {@code ParDo} transform that's like this + * Returns a new {@link ParDo} transform that's like this * transform but with the specified side inputs. * Does not modify this transform. The resulting transform is * still incomplete. @@ -605,9 +619,9 @@ public Unbound withSideInputs(PCollectionView... sideInputs) { } /** - * Returns a new {@code ParDo} transform that's like this - * transform but with the specified side inputs. Does not modify - * this transform. The resulting transform is still incomplete. + * Returns a new {@link ParDo} transform that is like this + * transform but with the specified side inputs. Does not modify + * this transform. The resulting transform is still incomplete. * *

      See the discussion of Side Inputs above and on * {@link ParDo#withSideInputs} for more explanation. @@ -618,9 +632,9 @@ public Unbound withSideInputs( } /** - * Returns a new multi-output {@code ParDo} transform that's like + * Returns a new multi-output {@link ParDo} transform that's like * this transform but with the specified main and side output - * tags. Does not modify this transform. The resulting transform + * tags. Does not modify this transform. The resulting transform * is still incomplete. * *

      See the discussion of Side Outputs above and on @@ -633,10 +647,10 @@ public UnboundMulti withOutputTags(TupleTag mainOutp } /** - * Returns a new {@code ParDo} {@code PTransform} that's like this + * Returns a new {@link ParDo} {@link PTransform} that's like this * transform but that will invoke the given {@link DoFn} - * function, and that has its input and output types bound. Does - * not modify this transform. The resulting {@code PTransform} is + * function, and that has its input and output types bound. Does + * not modify this transform. The resulting {@link PTransform} is * sufficiently specified to be applied, but more properties can * still be specified. */ @@ -645,10 +659,10 @@ public Bound of(DoFn fn) { } /** - * Returns a new {@code ParDo} {@code PTransform} that's like this + * Returns a new {@link ParDo} {@link PTransform} that's like this * transform but which will invoke the given {@link DoFnWithContext} - * function, and which has its input and output types bound. Does - * not modify this transform. The resulting {@code PTransform} is + * function, and which has its input and output types bound. Does + * not modify this transform. The resulting {@link PTransform} is * sufficiently specified to be applied, but more properties can * still be specified. */ @@ -658,7 +672,7 @@ public Bound of(DoFnWithContext}, + * A {@link PTransform} that, when applied to a {@code PCollection}, * invokes a user-specified {@code DoFn} on all its elements, * with all its outputs collected into an output * {@code PCollection}. @@ -666,8 +680,8 @@ public Bound of(DoFnWithContextA multi-output form of this transform can be created with * {@link ParDo.Bound#withOutputTags}. * - * @param the type of the (main) input {@code PCollection} elements - * @param the type of the (main) output {@code PCollection} elements + * @param the type of the (main) input {@link PCollection} elements + * @param the type of the (main) output {@link PCollection} elements */ public static class Bound extends PTransform, PCollection> { @@ -684,9 +698,9 @@ public static class Bound } /** - * Returns a new {@code ParDo} {@code PTransform} that's like this - * {@code PTransform} but with the specified name. Does not - * modify this {@code PTransform}. + * Returns a new {@link ParDo} {@link PTransform} that's like this + * {@link PTransform} but with the specified name. Does not + * modify this {@link PTransform}. * *

      See the discussion of Naming above for more explanation. */ @@ -695,9 +709,9 @@ public Bound named(String name) { } /** - * Returns a new {@code ParDo} {@code PTransform} that's like this - * {@code PTransform} but with the specified side inputs. Does not - * modify this {@code PTransform}. + * Returns a new {@link ParDo} {@link PTransform} that's like this + * {@link PTransform} but with the specified side inputs. Does not + * modify this {@link PTransform}. * *

      See the discussion of Side Inputs above and on * {@link ParDo#withSideInputs} for more explanation. @@ -707,9 +721,9 @@ public Bound withSideInputs(PCollectionView... sideInputs) { } /** - * Returns a new {@code ParDo} {@code PTransform} that's like this - * {@code PTransform} but with the specified side inputs. Does not - * modify this {@code PTransform}. + * Returns a new {@link ParDo} {@link PTransform} that's like this + * {@link PTransform} but with the specified side inputs. Does not + * modify this {@link PTransform}. * *

      See the discussion of Side Inputs above and on * {@link ParDo#withSideInputs} for more explanation. @@ -720,9 +734,9 @@ public Bound withSideInputs( } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} - * that's like this {@code PTransform} but with the specified main - * and side output tags. Does not modify this {@code PTransform}. + * Returns a new multi-output {@link ParDo} {@link PTransform} + * that's like this {@link PTransform} but with the specified main + * and side output tags. Does not modify this {@link PTransform}. * *

      See the discussion of Side Outputs above and on * {@link ParDo#withOutputTags} for more explanation. @@ -772,12 +786,12 @@ public List> getSideInputs() { } /** - * An incomplete multi-output {@code ParDo} transform, with unbound + * An incomplete multi-output {@link ParDo} transform, with unbound * input type. * *

      Before being applied, {@link ParDo.UnboundMulti#of} must be * invoked to specify the {@link DoFn} to invoke, which will also - * bind the input type of this {@code PTransform}. + * bind the input type of this {@link PTransform}. * * @param the type of the main output {@code PCollection} elements */ @@ -798,9 +812,9 @@ public static class UnboundMulti { } /** - * Returns a new multi-output {@code ParDo} transform that's like - * this transform but with the specified name. Does not modify - * this transform. The resulting transform is still incomplete. + * Returns a new multi-output {@link ParDo} transform that's like + * this transform but with the specified name. Does not modify + * this transform. The resulting transform is still incomplete. * *

      See the discussion of Naming above for more explanation. */ @@ -810,9 +824,9 @@ public UnboundMulti named(String name) { } /** - * Returns a new multi-output {@code ParDo} transform that's like - * this transform but with the specified side inputs. Does not - * modify this transform. The resulting transform is still + * Returns a new multi-output {@link ParDo} transform that's like + * this transform but with the specified side inputs. Does not + * modify this transform. The resulting transform is still * incomplete. * *
<p>
      See the discussion of Side Inputs above and on @@ -824,9 +838,9 @@ public UnboundMulti withSideInputs( } /** - * Returns a new multi-output {@code ParDo} transform that's like - * this transform but with the specified side inputs. Does not - * modify this transform. The resulting transform is still + * Returns a new multi-output {@link ParDo} transform that's like + * this transform but with the specified side inputs. Does not + * modify this transform. The resulting transform is still * incomplete. * *
<p>
      See the discussion of Side Inputs above and on @@ -840,11 +854,11 @@ public UnboundMulti withSideInputs( } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} + * Returns a new multi-output {@link ParDo} {@link PTransform} * that's like this transform but that will invoke the given * {@link DoFn} function, and that has its input type bound. - * Does not modify this transform. The resulting - * {@code PTransform} is sufficiently specified to be applied, but + * Does not modify this transform. The resulting + * {@link PTransform} is sufficiently specified to be applied, but * more properties can still be specified. */ public BoundMulti of(DoFn fn) { @@ -853,11 +867,11 @@ public BoundMulti of(DoFn fn) { } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} + * Returns a new multi-output {@link ParDo} {@link PTransform} * that's like this transform but which will invoke the given * {@link DoFnWithContext} function, and which has its input type bound. - * Does not modify this transform. The resulting - * {@code PTransform} is sufficiently specified to be applied, but + * Does not modify this transform. The resulting + * {@link PTransform} is sufficiently specified to be applied, but * more properties can still be specified. */ public BoundMulti of(DoFnWithContext fn) { @@ -866,10 +880,10 @@ public BoundMulti of(DoFnWithContext } /** - * A {@code PTransform} that, when applied to a + * A {@link PTransform} that, when applied to a * {@code PCollection}, invokes a user-specified * {@code DoFn} on all its elements, which can emit elements - * to any of the {@code PTransform}'s main and side output + * to any of the {@link PTransform}'s main and side output * {@code PCollection}s, which are bundled into a result * {@code PCollectionTuple}. * @@ -897,9 +911,9 @@ public static class BoundMulti } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} - * that's like this {@code PTransform} but with the specified - * name. Does not modify this {@code PTransform}. + * Returns a new multi-output {@link ParDo} {@link PTransform} + * that's like this {@link PTransform} but with the specified + * name. Does not modify this {@link PTransform}. * *
<p>
      See the discussion of Naming above for more explanation. */ @@ -909,9 +923,9 @@ public BoundMulti named(String name) { } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} - * that's like this {@code PTransform} but with the specified side - * inputs. Does not modify this {@code PTransform}. + * Returns a new multi-output {@link ParDo} {@link PTransform} + * that's like this {@link PTransform} but with the specified side + * inputs. Does not modify this {@link PTransform}. * *
<p>
      See the discussion of Side Inputs above and on * {@link ParDo#withSideInputs} for more explanation. @@ -922,9 +936,9 @@ public BoundMulti withSideInputs( } /** - * Returns a new multi-output {@code ParDo} {@code PTransform} - * that's like this {@code PTransform} but with the specified side - * inputs. Does not modify this {@code PTransform}. + * Returns a new multi-output {@link ParDo} {@link PTransform} + * that's like this {@link PTransform} but with the specified side + * inputs. Does not modify this {@link PTransform}. * *
<p>
      See the discussion of Side Inputs above and on * {@link ParDo#withSideInputs} for more explanation. From 23d96f3cd3a185c8d9429903c3f0be0a86daf064 Mon Sep 17 00:00:00 2001 From: millsd Date: Mon, 21 Dec 2015 15:01:58 -0800 Subject: [PATCH 1244/1541] Avoid integer overflow when computing maximum cache weight ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110719783 --- .../cloud/dataflow/sdk/runners/worker/DataflowWorker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index e3536a92a5273..65c3f41728f33 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -115,7 +115,7 @@ public class DataflowWorker { */ private static final int OVERHEAD_WEIGHT = 8; - private static final int MEGABYTES = 1024 * 1024; + private static final long MEGABYTES = 1024 * 1024; public static final int DEFAULT_STATUS_PORT = 18081; From 61bd0816f743abddf78bc4d6bf9a8cfdcb6ac12d Mon Sep 17 00:00:00 2001 From: millsd Date: Mon, 21 Dec 2015 15:27:13 -0800 Subject: [PATCH 1245/1541] Enable state caching for per-key windmill state ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110721947 --- .../worker/CachingSideInputReader.java | 33 +- .../worker/DataflowSideInputReader.java | 14 +- .../sdk/runners/worker/DataflowWorker.java | 10 +- .../worker/StreamingDataflowWorker.java | 14 +- .../worker/StreamingModeExecutionContext.java | 27 +- .../{SizedWeigher.java => Weighers.java} | 34 +- .../runners/worker/WindmillStateCache.java | 292 ++++++++ .../worker/WindmillStateInternals.java | 630 +++++++++++------- .../runners/worker/WindmillStateReader.java | 42 +- .../sdk/transforms/ApproximateQuantiles.java | 28 +- .../cloud/dataflow/sdk/util/Weighted.java | 27 + ...ader.java => WeightedSideInputReader.java} | 18 +- .../util/{Sized.java => WeightedValue.java} | 19 +- .../util/state/InMemoryStateInternals.java | 2 +- .../sdk/util/state/MergingStateInternals.java | 2 +- .../sdk/util/state/StateNamespace.java | 9 + .../sdk/util/state/StateNamespaceForTest.java | 5 + .../sdk/util/state/StateNamespaces.java | 21 + .../dataflow/sdk/util/state/StateTag.java | 4 +- .../dataflow/sdk/util/state/StateTags.java | 9 +- .../cloud/dataflow/sdk/DataflowMatchers.java | 65 ++ .../runners/dataflow/CustomSourcesTest.java | 5 +- .../worker/CachingSideInputReaderTest.java | 33 +- .../worker/DataflowSideInputReaderTest.java | 16 +- .../StreamingModeExecutionContextTest.java | 13 +- ...ava => WeightedDirectSideInputReader.java} | 33 +- .../worker/WindmillStateCacheTest.java | 210 ++++++ .../worker/WindmillStateInternalsTest.java | 265 +++++--- .../dataflow/sdk/util/ApiSurfaceTest.java | 2 +- 29 files changed, 1376 insertions(+), 506 deletions(-) rename sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/{SizedWeigher.java => Weighers.java} (61%) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCache.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java rename sdk/src/main/java/com/google/cloud/dataflow/sdk/util/{SizedSideInputReader.java => WeightedSideInputReader.java} (73%) rename 
sdk/src/main/java/com/google/cloud/dataflow/sdk/util/{Sized.java => WeightedValue.java} (67%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java rename sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/{SizedDirectSideInputReader.java => WeightedDirectSideInputReader.java} (60%) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCacheTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReader.java index b1dd17efbdd33..afd77f0666cee 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReader.java @@ -19,8 +19,8 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.util.PCollectionViewWindow; import com.google.cloud.dataflow.sdk.util.SideInputReader; -import com.google.cloud.dataflow.sdk.util.Sized; -import com.google.cloud.dataflow.sdk.util.SizedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.common.cache.Cache; @@ -37,19 +37,19 @@ * with a {@code Cache} created by anything other than the SDK. */ final class CachingSideInputReader - extends SizedSideInputReader.Defaults - implements SizedSideInputReader { - private final SizedSideInputReader subReader; - private final Cache, Sized> cache; + extends WeightedSideInputReader.Defaults + implements WeightedSideInputReader { + private final WeightedSideInputReader subReader; + private final Cache, WeightedValue> cache; - private CachingSideInputReader( - SizedSideInputReader subReader, Cache, Sized> cache) { + private CachingSideInputReader(WeightedSideInputReader subReader, + Cache, WeightedValue> cache) { this.subReader = subReader; this.cache = cache; } - public static CachingSideInputReader of( - SizedSideInputReader subReader, Cache, Sized> cache) { + public static CachingSideInputReader of(WeightedSideInputReader subReader, + Cache, WeightedValue> cache) { return new CachingSideInputReader(subReader, cache); } @@ -64,24 +64,25 @@ public boolean isEmpty() { } @Override - public Sized getSized( + public WeightedValue getWeighted( final PCollectionView view, final BoundedWindow window) { PCollectionViewWindow cacheKey = PCollectionViewWindow.of(view, window); try { @SuppressWarnings("unchecked") // safely uncasting the thing from the callback - Sized sideInputContents = (Sized) cache.get(cacheKey, - new Callable>() { + WeightedValue sideInputContents = (WeightedValue) cache.get(cacheKey, + new Callable>() { @Override - public Sized call() { + public WeightedValue call() { @SuppressWarnings("unchecked") // safe covariant cast - Sized value = (Sized) subReader.getSized(view, window); + WeightedValue value = + (WeightedValue) subReader.getWeighted(view, window); return value; } }); return sideInputContents; } catch (ExecutionException checkedException) { - // The call to subReader.getSized() is not permitted to throw any checked exceptions, + // The call to subReader.getWeighted() is not permitted to throw any checked exceptions, // so the Callable created above should not throw any either. 
throw new RuntimeException("Unexpected checked exception.", checkedException.getCause()); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java index 545f32bacad43..ed1d702791ab1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReader.java @@ -23,8 +23,8 @@ import com.google.cloud.dataflow.sdk.util.ExecutionContext; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.SideInputReader; -import com.google.cloud.dataflow.sdk.util.Sized; -import com.google.cloud.dataflow.sdk.util.SizedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -45,8 +45,8 @@ * amount of data for each access. */ public class DataflowSideInputReader - extends SizedSideInputReader.Defaults - implements SizedSideInputReader { + extends WeightedSideInputReader.Defaults + implements WeightedSideInputReader { /** An observer for each side input to count its size as it is being read. */ private final Map, ByteSizeObserver> observers; @@ -110,7 +110,7 @@ public boolean isEmpty() { * the value for the appropriate window. */ @Override - public Sized getSized(PCollectionView view, final BoundedWindow window) { + public WeightedValue getWeighted(PCollectionView view, final BoundedWindow window) { final TupleTag>> tag = view.getTagInternal(); if (!sideInputValues.has(tag)) { throw new IllegalArgumentException("calling getSideInput() with unknown view"); @@ -125,7 +125,7 @@ public Sized getSized(PCollectionView view, final BoundedWindow window value = view.fromIterableInternal(sideInputValues.get(tag)); long bytesRead = observer.getBytes(); observer.reset(); - return Sized.of(value, overhead + bytesRead); + return WeightedValue.of(value, overhead + bytesRead); } else { final long[] sum = new long[]{ 0L }; value = view.fromIterableInternal( @@ -142,7 +142,7 @@ public boolean apply(WindowedValue element) { return containsWindow; } })); - return Sized.of(value, overhead + sum[0]); + return WeightedValue.of(value, overhead + sum[0]); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java index 65c3f41728f33..5ade506440ab9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorker.java @@ -39,8 +39,8 @@ import com.google.cloud.dataflow.sdk.util.CloudMetricUtils; import com.google.cloud.dataflow.sdk.util.PCollectionViewWindow; import com.google.cloud.dataflow.sdk.util.SideInputReader; -import com.google.cloud.dataflow.sdk.util.Sized; import com.google.cloud.dataflow.sdk.util.UserCodeException; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.util.common.Counter; import com.google.cloud.dataflow.sdk.util.common.CounterSet; import com.google.cloud.dataflow.sdk.util.common.Metric; @@ -97,7 +97,7 @@ public class DataflowWorker { /** * A side input cache shared between 
all execution contexts. */ - private final Cache, Sized> sideInputCache; + private final Cache, WeightedValue> sideInputCache; /** * Status server returning health of worker. @@ -124,7 +124,7 @@ public DataflowWorker(WorkUnitClient workUnitClient, DataflowWorkerHarnessOption this.options = options; this.sideInputCache = CacheBuilder.newBuilder() .maximumWeight(options.getWorkerCacheMb() * MEGABYTES) // weights are in bytes - .weigher(SizedWeigher., Object>withBaseWeight(OVERHEAD_WEIGHT)) + .weigher(Weighers.fixedWeightKeys(OVERHEAD_WEIGHT)) .softValues() .build(); } @@ -421,11 +421,11 @@ public abstract WorkItemServiceState reportWorkItemStatus(WorkItemStatus workIte */ private static class DataflowWorkerExecutionContext extends BatchModeExecutionContext { - private final Cache, Sized> cache; + private final Cache, WeightedValue> cache; private final PipelineOptions options; public DataflowWorkerExecutionContext( - Cache, Sized> cache, PipelineOptions options) { + Cache, WeightedValue> cache, PipelineOptions options) { super(options); this.cache = cache; this.options = options; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java index 81bab4e2402c9..121cf16de9f00 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingDataflowWorker.java @@ -221,6 +221,8 @@ public ReaderCacheEntry(UnboundedSource.UnboundedReader reader, long token) { private ConcurrentMap stateNameMap; private ConcurrentMap systemNameToComputationIdMap; + private WindmillStateCache stateCache = new WindmillStateCache(); + private ThreadFactory threadFactory; private BoundedQueueExecutor workUnitExecutor; private ExecutorService commitExecutor; @@ -479,8 +481,8 @@ private void process(final String computation, final MapTask mapTask, WorkerAndContext workerAndContext = mapTaskExecutors.get(computation).poll(); if (workerAndContext == null) { CounterSet counters = new CounterSet(); - context = new StreamingModeExecutionContext( - mapTask.getSystemName(), readerCache.get(computation), stateNameMap); + context = new StreamingModeExecutionContext(mapTask.getSystemName(), + readerCache.get(computation), stateNameMap, stateCache.forComputation(computation)); StateSampler sampler = new StateSampler(mapTask.getStageName() + "-", counters.getAddCounterMutator()); // In streaming mode, state samplers are long lived. So here a unique id is generated as @@ -943,6 +945,8 @@ public void handle( printThreads(responseWriter); } else if (target.equals("/heapz")) { dumpHeap(responseWriter); + } else if (target.equals("/cachez")) { + stateCache.printDetailedHtml(responseWriter); } else { printHeader(responseWriter); printResources(responseWriter); @@ -978,6 +982,11 @@ private void printMetrics(PrintWriter response) { response.println(""); } response.println(""); + + stateCache.printSummaryHtml(response); + + metricTrackingWindmillServer.printHtml(response); + response.println("Active Keys:
        "); for (Map.Entry computationEntry : activeWorkMap.entrySet()) { @@ -988,7 +997,6 @@ private void printMetrics(PrintWriter response) { response.println(""); } response.println("
      "); - metricTrackingWindmillServer.printHtml(response); } private void printResources(PrintWriter response) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java index 116d3c6a7dc5d..38e005e0f24bd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContext.java @@ -65,6 +65,7 @@ public class StreamingModeExecutionContext // Per-key cache of active Reader objects in use by this process. private final ConcurrentMap readerCache; private final ConcurrentMap stateNameMap; + private final WindmillStateCache.ForComputation stateCache; private Windmill.WorkItem work; @Nullable private Instant inputDataWatermark; @@ -74,14 +75,14 @@ public class StreamingModeExecutionContext private Windmill.WorkItemCommitRequest.Builder outputBuilder; private UnboundedSource.UnboundedReader activeReader; - public StreamingModeExecutionContext( - String stageName, + public StreamingModeExecutionContext(String stageName, ConcurrentMap readerCache, - ConcurrentMap stateNameMap) { + ConcurrentMap stateNameMap, WindmillStateCache.ForComputation stateCache) { this.stageName = stageName; this.sideInputCache = new HashMap<>(); this.readerCache = readerCache; this.stateNameMap = stateNameMap; + this.stateCache = stateCache; } public void start( @@ -380,9 +381,10 @@ private Windmill.Timer.Type timerType(TimeDomain domain) { } class StepContext extends BaseExecutionContext.StepContext { + private static final String DEFAULT_STATE_FAMILY = ""; + private WindmillStateInternals stateInternals; private WindmillTimerInternals timerInternals; - private final String prefix; private final String stateFamily; private final Supplier scopedReadStateSupplier; @@ -390,14 +392,9 @@ public StepContext( final String stepName, String transformName, final StateSampler stateSampler) { super(StreamingModeExecutionContext.this, stepName, transformName); - if (stateNameMap.isEmpty()) { - this.prefix = transformName; - this.stateFamily = ""; - } else { - String mappedName = stateNameMap.get(transformName); - this.prefix = mappedName == null ? "" : mappedName; - this.stateFamily = prefix; - } + String mappedName = stateNameMap.get(transformName); + this.stateFamily = mappedName == null ? DEFAULT_STATE_FAMILY : mappedName; + this.scopedReadStateSupplier = new Supplier() { private int readState = -1; // Uninitialized value. 
@@ -420,9 +417,9 @@ public StateSampler.ScopedState get() { public void start( WindmillStateReader stateReader, @Nullable Instant inputDataWatermark, @Nullable Instant outputDataWatermark) { - boolean useStateFamilies = !stateNameMap.isEmpty(); - this.stateInternals = new WindmillStateInternals( - prefix, useStateFamilies, stateReader, scopedReadStateSupplier); + this.stateInternals = new WindmillStateInternals(stateFamily, stateReader, + stateCache.forKey(getSerializedKey(), stateFamily, getWork().getCacheToken()), + scopedReadStateSupplier); this.timerInternals = new WindmillTimerInternals(stateFamily, inputDataWatermark, outputDataWatermark); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SizedWeigher.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/Weighers.java similarity index 61% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SizedWeigher.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/Weighers.java index 9b4b246beac22..5bafca766e2c8 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/SizedWeigher.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/Weighers.java @@ -16,8 +16,7 @@ package com.google.cloud.dataflow.sdk.runners.worker; -import com.google.cloud.dataflow.sdk.util.Sized; -import com.google.common.base.Preconditions; +import com.google.cloud.dataflow.sdk.util.Weighted; import com.google.common.cache.Weigher; /** @@ -28,23 +27,22 @@ *
<p>
      Package-private here so that the dependency on Guava does not leak into the public API * surface. */ -class SizedWeigher implements Weigher>{ - - public static SizedWeigher withBaseWeight(int baseWeight) { - return new SizedWeigher<>(baseWeight); - } - - private final int baseWeight; - - private SizedWeigher(int baseWeight) { - Preconditions.checkArgument( - baseWeight > 0, - "base weight for SizedWeigher must be positive"); - this.baseWeight = baseWeight; +class Weighers { + public static Weigher fixedWeightKeys(final int keyWeight) { + return new Weigher() { + @Override + public int weigh(Object key, Weighted value) { + return (int) Math.min(keyWeight + value.getWeight(), Integer.MAX_VALUE); + } + }; } - @Override - public int weigh(K key, Sized value) { - return baseWeight + (int) Math.min(value.getSize(), Integer.MAX_VALUE); + public static Weigher weightedKeysAndValues() { + return new Weigher() { + @Override + public int weigh(Weighted key, Weighted value) { + return (int) Math.min(key.getWeight() + value.getWeight(), Integer.MAX_VALUE); + } + }; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCache.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCache.java new file mode 100644 index 0000000000000..5eaefa2445be5 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCache.java @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.util.Weighted; +import com.google.cloud.dataflow.sdk.util.state.State; +import com.google.cloud.dataflow.sdk.util.state.StateNamespace; +import com.google.cloud.dataflow.sdk.util.state.StateTag; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalCause; +import com.google.common.cache.RemovalListener; +import com.google.common.cache.RemovalNotification; +import com.google.common.cache.Weigher; +import com.google.protobuf.ByteString; + +import java.io.PrintWriter; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +/** + * Process-wide cache of per-key state. 
+ */ +public class WindmillStateCache { + + private Cache stateCache; + private int weight = 0; + + private static class CacheWeigher implements Weigher { + @Override + public int weigh(Weighted key, Weighted value) { + return (int) Math.max(key.getWeight() + value.getWeight(), Integer.MAX_VALUE); + } + } + + public WindmillStateCache() { + final Weigher weigher = Weighers.weightedKeysAndValues(); + + stateCache = + CacheBuilder.newBuilder() + .maximumWeight(100000000 /* 100 MB */) + .recordStats() + .weigher(weigher) + .removalListener(new RemovalListener() { + @Override + public void onRemoval(RemovalNotification removal) { + if (removal.getCause() != RemovalCause.REPLACED) { + weight -= weigher.weigh(removal.getKey(), removal.getValue()); + } + } + }) + .build(); + } + + public long getWeight() { + return weight; + } + + /** + * Per-computation view of the state cache. + */ + public class ForComputation { + private final String computation; + private ForComputation(String computation) { + this.computation = computation; + } + + /** + * Returns a per-computation, per-key view of the state cache. + */ + public ForKey forKey(ByteString key, String stateFamily, long cacheToken) { + return new ForKey(computation, key, stateFamily, cacheToken); + } + } + + /** + * Per-computation, per-key view of the state cache. + */ + public class ForKey { + private final String computation; + private final ByteString key; + private final String stateFamily; + private final long cacheToken; + + private ForKey(String computation, ByteString key, String stateFamily, long cacheToken) { + this.computation = computation; + this.key = key; + this.stateFamily = stateFamily; + this.cacheToken = cacheToken; + } + + public T get(StateNamespace namespace, StateTag address) { + return WindmillStateCache.this.get( + computation, key, stateFamily, cacheToken, namespace, address); + } + + public void put( + StateNamespace namespace, StateTag address, T value, long weight) { + WindmillStateCache.this.put( + computation, key, stateFamily, cacheToken, namespace, address, value, weight); + } + } + + /** + * Returns a per-computation view of the state cache. + */ + public ForComputation forComputation(String computation) { + return new ForComputation(computation); + } + + private T get(String computation, ByteString processingKey, String stateFamily, + long token, StateNamespace namespace, StateTag address) { + StateId id = new StateId(computation, processingKey, stateFamily, namespace); + StateCacheEntry entry = stateCache.getIfPresent(id); + if (entry == null) { + return null; + } + if (entry.getToken() != token) { + stateCache.invalidate(id); + return null; + } + return entry.get(namespace, address); + } + + private void put(String computation, ByteString processingKey, + String stateFamily, long token, StateNamespace namespace, StateTag address, T value, + long weight) { + StateId id = new StateId(computation, processingKey, stateFamily, namespace); + StateCacheEntry entry = stateCache.getIfPresent(id); + if (entry == null || entry.getToken() != token) { + entry = new StateCacheEntry(token); + this.weight += id.getWeight(); + } + this.weight += entry.put(namespace, address, value, weight); + // Always add back to the cache to update the weight. + stateCache.put(id, entry); + } + + /** + * Struct identifying a cache entry that contains all data for a key and namespace. 
+ */ + private static class StateId implements Weighted { + public final String computation; + public final ByteString processingKey; + public final String stateFamily; + public final Object namespaceKey; + + public StateId(String computation, ByteString processingKey, String stateFamily, + StateNamespace namespace) { + this.computation = computation; + this.processingKey = processingKey; + this.stateFamily = stateFamily; + this.namespaceKey = namespace.getCacheKey(); + } + + @Override + public boolean equals(Object other) { + if (other instanceof StateId) { + StateId otherId = (StateId) other; + return computation.equals(otherId.computation) + && processingKey.equals(otherId.processingKey) + && stateFamily.equals(otherId.stateFamily) + && namespaceKey.equals(otherId.namespaceKey); + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(computation, processingKey, namespaceKey); + } + + @Override + public long getWeight() { + return processingKey.size(); + } + } + + /** + * Entry in the state cache that stores a map of values and a token representing the + * validity of the values. + */ + private static class StateCacheEntry implements Weighted { + private final long token; + private final Map, WeightedValue> values; + private long weight; + + public StateCacheEntry(long token) { + this.values = new HashMap<>(); + this.token = token; + this.weight = 0; + } + + @SuppressWarnings("unchecked") + public T get(StateNamespace namespace, StateTag tag) { + WeightedValue weightedValue = + (WeightedValue) values.get(new NamespacedTag(namespace, tag)); + return weightedValue == null ? null : weightedValue.value; + } + + public long put( + StateNamespace namespace, StateTag tag, T value, long weight) { + WeightedValue weightedValue = + (WeightedValue) values.get(new NamespacedTag(namespace, tag)); + long weightDelta = 0; + if (weightedValue == null) { + weightedValue = new WeightedValue(); + } else { + weightDelta -= weightedValue.weight; + } + weightedValue.value = value; + weightedValue.weight = weight; + weightDelta += weight; + this.weight += weightDelta; + values.put(new NamespacedTag(namespace, tag), weightedValue); + return weightDelta; + } + + @Override + public long getWeight() { + return weight; + } + + public long getToken() { + return token; + } + + private static class NamespacedTag { + private final StateNamespace namespace; + private final StateTag tag; + NamespacedTag(StateNamespace namespace, StateTag tag) { + this.namespace = namespace; + this.tag = tag; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof NamespacedTag)) { + return false; + } + NamespacedTag that = (NamespacedTag) other; + return namespace.equals(that.namespace) && tag.equals(that.tag); + } + + @Override + public int hashCode() { + return Objects.hash(namespace, tag); + } + } + + private static class WeightedValue { + public long weight = 0; + public T value = null; + } + } + + /** + * Print summary statistics of the cache to the given {@link PrintWriter}. + */ + public void printSummaryHtml(PrintWriter response) { + response.println("Cache Stats:

"); + response.println( + ""); + response.println(""); + response.println(""); + response.println(""); + response.println(""); + response.println("
Hit RatioEvictionsSizeWeight
" + stateCache.stats().hitRate() + "" + stateCache.stats().evictionCount() + "" + stateCache.size() + "" + getWeight() + "

"); + } + + /** + * Print detailed information about the cache to the given {@link PrintWriter}. + */ + public void printDetailedHtml(PrintWriter response) { + response.println("

Cache Information

"); + printSummaryHtml(response); + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java index 91dc2236c5622..e6a6cd6dadf35 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternals.java @@ -21,6 +21,7 @@ import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; +import com.google.cloud.dataflow.sdk.util.Weighted; import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler; import com.google.cloud.dataflow.sdk.util.state.BagState; import com.google.cloud.dataflow.sdk.util.state.CombiningValueStateInternal; @@ -32,128 +33,118 @@ import com.google.cloud.dataflow.sdk.util.state.StateTable; import com.google.cloud.dataflow.sdk.util.state.StateTag; import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder; +import com.google.cloud.dataflow.sdk.util.state.StateTags; import com.google.cloud.dataflow.sdk.util.state.ValueState; import com.google.cloud.dataflow.sdk.util.state.WatermarkStateInternal; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; +import com.google.common.base.Optional; import com.google.common.base.Supplier; +import com.google.common.base.Throwables; import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; import com.google.common.util.concurrent.Futures; import com.google.protobuf.ByteString; import org.joda.time.Instant; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Random; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import javax.annotation.concurrent.NotThreadSafe; /** * Implementation of {@link StateInternals} using Windmill to manage the underlying data. 
*/ class WindmillStateInternals extends MergingStateInternals { + private static class CachingStateTable extends StateTable { + private final String stateFamily; + private final WindmillStateReader reader; + private final WindmillStateCache.ForKey cache; + private final Supplier scopedReadStateSupplier; - private final StateTable inMemoryState = - new StateTable() { - @Override - protected StateBinder binderForNamespace(final StateNamespace namespace) { - return new StateBinder() { - @Override - public BagState bindBag(StateTag> address, Coder elemCoder) { - return new WindmillBag<>(encodeKey(namespace, address), stateFamily, elemCoder, - reader, scopedReadStateSupplier); - } - - @Override - public WatermarkStateInternal bindWatermark( - StateTag address, - OutputTimeFn outputTimeFn) { - return new WindmillWatermarkState( - encodeKey(namespace, address), - stateFamily, - reader, - scopedReadStateSupplier, - outputTimeFn); - } - - @Override - public - CombiningValueStateInternal bindCombiningValue( - StateTag> address, - Coder accumCoder, - CombineFn combineFn) { - return new WindmillCombiningValue<>(encodeKey(namespace, address), stateFamily, - accumCoder, combineFn, reader, scopedReadStateSupplier); - } + public CachingStateTable(String stateFamily, + WindmillStateReader reader, WindmillStateCache.ForKey cache, + Supplier scopedReadStateSupplier) { + this.stateFamily = stateFamily; + this.reader = reader; + this.cache = cache; + this.scopedReadStateSupplier = scopedReadStateSupplier; + } - @Override - public ValueState bindValue(StateTag> address, Coder coder) { - return new WindmillValue<>(encodeKey(namespace, address), stateFamily, coder, reader, - scopedReadStateSupplier); - } - }; + @Override + protected StateBinder binderForNamespace(final StateNamespace namespace) { + // Look up state objects in the cache or create new ones if not found. The state will + // be added to the cache in persist(). + return new StateBinder() { + @Override + public BagState bindBag(StateTag> address, Coder elemCoder) { + WindmillBag result = (WindmillBag) cache.get(namespace, address); + if (result == null) { + result = new WindmillBag(namespace, address, stateFamily, elemCoder); + } + result.initializeForWorkItem(reader, scopedReadStateSupplier); + return result; } - }; - private final String prefix; - private final String stateFamily; - private final WindmillStateReader reader; - private final boolean useStateFamilies; - private final Supplier scopedReadStateSupplier; - - @VisibleForTesting - static final ThreadLocal> COMPACT_NOW = - new ThreadLocal>() { @Override - public Supplier initialValue() { - return new Supplier() { - /* The rate at which, on average, this will return true. */ - static final double RATE = 0.002; - Random random = new Random(); - long counter = nextSample(); + public WatermarkStateInternal bindWatermark( + StateTag address, OutputTimeFn outputTimeFn) { + WindmillWatermarkState result = (WindmillWatermarkState) cache.get(namespace, address); + if (result == null) { + result = new WindmillWatermarkState(namespace, address, stateFamily, outputTimeFn); + } + result.initializeForWorkItem(reader, scopedReadStateSupplier); + return result; + } - private long nextSample() { - // Use geometric distribution to find next true value. - // This lets us avoid invoking random.nextDouble() on every call. 
- return (long) Math.floor(Math.log(random.nextDouble()) / Math.log(1 - RATE)); - } + @Override + public CombiningValueStateInternal + bindCombiningValue(StateTag> address, + Coder accumCoder, CombineFn combineFn) { + WindmillCombiningValue result = new WindmillCombiningValue<>( + namespace, address, stateFamily, accumCoder, combineFn, cache); + result.initializeForWorkItem(reader, scopedReadStateSupplier); + return result; + } - @Override - public Boolean get() { - counter--; - if (counter < 0) { - counter = nextSample(); - return true; - } else { - return false; - } - } - }; + @Override + public ValueState bindValue(StateTag> address, Coder coder) { + WindmillValue result = (WindmillValue) cache.get(namespace, address); + if (result == null) { + result = new WindmillValue(namespace, address, stateFamily, coder); + } + result.initializeForWorkItem(reader, scopedReadStateSupplier); + return result; } }; - - public WindmillStateInternals(String prefix, boolean useStateFamilies, - WindmillStateReader reader, Supplier scopedReadStateSupplier) { - this.prefix = prefix; - if (useStateFamilies) { - this.stateFamily = prefix; - } else { - this.stateFamily = ""; } - this.reader = reader; - this.useStateFamilies = useStateFamilies; + }; + + private WindmillStateCache.ForKey cache; + Supplier scopedReadStateSupplier; + private StateTable workItemState; + + public WindmillStateInternals(String stateFamily, WindmillStateReader reader, + WindmillStateCache.ForKey cache, Supplier scopedReadStateSupplier) { + this.cache = cache; this.scopedReadStateSupplier = scopedReadStateSupplier; + this.workItemState = new CachingStateTable(stateFamily, reader, cache, scopedReadStateSupplier); } public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) { List> commitsToMerge = new ArrayList<>(); // Call persist on each first, which may schedule some futures for reading. - for (State location : inMemoryState.values()) { + for (State location : workItemState.values()) { if (!(location instanceof WindmillState)) { throw new IllegalStateException(String.format( "%s wasn't created by %s -- unable to persist it", @@ -162,20 +153,16 @@ public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) } try { - commitsToMerge.add(((WindmillState) location).persist()); + commitsToMerge.add(((WindmillState) location).persist(cache)); } catch (IOException e) { throw new RuntimeException("Unable to persist state", e); } } - // Kick off the fetches that prevent blind-writes. We do this before returning - // to ensure that the reads have happened before the persist actually happens. - reader.startBatchAndBlock(); - // Clear out the map of already retrieved state instances. - inMemoryState.clear(); + workItemState.clear(); - try { + try (StateSampler.ScopedState scope = scopedReadStateSupplier.get()) { for (Future commitFuture : commitsToMerge) { commitBuilder.mergeFrom(commitFuture.get()); } @@ -184,103 +171,123 @@ public void persist(final Windmill.WorkItemCommitRequest.Builder commitBuilder) } } - @VisibleForTesting ByteString encodeKey(StateNamespace namespace, StateTag address) { + /** + * Encodes the given namespace and address as {@code <namespace>+<address>}. + */ + @VisibleForTesting + static ByteString encodeKey(StateNamespace namespace, StateTag address) { try { - // Use a StringBuilder rather than concatenation and String.format. We build these keys + // Use ByteString.Output rather than concatenation and String.format. 
We build these keys // a lot, and this leads to better performance results. See associated benchmarks. - StringBuilder output = new StringBuilder(); - - // We only need the prefix if we aren't using state families - if (!useStateFamilies) { - output.append(prefix); - } + ByteString.Output stream = ByteString.newOutput(); + OutputStreamWriter writer = new OutputStreamWriter(stream, StandardCharsets.UTF_8); - // stringKey starts and ends with a slash. We don't need to seperate it from prefix, because - // the prefix is guaranteed to be unique and non-overlapping. We separate it from the + // stringKey starts and ends with a slash. We separate it from the // StateTag ID by a '+' (which is guaranteed not to be in the stringKey) because the // ID comes from the user. - namespace.appendTo(output); - output.append('+'); - address.appendTo(output); - return ByteString.copyFromUtf8(output.toString()); + namespace.appendTo(writer); + writer.write('+'); + address.appendTo(writer); + writer.flush(); + return stream.toByteString(); } catch (IOException e) { - throw new RuntimeException( - "Unable to encode state key for " + namespace + ", " + address, e); + throw Throwables.propagate(e); } } /** - * Anything that can provide a {@link WorkItemCommitRequest} to persist its state; it may need - * to read some state in order to build the commit request. + * Abstract base class for all Windmill state. + * + *
<p>
Note that these are not thread safe; each state object is associated with a key + * and thus only accessed by a single thread at once. */ - private interface WindmillState { + @NotThreadSafe + private abstract static class WindmillState { + protected Supplier scopedReadStateSupplier; + protected WindmillStateReader reader; + /** * Return an asynchronously computed {@link WorkItemCommitRequest}. The request should * be of a form that can be merged with others (only add to repeated fields). */ - Future persist() + abstract Future persist(WindmillStateCache.ForKey cache) throws IOException; + + void initializeForWorkItem( + WindmillStateReader reader, Supplier scopedReadStateSupplier) { + this.reader = reader; + this.scopedReadStateSupplier = scopedReadStateSupplier; + } + + StateSampler.ScopedState scopedReadState() { + return scopedReadStateSupplier.get(); + } } /** * Base class for implementations of {@link WindmillState} where the {@link #persist} call does * not require any asynchronous reading. */ - private abstract static class SimpleWindmillState implements WindmillState { + private abstract static class SimpleWindmillState extends WindmillState { @Override - public final Future persist() throws IOException{ - return Futures.immediateFuture(persistDirectly()); + public final Future persist(WindmillStateCache.ForKey cache) + throws IOException { + return Futures.immediateFuture(persistDirectly(cache)); } /** * Returns a {@link WorkItemCommitRequest} that can be used to persist this state to * Windmill. */ - protected abstract WorkItemCommitRequest persistDirectly() throws IOException; + protected abstract WorkItemCommitRequest persistDirectly(WindmillStateCache.ForKey cache) + throws IOException; } @Override public T state(StateNamespace namespace, StateTag address) { - return inMemoryState.get(namespace, address); + return workItemState.get(namespace, address); } - private static class WindmillValue extends SimpleWindmillState - implements ValueState, WindmillState { - + private static class WindmillValue extends SimpleWindmillState implements ValueState { + private final StateNamespace namespace; + private final StateTag> address; private final ByteString stateKey; private final String stateFamily; private final Coder coder; - private final WindmillStateReader reader; - private final Supplier readStateSupplier; /** Whether we've modified the value since creation of this state. */ private boolean modified = false; - private T modifiedValue; - - private WindmillValue(ByteString stateKey, String stateFamily, Coder coder, - WindmillStateReader reader, Supplier readStateSupplier) { - this.stateKey = stateKey; + /** Whether the in memory value is the true value. */ + private boolean valueIsKnown = false; + private T value; + + private WindmillValue(StateNamespace namespace, StateTag> address, + String stateFamily, Coder coder) { + this.namespace = namespace; + this.address = address; + this.stateKey = encodeKey(namespace, address); this.stateFamily = stateFamily; this.coder = coder; - this.reader = reader; - this.readStateSupplier = readStateSupplier; } @Override public void clear() { modified = true; - modifiedValue = null; + valueIsKnown = true; + value = null; } @Override public StateContents get() { - final Future future = modified ? null : reader.valueFuture(stateKey, stateFamily, coder); + final Future future = valueIsKnown ? 
Futures.immediateFuture(value) + : reader.valueFuture(stateKey, stateFamily, coder); return new StateContents() { @Override public T read() { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { - return modified ? modifiedValue : future.get(); + try (StateSampler.ScopedState scope = scopedReadState()) { + valueIsKnown = true; + return future.get(); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException("Unable to read value from state", e); } @@ -291,63 +298,96 @@ public T read() { @Override public void set(T value) { modified = true; - modifiedValue = value; + valueIsKnown = true; + this.value = value; } @Override - protected WorkItemCommitRequest persistDirectly() throws IOException { + protected WorkItemCommitRequest persistDirectly(WindmillStateCache.ForKey cache) + throws IOException { if (!modified) { // No in-memory changes. return WorkItemCommitRequest.newBuilder().buildPartial(); } - // We can't write without doing a read, so we need to kick off a read if we get here. - // Call reader.valueFuture directly, since our read() method will avoid actually reading from - // Windmill since the value is already inMemory. - reader.valueFuture(stateKey, stateFamily, coder); - ByteString.Output stream = ByteString.newOutput(); - if (modifiedValue != null) { - coder.encode(modifiedValue, stream, Coder.Context.OUTER); + if (value != null) { + coder.encode(value, stream, Coder.Context.OUTER); } + ByteString encoded = stream.toByteString(); WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); + // Update the entry of the cache with the new value and change in encoded size. + cache.put(namespace, address, this, encoded.size()); + + modified = false; + commitBuilder .addValueUpdatesBuilder() .setTag(stateKey) .setStateFamily(stateFamily) .getValueBuilder() - .setData(stream.toByteString()) + .setData(encoded) .setTimestamp(Long.MAX_VALUE); + return commitBuilder.buildPartial(); } } - private static class WindmillBag extends SimpleWindmillState - implements BagState, WindmillState { + private static class WindmillBag extends SimpleWindmillState implements BagState { + private final StateNamespace namespace; + private final StateTag> address; private final ByteString stateKey; private final String stateFamily; private final Coder elemCoder; - private final WindmillStateReader reader; - private final Supplier readStateSupplier; - - private boolean cleared = false; - private final List localAdditions = new ArrayList<>(); - private WindmillBag(ByteString stateKey, String stateFamily, Coder elemCoder, - WindmillStateReader reader, Supplier readStateSupplier) { - this.stateKey = stateKey; + private boolean cleared; + // Cache of all values in this bag. Null if the persisted state is unknown. 
+ private ConcatIterables cachedValues = null; + private List localAdditions = new ArrayList<>(); + private long encodedSize = 0; + + private WindmillBag(StateNamespace namespace, StateTag> address, String stateFamily, + Coder elemCoder) { + this.namespace = namespace; + this.address = address; + this.stateKey = encodeKey(namespace, address); this.stateFamily = stateFamily; this.elemCoder = elemCoder; - this.reader = reader; - this.readStateSupplier = readStateSupplier; } @Override public void clear() { cleared = true; + cachedValues = new ConcatIterables(); localAdditions.clear(); + encodedSize = 0; + } + + private Iterable fetchData(Future> persistedData) { + try (StateSampler.ScopedState scope = scopedReadState()) { + if (cachedValues != null) { + return cachedValues; + } + Iterable data = persistedData.get(); + if (data instanceof Weighted) { + // We have a known bounded amount of data; cache it. + cachedValues = new ConcatIterables(); + cachedValues.extendWith(data); + encodedSize = ((Weighted) data).getWeight(); + return cachedValues; + } else { + // This is an iterable that may not fit in memory at once; don't cache it. + return data; + } + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Unable to read state", e); + } + } + + public boolean valuesAreCached() { + return cachedValues != null; } @Override @@ -355,21 +395,14 @@ public StateContents> get() { // If we clear after calling get() but before calling read(), technically we didn't need the // underlying windmill read. But, we need to register the desire now if we aren't going to // clear (in order to get it added to the prefetch). - final Future> persistedData = cleared - ? Futures.>immediateFuture(Collections.emptyList()) + final Future> persistedData = (cachedValues != null) + ? null : reader.listFuture(stateKey, stateFamily, elemCoder); return new StateContents>() { @Override public Iterable read() { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { - // We need to check cleared again, because it may have become clear in between creating - // the future and calling read. - Iterable input = cleared ? Collections.emptyList() : persistedData.get(); - return Iterables.concat(input, localAdditions); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Unable to read state", e); - } + return Iterables.concat(fetchData(persistedData), localAdditions); } }; } @@ -379,21 +412,14 @@ public StateContents isEmpty() { // If we clear after calling isEmpty() but before calling read(), technically we didn't need // the underlying windmill read. But, we need to register the desire now if we aren't going to // clear (in order to get it added to the prefetch). - final Future> persistedData = cleared - ? Futures.>immediateFuture(Collections.emptyList()) + final Future> persistedData = (cachedValues != null) + ? null : reader.listFuture(stateKey, stateFamily, elemCoder); return new StateContents() { @Override public Boolean read() { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { - // We need to check cleared again, because it may have become clear in between creating - // the future and calling read. - Iterable input = cleared ? 
Collections.emptyList() : persistedData.get(); - return Iterables.isEmpty(input) && Iterables.isEmpty(localAdditions); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Unable to read state", e); - } + return Iterables.isEmpty(fetchData(persistedData)) && localAdditions.isEmpty(); } }; } @@ -404,14 +430,11 @@ public void add(T input) { } @Override - public WorkItemCommitRequest persistDirectly() throws IOException { + public WorkItemCommitRequest persistDirectly(WindmillStateCache.ForKey cache) + throws IOException { WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); if (cleared) { - // If we do a delete, we need to have done a read to prevent Windmill complaining about - // blind deletes. We use the underlying reader, because we normally skip the actual read - // if we've already cleared the state. - reader.listFuture(stateKey, stateFamily, elemCoder); commitBuilder.addListUpdatesBuilder() .setTag(stateKey) .setStateFamily(stateFamily) @@ -430,43 +453,88 @@ public WorkItemCommitRequest persistDirectly() throws IOException { // Encode the value elemCoder.encode(value, stream, Coder.Context.OUTER); + ByteString encoded = stream.toByteString(); + if (cachedValues != null) { + encodedSize += encoded.size() - 1; + } listUpdatesBuilder.addValuesBuilder() - .setData(stream.toByteString()) + .setData(encoded) .setTimestamp(Long.MAX_VALUE); } } + + if (cachedValues != null) { + cachedValues.extendWith(localAdditions); + // Don't reuse the localAdditions object; we don't want future changes to it to modify the + // value of cachedValues. + localAdditions = new ArrayList(); + cache.put(namespace, address, this, encodedSize); + } else { + localAdditions.clear(); + } + cleared = false; + return commitBuilder.buildPartial(); } } - private static class WindmillWatermarkState implements WatermarkStateInternal, WindmillState { + private static class ConcatIterables implements Iterable { + List> iterables; + + public ConcatIterables() { + this.iterables = new ArrayList<>(); + } + + public void extendWith(Iterable iterable) { + iterables.add(iterable); + } + + @Override + public Iterator iterator() { + return Iterators.concat( + Iterables.transform( + iterables, + new Function, Iterator>() { + @Override + public Iterator apply(Iterable iterable) { + return iterable.iterator(); + } + }) + .iterator()); + } + } + + private static class WindmillWatermarkState + extends WindmillState implements WatermarkStateInternal { + // The encoded size of an Instant. + private static final int ENCODED_SIZE = 8; private final OutputTimeFn outputTimeFn; + private final StateNamespace namespace; + private final StateTag address; private final ByteString stateKey; private final String stateFamily; - private final WindmillStateReader reader; - private final Supplier readStateSupplier; private boolean cleared = false; + // The hold value, Optional.absent() if no hold, or null if unknown. 
+ private Optional cachedValue = null; private Instant localAdditions = null; - private WindmillWatermarkState( - ByteString stateKey, - String stateFamily, - WindmillStateReader reader, - Supplier readStateSupplier, + private WindmillWatermarkState(StateNamespace namespace, + StateTag address, String stateFamily, OutputTimeFn outputTimeFn) { - this.stateKey = stateKey; + this.namespace = namespace; + this.address = address; + this.stateKey = encodeKey(namespace, address); this.stateFamily = stateFamily; - this.reader = reader; - this.readStateSupplier = readStateSupplier; this.outputTimeFn = outputTimeFn; } @Override public void clear() { cleared = true; + cachedValue = Optional.absent(); localAdditions = null; } @@ -484,46 +552,38 @@ public StateContents get() { // If we clear after calling get() but before calling read(), technically we didn't need the // underlying windmill read. But, we need to register the desire now if we aren't going to // clear (in order to get it added to the prefetch). - final Future persistedData = cleared - ? Futures.immediateFuture(null) + final Future persistedData = (cachedValue != null) + ? Futures.immediateFuture(cachedValue.orNull()) : reader.watermarkFuture(stateKey, stateFamily); return new StateContents() { @Override public Instant read() { - Instant value = localAdditions; - if (!cleared) { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { - Instant persisted = persistedData.get(); - value = (value == null) ? persisted : outputTimeFn.combine(value, persisted); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Unable to read state", e); + try (StateSampler.ScopedState scope = scopedReadState()) { + Instant persistedHold = persistedData.get(); + if (persistedHold == null || persistedHold.equals(BoundedWindow.TIMESTAMP_MAX_VALUE)) { + cachedValue = Optional.absent(); + } else { + cachedValue = Optional.of(persistedHold); } + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Unable to read state", e); + } + + if (localAdditions == null) { + return cachedValue.orNull(); + } else if (!cachedValue.isPresent()) { + return localAdditions; + } else { + return outputTimeFn.combine(localAdditions, cachedValue.get()); } - return value; } }; } @Override public StateContents isEmpty() { - // If we clear after calling get() but before calling read(), technically we didn't need the - // underlying windmill read. But, we need to register the desire now if we aren't going to - // clear (in order to get it added to the prefetch). - final Future persistedData = cleared - ? 
Futures.immediateFuture(null) - : reader.watermarkFuture(stateKey, stateFamily); - - return new StateContents() { - @Override - public Boolean read() { - try (StateSampler.ScopedState scope = readStateSupplier.get()) { - return localAdditions == null && (cleared || persistedData.get() == null); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException("Unable to read state", e); - } - } - }; + throw new UnsupportedOperationException(); } @Override @@ -533,7 +593,9 @@ public void add(Instant outputTime) { } @Override - public Future persist() { + public Future persist(final WindmillStateCache.ForKey cache) { + Future result; + if (!cleared && localAdditions == null) { // Nothing to do return Futures.immediateFuture(WorkItemCommitRequest.newBuilder().buildPartial()); @@ -544,7 +606,8 @@ public Future persist() { .setTag(stateKey) .setStateFamily(stateFamily) .setReset(true); - return Futures.immediateFuture(commitBuilder.buildPartial()); + + result = Futures.immediateFuture(commitBuilder.buildPartial()); } else if (cleared && localAdditions != null) { // Since we cleared before adding, we can do a blind overwrite of persisted state WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); @@ -553,13 +616,30 @@ public Future persist() { .setStateFamily(stateFamily) .setReset(true) .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); - return Futures.immediateFuture(commitBuilder.buildPartial()); - } else if (!cleared && localAdditions != null){ + + cachedValue = Optional.of(localAdditions); + + result = Futures.immediateFuture(commitBuilder.buildPartial()); + } else if (!cleared && localAdditions != null) { // Otherwise, we need to combine the local additions with the already persisted data - return combineWithPersisted(); + result = combineWithPersisted(); } else { throw new IllegalStateException("Unreachable condition"); } + + return Futures.lazyTransform( + result, new Function() { + @Override + public WorkItemCommitRequest apply(WorkItemCommitRequest result) { + cleared = false; + localAdditions = null; + if (cachedValue != null) { + cache.put( + namespace, address, WindmillWatermarkState.this, ENCODED_SIZE); + } + return result; + } + }); } /** @@ -589,35 +669,42 @@ private Future combineWithPersisted() { .setStateFamily(stateFamily) .addTimestamps( WindmillTimeUtils.harnessToWindmillTimestamp(localAdditions)); - return Futures.immediateFuture(commitBuilder.buildPartial()); + + if (cachedValue != null) { + cachedValue = Optional.of(cachedValue.isPresent() + ? outputTimeFn.combine(cachedValue.get(), localAdditions) + : localAdditions); + } + + return Futures.immediateFuture(commitBuilder.buildPartial()); } else { // The non-fast path does a read-modify-write - return Futures.lazyTransform(reader.watermarkFuture(stateKey, stateFamily), + return Futures.lazyTransform((cachedValue != null) + ? Futures.immediateFuture(cachedValue.orNull()) + : reader.watermarkFuture(stateKey, stateFamily), new Function() { - - @Override - public WorkItemCommitRequest apply(Instant priorHold) { - - Instant combinedHold = (priorHold == null) ? 
localAdditions - : outputTimeFn.combine(priorHold, localAdditions); - - WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); - commitBuilder.addWatermarkHoldsBuilder() - .setTag(stateKey) - .setStateFamily(stateFamily) - .setReset(true) - .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(combinedHold)); - - return commitBuilder.buildPartial(); - } - }); + @Override + public WorkItemCommitRequest apply(Instant priorHold) { + cachedValue = Optional.of((priorHold != null) + ? outputTimeFn.combine(priorHold, localAdditions) + : localAdditions); + + WorkItemCommitRequest.Builder commitBuilder = WorkItemCommitRequest.newBuilder(); + commitBuilder.addWatermarkHoldsBuilder() + .setTag(stateKey) + .setStateFamily(stateFamily) + .setReset(true) + .addTimestamps(WindmillTimeUtils.harnessToWindmillTimestamp(cachedValue.get())); + + return commitBuilder.buildPartial(); + } + }); } } } private static class WindmillCombiningValue - implements CombiningValueStateInternal, WindmillState { - + extends WindmillState implements CombiningValueStateInternal { private final WindmillBag bag; private final CombineFn combineFn; @@ -628,15 +715,28 @@ private static class WindmillCombiningValue private AccumT localAdditionsAccum; private boolean hasLocalAdditions = false; - private WindmillCombiningValue(ByteString stateKey, String stateFamily, - Coder accumCoder, - CombineFn combineFn, - WindmillStateReader reader, Supplier readStateSupplier) { - this.bag = new WindmillBag<>(stateKey, stateFamily, accumCoder, reader, readStateSupplier); + private WindmillCombiningValue(StateNamespace namespace, + StateTag> address, String stateFamily, + Coder accumCoder, CombineFn combineFn, + WindmillStateCache.ForKey cache) { + StateTag> internalBagAddress = StateTags.bag(address.getId(), accumCoder); + WindmillBag cachedBag = + (WindmillBag) cache.get(namespace, internalBagAddress); + this.bag = + (cachedBag != null) + ? cachedBag + : new WindmillBag<>(namespace, internalBagAddress, stateFamily, accumCoder); this.combineFn = combineFn; this.localAdditionsAccum = combineFn.createAccumulator(); } + @Override + void initializeForWorkItem( + WindmillStateReader reader, Supplier scopedReadStateSupplier) { + super.initializeForWorkItem(reader, scopedReadStateSupplier); + this.bag.initializeForWorkItem(reader, scopedReadStateSupplier); + } + @Override public StateContents get() { final StateContents accum = getAccum(); @@ -662,10 +762,10 @@ public void clear() { } @Override - public Future persist() throws IOException { + public Future persist(WindmillStateCache.ForKey cache) + throws IOException { if (hasLocalAdditions) { - // TODO: Take into account whether it's in the cache. - if (COMPACT_NOW.get().get()) { + if (COMPACT_NOW.get().get() || bag.valuesAreCached()) { // Implicitly clears the bag and combines local and persisted accumulators. localAdditionsAccum = getAccum().read(); } @@ -673,7 +773,8 @@ public Future persist() throws IOException { localAdditionsAccum = combineFn.createAccumulator(); hasLocalAdditions = false; } - return bag.persist(); + + return bag.persist(cache); } @Override @@ -715,4 +816,33 @@ public void addAccum(AccumT accum) { localAdditionsAccum = combineFn.mergeAccumulators(Arrays.asList(localAdditionsAccum, accum)); } } + + @VisibleForTesting + static final ThreadLocal> COMPACT_NOW = + new ThreadLocal>() { + public Supplier initialValue() { + return new Supplier() { + /* The rate at which, on average, this will return true. 
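+           With RATE = 0.002 this amounts to compacting on roughly 1 in 500 persist calls.
+           nextSample() below draws the gap until the next true value from a geometric
+           distribution via the inverse-CDF form floor(log(u) / log(1 - RATE)), so
+           random.nextDouble() is invoked once per gap rather than once per call.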
*/ + static final double RATE = 0.002; + Random random = new Random(); + long counter = nextSample(); + + private long nextSample() { + // Use geometric distribution to find next true value. + // This lets us avoid invoking random.nextDouble() on every call. + return (long) Math.floor(Math.log(random.nextDouble()) / Math.log(1 - RATE)); + } + + public Boolean get() { + counter--; + if (counter < 0) { + counter = nextSample(); + return true; + } else { + return false; + } + } + }; + } + }; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java index fb0421a703757..af7a64e4784d9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateReader.java @@ -19,11 +19,13 @@ import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill.TagList; import com.google.cloud.dataflow.sdk.runners.worker.windmill.Windmill.TagValue; +import com.google.cloud.dataflow.sdk.util.Weighted; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.AbstractIterator; +import com.google.common.collect.ForwardingList; import com.google.common.util.concurrent.ForwardingFuture; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.SettableFuture; @@ -474,28 +476,60 @@ private void consumeResponse(Windmill.GetDataRequest request, } } + @VisibleForTesting + static class WeightedList extends ForwardingList implements Weighted { + private List delegate; + long weight; + + WeightedList(List delegate) { + this.delegate = delegate; + this.weight = 0; + } + + @Override + protected List delegate() { + return delegate; + } + + @Override + public boolean add(T elem) { + throw new UnsupportedOperationException("Must use AddWeighted()"); + } + + @Override + public long getWeight() { + return weight; + } + + public void addWeighted(T elem, long weight) { + delegate.add(elem); + this.weight += weight; + } + } + /** * The deserialized values in {@code tagList} as a read-only array list. */ private List tagListPageValues(TagList tagList, Coder elemCoder) { if (tagList.getValuesCount() == 0) { - return Collections.emptyList(); + return new WeightedList(Collections.emptyList()); } - List valueList = new ArrayList<>(tagList.getValuesCount()); + WeightedList valueList = new WeightedList<>(new ArrayList(tagList.getValuesCount())); for (Windmill.Value value : tagList.getValuesList()) { if (value.hasData() && !value.getData().isEmpty()) { // Drop the first byte of the data; it's the zero byte we prepended to avoid writing // empty data. 
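        // The weight recorded below via addWeighted() is value.getData().size() - 1, i.e. the
        // size of the payload actually decoded once that zero byte is dropped.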
InputStream inputStream = value.getData().substring(1).newInput(); try { - valueList.add(elemCoder.decode(inputStream, Coder.Context.OUTER)); + valueList.addWeighted( + elemCoder.decode(inputStream, Coder.Context.OUTER), value.getData().size() - 1); } catch (IOException e) { throw new IllegalStateException("Unable to decode tag list using " + elemCoder, e); } } } - return Collections.unmodifiableList(valueList); + return valueList; } private void consumeTagList(TagList tagList, StateTag stateTag) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java index f1b607c182a47..57dd51009b8f1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java @@ -24,7 +24,7 @@ import com.google.cloud.dataflow.sdk.coders.ListCoder; import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn; import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn.Accumulator; -import com.google.cloud.dataflow.sdk.util.Sized; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; @@ -550,30 +550,30 @@ private long offset(long newWeight) { */ private List interpolate(Iterable> buffers, int count, double step, double offset) { - List>> iterators = Lists.newArrayList(); + List>> iterators = Lists.newArrayList(); for (QuantileBuffer buffer : buffers) { iterators.add(buffer.sizedIterator()); } // Each of the buffers is already sorted by element. 
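    // mergeSorted therefore yields the weighted values in globally sorted order; the loop below
    // accumulates their weights in 'current' and emits, for each target position, the element
    // whose cumulative weight first reaches that target.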
- Iterator> sorted = Iterators.mergeSorted( + Iterator> sorted = Iterators.mergeSorted( iterators, - new Comparator>() { + new Comparator>() { @Override - public int compare(Sized a, Sized b) { + public int compare(WeightedValue a, WeightedValue b) { return compareFn.compare(a.getValue(), b.getValue()); } }); List newElements = Lists.newArrayListWithCapacity(count); - Sized sizedElement = sorted.next(); - double current = sizedElement.getSize(); + WeightedValue weightedElement = sorted.next(); + double current = weightedElement.getWeight(); for (int j = 0; j < count; j++) { double target = j * step + offset; while (current <= target && sorted.hasNext()) { - sizedElement = sorted.next(); - current += sizedElement.getSize(); + weightedElement = sorted.next(); + current += weightedElement.getWeight(); } - newElements.add(sizedElement.getValue()); + newElements.add(weightedElement.getValue()); } return newElements; } @@ -638,15 +638,15 @@ public String toString() { + weight + ", elements=" + elements + "]"; } - public Iterator> sizedIterator() { - return new UnmodifiableIterator>() { + public Iterator> sizedIterator() { + return new UnmodifiableIterator>() { Iterator iter = elements.iterator(); @Override public boolean hasNext() { return iter.hasNext(); } - @Override public Sized next() { - return Sized.of(iter.next(), weight); + @Override public WeightedValue next() { + return WeightedValue.of(iter.next(), weight); } }; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java new file mode 100644 index 0000000000000..c31ad7f861c45 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +/** + * Interface representing an object that has a weight, in unspecified units. + */ +public interface Weighted { + /** + * Returns the weight of the object. + */ + long getWeight(); +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SizedSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedSideInputReader.java similarity index 73% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SizedSideInputReader.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedSideInputReader.java index 9cc2140d6970b..0323f2cafdab9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SizedSideInputReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedSideInputReader.java @@ -22,27 +22,27 @@ /** * Extension to {@link SideInputReader} that can approximate the size of the side input. 
*/ -public interface SizedSideInputReader extends SideInputReader { +public interface WeightedSideInputReader extends SideInputReader { /** * Returns the value of the requested {@link PCollectionView} for the given {@link BoundedWindow} * along with a rough estimate of the number of bytes of memory it consumes. * *

It is valid for a side input value to be {@code null}. In this case, the return - * value of this method must still be non-{@code null}. It should be a {@link Sized} - * object where {@link Sized#getValue()} returns {@code null} and {@link Sized#getSize()} may - * still return any non-negative value. + * value of this method must still be non-{@code null}. It should be a {@link Weighted} + * object where {@link WeightedValue#getValue()} returns {@code null} and + * {@link WeightedValue#getWeight()} may still return any non-negative value. */ - Sized getSized(PCollectionView view, BoundedWindow window); + WeightedValue getWeighted(PCollectionView view, BoundedWindow window); /** - * Abstract class providing default implementations for methods of {@link SizedSideInputReader}. + * Abstract class providing default implementations for methods of + * {@link WeightedSideInputReader}. */ - abstract static class Defaults implements SizedSideInputReader { + abstract static class Defaults implements WeightedSideInputReader { @Override public T get(PCollectionView view, BoundedWindow window) { - return getSized(view, window).getValue(); + return getWeighted(view, window).getValue(); } } } - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Sized.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java similarity index 67% rename from sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Sized.java rename to sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java index f019ecca7a5d6..4a6e84079faa1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Sized.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java @@ -17,30 +17,29 @@ package com.google.cloud.dataflow.sdk.util; /** - * A {@code T} with an accompanying size estimate. Units are unspecified. + * A {@code T} with an accompanying weight. Units are unspecified. 
* * @param the underlying type of object */ -public final class Sized { +public final class WeightedValue implements Weighted { private final T value; - private final long size; + private final long weight; - private Sized(T value, long size) { + private WeightedValue(T value, long weight) { this.value = value; - this.size = size; + this.weight = weight; } - public static Sized of(T value, long size) { - return new Sized<>(value, size); + public static WeightedValue of(T value, long weight) { + return new WeightedValue<>(value, weight); } - public long getSize() { - return size; + public long getWeight() { + return weight; } public T getValue() { return value; } } - diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java index 1af60ad209461..9a28d040c4d91 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java @@ -63,7 +63,7 @@ public BagState bindBag(final StateTag> address, Coder ele } @Override - public WatermarkStateInternal bindWatermark( + public WatermarkStateInternal bindWatermark( StateTag address, OutputTimeFn outputTimeFn) { return new WatermarkStateInternalImplementation(outputTimeFn); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java index c6a7ae17e3c56..11a73ad7d2ea9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java @@ -80,7 +80,7 @@ CombiningValueStateInternal bindCombiningValue( } @Override - public WatermarkStateInternal bindWatermark( + public WatermarkStateInternal bindWatermark( StateTag address, OutputTimeFn outputTimeFn) { List sources = new ArrayList<>(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java index 4a0364f1db88e..f972e312f9eec 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java @@ -42,4 +42,13 @@ public interface StateNamespace { * Append the string representation of this key to the {@link Appendable}. */ void appendTo(Appendable sb) throws IOException; + + /** + * Return an {@code Object} to use as a key in a cache. + * + *

Different namespaces may use the same key in order to be treated as a unit in the cache. + * The {@code Object}'s {@code hashCode} and {@code equals} methods will be used to determine + * equality. + */ + Object getCacheKey(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java index c11668fd62aef..09b86d67e9bfd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java @@ -33,6 +33,11 @@ public String stringKey() { return key; } + @Override + public Object getCacheKey() { + return key; + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java index 22115847a3f65..8fee9959b944e 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java @@ -63,6 +63,11 @@ public String stringKey() { return GLOBAL_STRING; } + @Override + public Object getCacheKey() { + return GLOBAL_STRING; + } + @Override public boolean equals(Object obj) { return obj == this || obj instanceof GlobalNamespace; @@ -117,6 +122,14 @@ public void appendTo(Appendable sb) throws IOException { sb.append('/').append(CoderUtils.encodeToBase64(windowCoder, window)).append('/'); } + /** + * State in the same window will all be evicted together. + */ + @Override + public Object getCacheKey() { + return window; + } + @Override public boolean equals(Object obj) { if (obj == this) { @@ -189,6 +202,14 @@ public void appendTo(Appendable sb) throws IOException { sb.append('/'); } + /** + * State in the same window will all be evicted together. + */ + @Override + public Object getCacheKey() { + return window; + } + @Override public boolean equals(Object obj) { if (obj == this) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java index cba405d81e641..be114f893fc0d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java @@ -59,7 +59,7 @@ public interface StateBinder { *

This accepts the {@link OutputTimeFn} that dictates how watermark hold timestamps * added to the returned {@link WatermarkStateInternal} are to be combined. */ - WatermarkStateInternal bindWatermark( + WatermarkStateInternal bindWatermark( StateTag address, OutputTimeFn outputTimeFn); } @@ -68,7 +68,7 @@ WatermarkStateInternal bindWatermark( void appendTo(Appendable sb) throws IOException; /** - * Returns the identifier for this state cell. + * Returns the user-provided name of this state cell. */ String getId(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java index f6f0c84e7dc41..7347efcf658f0 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java @@ -158,6 +158,10 @@ public void appendTo(Appendable sb) throws IOException { sb.append(kind.prefix).append(rawId); } + public String getRawId() { + return rawId; + } + @Override public String toString() { return MoreObjects.toStringHelper(getClass()) @@ -195,12 +199,9 @@ protected StateTagBase(StructuredId id) { this.id = id; } - /** - * Returns the identifier for this state cell. - */ @Override public String getId() { - return id.getIdString(); + return id.getRawId(); } @Override diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java new file mode 100644 index 0000000000000..ad21072dc4c90 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk; + +import com.google.protobuf.ByteString; + +import org.hamcrest.Description; +import org.hamcrest.TypeSafeMatcher; + +import java.io.Serializable; + +/** + * Matchers that are useful when writing Dataflow tests. + */ +public class DataflowMatchers { + /** + * Matcher for {@link ByteString} that prints the strings in UTF8. 
+ */ + public static class ByteStringMatcher extends TypeSafeMatcher + implements Serializable { + private ByteString expected; + private ByteStringMatcher(ByteString expected) { + this.expected = expected; + } + + public static ByteStringMatcher byteStringEq(ByteString expected) { + return new ByteStringMatcher(expected); + } + + @Override + public void describeTo(Description description) { + description + .appendText("ByteString(") + .appendText(expected.toStringUtf8()) + .appendText(")"); + } + + @Override + public void describeMismatchSafely(ByteString actual, Description description) { + description + .appendText("was ByteString(") + .appendText(actual.toStringUtf8()) + .appendText(")"); + } + + @Override + protected boolean matchesSafely(ByteString actual) { + return actual.equals(expected); + } + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java index 7b3ddde901333..37c11c284ce0f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSourcesTest.java @@ -619,8 +619,9 @@ public void testUnboundedSplits() throws Exception { @Test public void testReadUnboundedReader() throws Exception { - StreamingModeExecutionContext context = new StreamingModeExecutionContext( - "stageName", new ConcurrentHashMap(), null); + StreamingModeExecutionContext context = new StreamingModeExecutionContext("stageName", + new ConcurrentHashMap(), /*stateNameMap=*/null, + /*stateCache=*/null); DataflowPipelineOptions options = PipelineOptionsFactory.create().as(DataflowPipelineOptions.class); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReaderTest.java index 37e9511cf4d34..791d762dc6ef3 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/CachingSideInputReaderTest.java @@ -25,8 +25,8 @@ import com.google.cloud.dataflow.sdk.testing.PCollectionViewTesting; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.util.PCollectionViewWindow; -import com.google.cloud.dataflow.sdk.util.Sized; -import com.google.cloud.dataflow.sdk.util.SizedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -46,7 +46,7 @@ public class CachingSideInputReaderTest { private static boolean isCached( - Cache, Sized> cache, + Cache, WeightedValue> cache, PCollectionView view, BoundedWindow window) { return null != cache.getIfPresent(PCollectionViewWindow.of(view, window)); } @@ -69,13 +69,13 @@ private static boolean isCached( private static final int MAXIMUM_CACHE_SIZE = 1000; /** A {@link Cache} that is set up before each test. 
*/ - private Cache, Sized> defaultCache; + private Cache, WeightedValue> defaultCache; @Before public void setupCache() { defaultCache = CacheBuilder.newBuilder() .maximumWeight(MAXIMUM_CACHE_SIZE) - .weigher(SizedWeigher.withBaseWeight(1)) + .weigher(Weighers.fixedWeightKeys(1)) .build(); } @@ -83,10 +83,10 @@ public void setupCache() { public void testCachingSideInputReaderAgreesWithUnderlyingReaderForSmallItem() throws Exception { // A SideInputReader that vends fixed contents for LENGTH_VIEW_FOR_DEFAULT_TAG // with a chosen size that fits in the maximum size of the cache. - SizedSideInputReader reader = SizedDirectSideInputReader.withContents( + WeightedSideInputReader reader = WeightedDirectSideInputReader.withContents( ImmutableMap.of( UNTYPED_ITERABLE_TAG, - Sized.of( + WeightedValue.of( PCollectionViewTesting.contentsInDefaultWindow("some", "small", "collection"), MAXIMUM_CACHE_SIZE - 100))); @@ -108,10 +108,10 @@ public void testCachingSideInputReaderAgreesWithUnderlyingReaderForSmallItem() t public void testCachingSideInputReaderAgreesWithUnderlyingReaderForLargeItem() throws Exception { // A SideInputReader that vends fixed contents for LENGTH_VIEW_FOR_DEFAULT_TAG // with a chosen size that exceeds the maximum size of the cache. - SizedSideInputReader reader = SizedDirectSideInputReader.withContents( + WeightedSideInputReader reader = WeightedDirectSideInputReader.withContents( ImmutableMap.of( UNTYPED_ITERABLE_TAG, - Sized.of( + WeightedValue.of( PCollectionViewTesting.contentsInDefaultWindow("some", "large", "collection"), MAXIMUM_CACHE_SIZE + 100))); @@ -134,10 +134,10 @@ public void testCachingSideInputReaderAgreesWithUnderlyingReaderForLargeItem() t public void testCachingSideInputReaderCachesSmallItem() throws Exception { // A SideInputReader that vends fixed contents for LENGTH_VIEW_FOR_DEFAULT_TAG // with a chosen size that fits in the maximum size of the cache. - SizedSideInputReader reader = SizedDirectSideInputReader.withContents( + WeightedSideInputReader reader = WeightedDirectSideInputReader.withContents( ImmutableMap.of( UNTYPED_ITERABLE_TAG, - Sized.of( + WeightedValue.of( PCollectionViewTesting.contentsInDefaultWindow("hello", "goodbye"), MAXIMUM_CACHE_SIZE - 1000))); @@ -160,10 +160,10 @@ public void testCachingSideInputReaderCachesSmallItem() throws Exception { public void testCachingSideInputReaderDoesNotCacheLargeItem() throws Exception { // A SideInputReader that vends fixed contents for LENGTH_VIEW_FOR_DEFAULT_TAG // with a chosen size that exceeds in the maximum size of the cache. 
- SizedSideInputReader reader = SizedDirectSideInputReader.withContents( + WeightedSideInputReader reader = WeightedDirectSideInputReader.withContents( ImmutableMap.of( UNTYPED_ITERABLE_TAG, - Sized.of( + WeightedValue.of( PCollectionViewTesting.contentsInDefaultWindow("hello", "goodbye"), MAXIMUM_CACHE_SIZE + 100))); @@ -188,9 +188,10 @@ public void testCachingSideInputReaderEmpty() throws Exception { PCollectionView view = PCollectionViewTesting.testingView( tag, new PCollectionViewTesting.LengthViewFn(), StringUtf8Coder.of()); - CachingSideInputReader sideInputReader = CachingSideInputReader.of( - SizedDirectSideInputReader.withContents(ImmutableMap., Sized>of()), - defaultCache); + CachingSideInputReader sideInputReader = + CachingSideInputReader.of(WeightedDirectSideInputReader.withContents( + ImmutableMap., WeightedValue>of()), + defaultCache); assertFalse(sideInputReader.contains(view)); assertTrue(sideInputReader.isEmpty()); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java index c3ad2bbd23e6c..90c55273a528b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowSideInputReaderTest.java @@ -34,7 +34,7 @@ import com.google.cloud.dataflow.sdk.util.BatchModeExecutionContext; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.ExecutionContext; -import com.google.cloud.dataflow.sdk.util.Sized; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -161,10 +161,10 @@ public void testDataflowSideInputReaderNotEmpty() throws Exception { @Test public void testDataflowSideInputReaderFilteredRead() throws Exception { assertTrue(defaultSideInputReader.contains(DEFAULT_LENGTH_VIEW)); - Sized sizedValue = defaultSideInputReader.getSized( + WeightedValue sizedValue = defaultSideInputReader.getWeighted( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); assertThat(sizedValue.getValue(), equalTo(DEFAULT_SOURCE_LENGTH)); - assertThat(sizedValue.getSize(), equalTo(DEFAULT_SOURCE_LENGTH * windowedLongBytes())); + assertThat(sizedValue.getWeight(), equalTo(DEFAULT_SOURCE_LENGTH * windowedLongBytes())); } /** @@ -176,15 +176,15 @@ public void testDataflowSideInputReaderRepeatedRead() throws Exception { DataflowSideInputReader sideInputReader = DataflowSideInputReader.of( Collections.singletonList(defaultSideInputInfo), options, executionContext); - Sized firstRead = sideInputReader.getSized( + WeightedValue firstRead = sideInputReader.getWeighted( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); // A repeated read should yield the same size. 
- Sized repeatedRead = sideInputReader.getSized( + WeightedValue repeatedRead = sideInputReader.getWeighted( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_NONEMPTY_WINDOW); assertThat(repeatedRead.getValue(), equalTo(firstRead.getValue())); - assertThat(repeatedRead.getSize(), equalTo(firstRead.getSize())); + assertThat(repeatedRead.getWeight(), equalTo(firstRead.getWeight())); } @@ -194,10 +194,10 @@ public void testDataflowSideInputReaderMiss() throws Exception { Collections.singletonList(defaultSideInputInfo), options, executionContext); // Reading an empty window yields the size of 0 elements. - Sized emptyWindowValue = sideInputReader.getSized( + WeightedValue emptyWindowValue = sideInputReader.getWeighted( DEFAULT_LENGTH_VIEW, PCollectionViewTesting.DEFAULT_EMPTY_WINDOW); assertThat(emptyWindowValue.getValue(), equalTo(0L)); - assertThat(emptyWindowValue.getSize(), equalTo(0L)); + assertThat(emptyWindowValue.getWeight(), equalTo(0L)); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java index c0062ba9284bd..9c4f272e17e74 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/StreamingModeExecutionContextTest.java @@ -76,12 +76,14 @@ private static TupleTag>> newStringTag() { @Test public void testTimerInternalsSetTimer() { - StreamingModeExecutionContext executionContext = new StreamingModeExecutionContext( - "stageName", null, new ConcurrentHashMap()); + StreamingModeExecutionContext executionContext = new StreamingModeExecutionContext("stageName", + null, new ConcurrentHashMap(), + new WindmillStateCache().forComputation("comp")); Windmill.WorkItemCommitRequest.Builder outputBuilder = Windmill.WorkItemCommitRequest.newBuilder(); - executionContext.start(null, + executionContext.start( + Windmill.WorkItem.newBuilder().setKey(ByteString.EMPTY).setWorkToken(17L).build(), new Instant(1000), // input watermark null, // output watermark stateReader, stateFetcher, outputBuilder); @@ -108,7 +110,7 @@ public void testTimerInternalsSetTimer() { @Test public void testSideInputReaderReconstituted() { StreamingModeExecutionContext executionContext = - new StreamingModeExecutionContext("stageName", null, null); + new StreamingModeExecutionContext("stageName", null, null, null); PCollectionView preview1 = PCollectionViewTesting.testingView( newStringTag(), new ConstantViewFn("view1"), StringUtf8Coder.of()); @@ -160,7 +162,8 @@ public void testReaderCache() throws Exception { ConcurrentHashMap readerCache = new ConcurrentHashMap(); StreamingModeExecutionContext context = - new StreamingModeExecutionContext("stageName", readerCache, null); + new StreamingModeExecutionContext("stageName", readerCache, /*stateNameMap=*/null, + /*stateCache=*/null); UnboundedSource.UnboundedReader reader1 = new CountingSource(Integer.MAX_VALUE).createReader(options, null); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SizedDirectSideInputReader.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WeightedDirectSideInputReader.java similarity index 60% rename from sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SizedDirectSideInputReader.java rename to sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WeightedDirectSideInputReader.java index 
daccfd071cd21..9667df2eddf78 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/SizedDirectSideInputReader.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WeightedDirectSideInputReader.java @@ -20,8 +20,8 @@ import com.google.cloud.dataflow.sdk.util.DirectSideInputReader; import com.google.cloud.dataflow.sdk.util.PTuple; import com.google.cloud.dataflow.sdk.util.SideInputReader; -import com.google.cloud.dataflow.sdk.util.Sized; -import com.google.cloud.dataflow.sdk.util.SizedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedSideInputReader; +import com.google.cloud.dataflow.sdk.util.WeightedValue; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; @@ -29,29 +29,30 @@ import java.util.Map; /** - * A {@link SizedSideInputReader} with explicitly provided sizes for all values. + * A {@link WeightedSideInputReader} with explicitly provided sizes for all values. */ -class SizedDirectSideInputReader extends SizedSideInputReader.Defaults { +class WeightedDirectSideInputReader extends WeightedSideInputReader.Defaults { private final SideInputReader subReader; - private final Map, Long> sizes; + private final Map, Long> weights; /** - * Returns a {@link SizedDirectSideInputReader} containing the contents in the provided + * Returns a {@link WeightedDirectSideInputReader} containing the contents in the provided * {@code Map}. A {@link DirectSideInputReader} will be used for the actual retrieval logic; this * class merely does the size bookkeeping. */ - public static SizedDirectSideInputReader withContents( - Map, Sized> sizedContents) { - return new SizedDirectSideInputReader(sizedContents); + public static WeightedDirectSideInputReader withContents( + Map, WeightedValue> sizedContents) { + return new WeightedDirectSideInputReader(sizedContents); } - private SizedDirectSideInputReader(Map, Sized> sizedContents) { - sizes = new HashMap<>(); + private WeightedDirectSideInputReader( + Map, WeightedValue> sizedContents) { + weights = new HashMap<>(); PTuple values = PTuple.empty(); - for (Map.Entry, Sized> entry : sizedContents.entrySet()) { + for (Map.Entry, WeightedValue> entry : sizedContents.entrySet()) { values = values.and(entry.getKey(), entry.getValue().getValue()); - sizes.put(entry.getKey(), entry.getValue().getSize()); + weights.put(entry.getKey(), entry.getValue().getWeight()); } subReader = DirectSideInputReader.of(values); } @@ -67,9 +68,9 @@ public boolean contains(PCollectionView view) { } @Override - public Sized getSized(PCollectionView view, BoundedWindow window) { - return Sized.of( + public WeightedValue getWeighted(PCollectionView view, BoundedWindow window) { + return WeightedValue.of( subReader.get(view, window), - sizes.get(view.getTagInternal())); + weights.get(view.getTagInternal())); } } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCacheTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCacheTest.java new file mode 100644 index 0000000000000..c7ad7b3eac486 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateCacheTest.java @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package com.google.cloud.dataflow.sdk.runners.worker; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.util.state.State; +import com.google.cloud.dataflow.sdk.util.state.StateNamespace; +import com.google.cloud.dataflow.sdk.util.state.StateNamespaces; +import com.google.cloud.dataflow.sdk.util.state.StateTag; +import com.google.protobuf.ByteString; + +import org.joda.time.Instant; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.Objects; + +/** + * Tests for {@link WindmillStateCache}. + */ +@RunWith(JUnit4.class) +public class WindmillStateCacheTest { + private static final String COMPUTATION = "computation"; + private static final ByteString KEY = ByteString.copyFromUtf8("key"); + private static final String STATE_FAMILY = "family"; + + private static class TestStateTag implements StateTag { + final String id; + + TestStateTag(String id) { + this.id = id; + } + + @Override + public void appendTo(Appendable appendable) throws IOException { + appendable.append(id); + } + + @Override + public String getId() { + return id; + } + + @Override + public TestState bind(StateBinder binder) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "Tag(" + id + ")"; + } + + @Override + public boolean equals(Object other) { + return (other instanceof TestStateTag) && Objects.equals(((TestStateTag) other).id, id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } + } + + private static class TestState implements State { + String value = null; + + TestState(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + + @Override + public void clear() { + this.value = null; + } + + @Override + public boolean equals(Object other) { + return (other instanceof TestState) && Objects.equals(((TestState) other).value, value); + } + + @Override + public int hashCode() { + return Objects.hash(value); + } + + @Override + public String toString() { + return "State(" + value + ")"; + } + } + + private static StateNamespace windowNamespace(long start) { + return StateNamespaces.window( + IntervalWindow.getCoder(), new IntervalWindow(new Instant(start), new Instant(start + 1))); + } + + private static StateNamespace triggerNamespace(long start, int triggerIdx) { + return StateNamespaces.windowAndTrigger(IntervalWindow.getCoder(), + new IntervalWindow(new Instant(start), new Instant(start + 1)), triggerIdx); + } + + WindmillStateCache cache; + WindmillStateCache.ForKey keyCache; + + @Before + public void setUp() { + cache = new WindmillStateCache(); + keyCache = cache.forComputation(COMPUTATION).forKey(KEY, STATE_FAMILY, 0L); + assertEquals(0, cache.getWeight()); + } + + @Test + public void testBasic() throws Exception { + assertNull(keyCache.get(StateNamespaces.global(), new TestStateTag("tag1"))); + 
assertNull(keyCache.get(windowNamespace(0), new TestStateTag("tag2"))); + assertNull(keyCache.get(triggerNamespace(0, 0), new TestStateTag("tag3"))); + assertNull(keyCache.get(triggerNamespace(0, 0), new TestStateTag("tag2"))); + + keyCache.put(StateNamespaces.global(), new TestStateTag("tag1"), new TestState("g1"), 2); + assertEquals(5, cache.getWeight()); + keyCache.put(windowNamespace(0), new TestStateTag("tag2"), new TestState("w2"), 2); + assertEquals(10, cache.getWeight()); + keyCache.put(triggerNamespace(0, 0), new TestStateTag("tag3"), new TestState("t3"), 2); + assertEquals(12, cache.getWeight()); + keyCache.put(triggerNamespace(0, 0), new TestStateTag("tag2"), new TestState("t2"), 2); + assertEquals(14, cache.getWeight()); + + assertEquals( + new TestState("g1"), keyCache.get(StateNamespaces.global(), new TestStateTag("tag1"))); + assertEquals(new TestState("w2"), keyCache.get(windowNamespace(0), new TestStateTag("tag2"))); + assertEquals( + new TestState("t3"), keyCache.get(triggerNamespace(0, 0), new TestStateTag("tag3"))); + assertEquals( + new TestState("t2"), keyCache.get(triggerNamespace(0, 0), new TestStateTag("tag2"))); + } + + /** + * Verifies that values are cached in the appropriate namespaces. + */ + @Test + public void testInvalidation() throws Exception { + assertNull(keyCache.get(StateNamespaces.global(), new TestStateTag("tag1"))); + keyCache.put(StateNamespaces.global(), new TestStateTag("tag1"), new TestState("g1"), 2); + assertEquals(5, cache.getWeight()); + assertEquals( + new TestState("g1"), keyCache.get(StateNamespaces.global(), new TestStateTag("tag1"))); + + keyCache = cache.forComputation(COMPUTATION).forKey(KEY, STATE_FAMILY, 1L); + assertEquals(5, cache.getWeight()); + assertNull(keyCache.get(StateNamespaces.global(), new TestStateTag("tag1"))); + assertEquals(0, cache.getWeight()); + } + + /** + * Verifies that the cache is invalidated when the cache token changes. + */ + @Test + public void testEviction() throws Exception { + keyCache.put(windowNamespace(0), new TestStateTag("tag2"), new TestState("w2"), 2); + assertEquals(5, cache.getWeight()); + keyCache.put(triggerNamespace(0, 0), new TestStateTag("tag3"), new TestState("t3"), 2000000000); + assertEquals(0, cache.getWeight()); + // Eviction is atomic across the whole window. + assertNull(keyCache.get(windowNamespace(0), new TestStateTag("tag2"))); + assertNull(keyCache.get(triggerNamespace(0, 0), new TestStateTag("tag3"))); + } + + /** + * Verifies that caches are kept indedently per-key. 
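+   * That is, entries written for one key or computation are not visible to a different key or
+   * computation, even when they share the same namespace and tag.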
+ */ + @Test + public void testMultipleKeys() throws Exception { + WindmillStateCache.ForKey keyCache1 = cache.forComputation("comp1").forKey( + ByteString.copyFromUtf8("key1"), STATE_FAMILY, 0L); + WindmillStateCache.ForKey keyCache2 = cache.forComputation("comp1").forKey( + ByteString.copyFromUtf8("key2"), STATE_FAMILY, 0L); + WindmillStateCache.ForKey keyCache3 = cache.forComputation("comp2").forKey( + ByteString.copyFromUtf8("key1"), STATE_FAMILY, 0L); + + keyCache1.put(StateNamespaces.global(), new TestStateTag("tag1"), new TestState("g1"), 2); + assertEquals( + new TestState("g1"), keyCache1.get(StateNamespaces.global(), new TestStateTag("tag1"))); + assertNull(keyCache2.get(StateNamespaces.global(), new TestStateTag("tag1"))); + assertNull(keyCache3.get(StateNamespaces.global(), new TestStateTag("tag1"))); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java index a63d9a0417cea..febc376647545 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java @@ -15,10 +15,12 @@ */ package com.google.cloud.dataflow.sdk.runners.worker; +import static com.google.cloud.dataflow.sdk.DataflowMatchers.ByteStringMatcher.byteStringEq; import static com.google.cloud.dataflow.sdk.testing.SystemNanoTimeSleeper.sleepMillis; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.when; @@ -53,10 +55,12 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; +import java.util.ArrayList; import java.util.Arrays; import java.util.concurrent.TimeUnit; @@ -79,6 +83,7 @@ public class WindmillStateInternalsTest { private WindmillStateReader mockReader; private WindmillStateInternals underTest; + private WindmillStateCache cache; @Mock private Supplier readStateSupplier; @@ -94,7 +99,10 @@ private static ByteString key(String prefix, StateNamespace namespace, String ad @Before public void setUp() { MockitoAnnotations.initMocks(this); - underTest = new WindmillStateInternals(STATE_FAMILY, true, mockReader, readStateSupplier); + cache = new WindmillStateCache(); + underTest = new WindmillStateInternals(STATE_FAMILY, mockReader, + cache.forComputation("comp").forKey(ByteString.EMPTY, STATE_FAMILY, 17L), + readStateSupplier); } private void waitAndSet(final SettableFuture future, final T value, final long millis) { @@ -111,6 +119,15 @@ public void run() { }).run(); } + private WindmillStateReader.WeightedList weightedList(String... 
elems) { + WindmillStateReader.WeightedList result = + new WindmillStateReader.WeightedList(new ArrayList(elems.length)); + for (String elem : elems) { + result.addWeighted(elem, elem.length()); + } + return result; + } + @Test public void testBagAddBeforeRead() throws Exception { StateTag> addr = StateTags.bag("bag", StringUtf8Coder.of()); @@ -208,8 +225,6 @@ public void testBagAddPersist() throws Exception { assertEquals(1, listUpdates.getValuesCount()); assertEquals("hello", listUpdates.getValues(0).getData().substring(1).toStringUtf8()); - // Blind adds should not need to read the future. - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -238,10 +253,6 @@ public void testBagClearPersist() throws Exception { assertEquals(1, listUpdates.getValuesCount()); assertEquals("world", listUpdates.getValues(0).getData().substring(1).toStringUtf8()); - // Clear should need to read the future. - Mockito.verify(mockReader) - .listFuture(key(NAMESPACE, "bag"), STATE_FAMILY, StringUtf8Coder.of()); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -260,29 +271,6 @@ public void testBagPersistEmpty() throws Exception { assertEquals(1, commitBuilder.getListUpdatesCount()); } - @Test - public void testBagNoStateFamilies() throws Exception { - underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - - StateTag> addr = StateTags.bag("bag", StringUtf8Coder.of()); - BagState bag = underTest.state(NAMESPACE, addr); - - bag.add("hello"); - bag.clear(); - bag.add("world"); - - Windmill.WorkItemCommitRequest.Builder commitBuilder = - Windmill.WorkItemCommitRequest.newBuilder(); - underTest.persist(commitBuilder); - - // Clear should need to read the future. - Mockito.verify(mockReader) - .listFuture(key(STATE_FAMILY, NAMESPACE, "bag"), "", StringUtf8Coder.of()); - Mockito.verify(mockReader).startBatchAndBlock(); - Mockito.verifyNoMoreInteractions(mockReader); - } - - @Test public void testCombiningAddBeforeRead() throws Exception { CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); @@ -331,7 +319,9 @@ public void testCombiningIsEmpty() throws Exception { when(mockReader.listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder)) .thenReturn(future); StateContents result = value.isEmpty(); - Mockito.verify(mockReader).listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder); + ArgumentCaptor byteString = ArgumentCaptor.forClass(ByteString.class); + Mockito.verify(mockReader).listFuture(byteString.capture(), eq(STATE_FAMILY), eq(accumCoder)); + assertThat(byteString.getValue(), byteStringEq(COMBINING_KEY)); waitAndSet(future, Arrays.asList(new int[] {29}), 200); assertThat(result.read(), Matchers.is(false)); @@ -374,8 +364,6 @@ public void testCombiningAddPersist() throws Exception { CoderUtils.decodeFromByteArray( accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); - // Blind adds should not need to read the future. - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -443,9 +431,6 @@ public void testCombiningClearPersist() throws Exception { CoderUtils.decodeFromByteArray( accumCoder, listUpdates.getValues(0).getData().substring(1).toByteArray())[0]); - // Blind adds should not need to read the future. 
- Mockito.verify(mockReader).listFuture(COMBINING_KEY, STATE_FAMILY, accumCoder); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -538,6 +523,8 @@ public void testWatermarkClearBeforeRead() throws Exception { Mockito.verifyNoMoreInteractions(mockReader); } + + /* @Test public void testWatermarkIsEmptyWindmillHasData() throws Exception { StateTag addr = StateTags.watermarkStateInternal( @@ -582,6 +569,7 @@ public void testWatermarkIsEmptyAfterClear() throws Exception { bag.add(new Instant(1000)); assertThat(result.read(), Matchers.is(false)); } + */ @Test public void testWatermarkPersistEarliest() throws Exception { @@ -602,8 +590,6 @@ public void testWatermarkPersistEarliest() throws Exception { assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); assertEquals(TimeUnit.MILLISECONDS.toMicros(1000), watermarkHold.getTimestamps(0)); - // Blind adds should not need to read the future. - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -629,9 +615,7 @@ public void testWatermarkPersistLatestEmpty() throws Exception { assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); - // Blind adds should not need to read the future. Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -657,9 +641,7 @@ public void testWatermarkPersistLatestWindmillWins() throws Exception { assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); assertEquals(TimeUnit.MILLISECONDS.toMicros(4000), watermarkHold.getTimestamps(0)); - // Blind adds should not need to read the future. Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -685,9 +667,7 @@ public void testWatermarkPersistLatestLocalAdditionsWin() throws Exception { assertEquals(key(NAMESPACE, "watermark"), watermarkHold.getTag()); assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); - // Blind adds should not need to read the future. Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -711,7 +691,6 @@ public void testWatermarkPersistEndOfWindow() throws Exception { assertEquals(TimeUnit.MILLISECONDS.toMicros(2000), watermarkHold.getTimestamps(0)); // Blind adds should not need to read the future. - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -735,12 +714,8 @@ public void testWatermarkClearPersist() throws Exception { Windmill.WatermarkHold clearAndUpdate = commitBuilder.getWatermarkHolds(0); assertEquals(key(NAMESPACE, "watermark"), clearAndUpdate.getTag()); assertEquals(1, clearAndUpdate.getTimestampsCount()); - assertEquals(key(NAMESPACE, "watermark"), clearAndUpdate.getTag()); - assertEquals(1, clearAndUpdate.getTimestampsCount()); assertEquals(TimeUnit.MILLISECONDS.toMicros(1000), clearAndUpdate.getTimestamps(0)); - // Clearing requires reading the future. 
- Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -761,39 +736,6 @@ public void testWatermarkPersistEmpty() throws Exception { assertEquals(1, commitBuilder.getWatermarkHoldsCount()); } - @Test - public void testWatermarkNoStateFamiliesEarliest() throws Exception { - underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - - StateTag addr = StateTags.watermarkStateInternal( - "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); - WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); - bag.get(); - Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); - } - - @Test - public void testWatermarkNoStateFamiliesLatest() throws Exception { - underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - - StateTag addr = StateTags.watermarkStateInternal( - "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); - WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); - bag.get(); - Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); - } - - @Test - public void testWatermarkNoStateFamiliesEndOfWindow() throws Exception { - underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - - StateTag addr = StateTags.watermarkStateInternal( - "watermark", OutputTimeFns.outputAtLatestInputTimestamp()); - WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); - bag.get(); - Mockito.verify(mockReader).watermarkFuture(key(STATE_FAMILY, NAMESPACE, "watermark"), ""); - } - @Test public void testValueSetBeforeRead() throws Exception { StateTag> addr = StateTags.value("value", StringUtf8Coder.of()); @@ -846,10 +788,6 @@ public void testValueSetPersist() throws Exception { assertEquals("Hi", valueUpdate.getValue().getData().toStringUtf8()); assertTrue(valueUpdate.isInitialized()); - // Setting a value requires a read to prevent blind writes. - Mockito.verify(mockReader) - .valueFuture(key(NAMESPACE, "value"), STATE_FAMILY, StringUtf8Coder.of()); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -870,10 +808,6 @@ public void testValueClearPersist() throws Exception { assertEquals(key(NAMESPACE, "value"), valueUpdate.getTag()); assertEquals(0, valueUpdate.getValue().getData().size()); - // Setting a value requires a read to prevent blind writes. 
- Mockito.verify(mockReader) - .valueFuture(key(NAMESPACE, "value"), STATE_FAMILY, StringUtf8Coder.of()); - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @@ -888,24 +822,157 @@ public void testValueNoChangePersist() throws Exception { assertEquals(0, commitBuilder.getValueUpdatesCount()); - // No changes shouldn't require getting any futures - Mockito.verify(mockReader).startBatchAndBlock(); Mockito.verifyNoMoreInteractions(mockReader); } @Test - public void testValueNoStateFamilies() throws Exception { - underTest = new WindmillStateInternals(STATE_FAMILY, false, mockReader, readStateSupplier); - + public void testCachedValue() throws Exception { StateTag> addr = StateTags.value("value", StringUtf8Coder.of()); ValueState value = underTest.state(NAMESPACE, addr); - SettableFuture future = SettableFuture.create(); - when(mockReader.valueFuture(key(STATE_FAMILY, NAMESPACE, "value"), "", StringUtf8Coder.of())) + assertEquals(0, cache.getWeight()); + + value.set("Hi"); + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(2, cache.getWeight()); + + value = underTest.state(NAMESPACE, addr); + assertEquals("Hi", value.get().read()); + value.clear(); + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(0, cache.getWeight()); + + value = underTest.state(NAMESPACE, addr); + assertEquals(null, value.get().read()); + + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testCachedBag() throws Exception { + StateTag> addr = StateTags.bag("bag", StringUtf8Coder.of()); + BagState bag = underTest.state(NAMESPACE, addr); + + assertEquals(0, cache.getWeight()); + + SettableFuture> future = SettableFuture.create(); + when(mockReader.listFuture(key(NAMESPACE, "bag"), STATE_FAMILY, StringUtf8Coder.of())) .thenReturn(future); - waitAndSet(future, "World", 200); - assertEquals("World", value.get().read()); + StateContents> result = bag.get(); + + assertEquals(0, cache.getWeight()); + + bag.add("hello"); + waitAndSet(future, weightedList("world"), 200); + assertThat(result.read(), Matchers.containsInAnyOrder("hello", "world")); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(10, cache.getWeight()); + + bag = underTest.state(NAMESPACE, addr); + bag.add("goodbye"); + assertThat(bag.get().read(), Matchers.containsInAnyOrder("hello", "world", "goodbye")); + bag.clear(); + bag.add("new"); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(3, cache.getWeight()); + + bag = underTest.state(NAMESPACE, addr); + bag.add("new2"); + assertThat(bag.get().read(), Matchers.containsInAnyOrder("new", "new2")); + bag.clear(); + bag.add("new3"); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(4, cache.getWeight()); + + bag = underTest.state(NAMESPACE, addr); + assertThat(bag.get().read(), Matchers.containsInAnyOrder("new3")); + + Mockito.verify(mockReader) + .listFuture(key(NAMESPACE, "bag"), STATE_FAMILY, StringUtf8Coder.of()); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testCachedWatermarkHold() throws Exception { + StateTag addr = StateTags.watermarkStateInternal( + "watermark", OutputTimeFns.outputAtEarliestInputTimestamp()); + WatermarkStateInternal bag = underTest.state(NAMESPACE, addr); + + SettableFuture future = SettableFuture.create(); + when(mockReader.watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY)).thenReturn(future); + + assertEquals(0, 
cache.getWeight()); + + StateContents result = bag.get(); + + bag.add(new Instant(3000)); + waitAndSet(future, new Instant(2000), 200); + assertThat(result.read(), Matchers.equalTo(new Instant(2000))); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(8, cache.getWeight()); + + bag = underTest.state(NAMESPACE, addr); + assertThat(bag.get().read(), Matchers.equalTo(new Instant(2000))); + bag.clear(); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(8, cache.getWeight()); + + bag = underTest.state(NAMESPACE, addr); + assertEquals(null, bag.get().read()); + + Mockito.verify(mockReader).watermarkFuture(key(NAMESPACE, "watermark"), STATE_FAMILY); + Mockito.verifyNoMoreInteractions(mockReader); + } + + @Test + public void testCachedCombining() throws Exception { + CombiningValueState value = underTest.state(NAMESPACE, COMBINING_ADDR); + + SettableFuture> future = SettableFuture.create(); + when(mockReader.listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder)) + .thenReturn(future); + + assertEquals(0, cache.getWeight()); + + StateContents result = value.get(); + + value.add(1); + waitAndSet(future, Arrays.asList(new int[]{2}), 200); + assertThat(result.read(), Matchers.equalTo(3)); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(1, cache.getWeight()); + + value = underTest.state(NAMESPACE, COMBINING_ADDR); + assertThat(value.get().read(), Matchers.equalTo(3)); + value.add(3); + assertThat(value.get().read(), Matchers.equalTo(6)); + value.clear(); + + underTest.persist(Windmill.WorkItemCommitRequest.newBuilder()); + + assertEquals(0, cache.getWeight()); + + value = underTest.state(NAMESPACE, COMBINING_ADDR); + assertThat(value.get().read(), Matchers.equalTo(0)); + + Mockito.verify(mockReader) + .listFuture(key(NAMESPACE, "combining"), STATE_FAMILY, accumCoder); + Mockito.verifyNoMoreInteractions(mockReader); } private void disableCompactOnWrite() { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ApiSurfaceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ApiSurfaceTest.java index d70c329be4a50..e995b821de69f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ApiSurfaceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ApiSurfaceTest.java @@ -47,6 +47,7 @@ public void testOurApiSurface() throws Exception { ApiSurface checkedApiSurface = ApiSurface.getSdkApiSurface() .pruningClassName("com.google.cloud.dataflow.sdk.runners.worker.StateFetcher") .pruningClassName("com.google.cloud.dataflow.sdk.util.common.ReflectHelpers") + .pruningClassName("com.google.cloud.dataflow.sdk.DataflowMatchers") .pruningClassName("com.google.cloud.dataflow.sdk.TestUtils") .pruningClassName("com.google.cloud.dataflow.sdk.WindowMatchers"); @@ -183,4 +184,3 @@ public void testExposedArrayCycle() throws Exception { assertExposed(ExposedArrayCycle.class, Exposed.class); } } - From 55cf870891b1ab080043008278139fa549f81796 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Mon, 21 Dec 2015 18:30:29 -0800 Subject: [PATCH 1246/1541] DataflowPipelineRunnerTest: make pass if user has default project set In commit b6c4f8f3f7701a4f0190508e65eeb4aa9d1a4b88, I made DataflowPipelineOptions use the default project from gcloud in some cases it wasn't already being used. However, this change made a test fail on developer machines where this codepath would be used. 
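The remedy, spelled out next and applied in the diff that follows, boils down to the sketch below; the option and runner classes are the ones the patch touches, everything else is illustrative:

    import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
    import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;

    // Build options for a test that expects *no* project to be configured.
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowPipelineRunner.class);
    // Explicitly clear the project so the default instance factory cannot
    // silently fill it in from the developer's local gcloud configuration.
    options.setProject(null);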
Fix the test by explicitly setting the project to null, preventing the extraction of credentials from the gcloud configuration. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110733418 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java index 3a29d82040954..cccdccff28941 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java @@ -519,6 +519,9 @@ public void testNoProjectFails() { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setRunner(DataflowPipelineRunner.class); + // Explicitly set to null to prevent the default instance factory from reading credentials + // from a user's environment, causing this test to fail. + options.setProject(null); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("Project id"); From e436a1a7b2d99f923adc3ae07169ebaad04bf951 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 21 Dec 2015 19:55:30 -0800 Subject: [PATCH 1247/1541] Touch up KV javadoc ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110736748 --- .../google/cloud/dataflow/sdk/values/KV.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java index 7601c30637be2..5143e063f2a51 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -29,24 +29,24 @@ /** * An immutable key/value pair. * - *

Various {@link PTransform}s like {@link GroupByKey} and {@link Combine#perKey} - * work on {@link PCollection}s of KVs. + *

Various {@link PTransform PTransforms} like {@link GroupByKey} and {@link Combine#perKey} + * operate on {@link PCollection PCollections} of {@link KV KVs}. * * @param the type of the key * @param the type of the value */ public class KV implements Serializable { - /** Returns a KV with the given key and value. */ + /** Returns a {@link KV} with the given key and value. */ public static KV of(K key, V value) { return new KV<>(key, value); } - /** Returns the key of this KV. */ + /** Returns the key of this {@link KV}. */ public K getKey() { return key; } - /** Returns the value of this KV. */ + /** Returns the value of this {@link KV}. */ public V getValue() { return value; } @@ -76,7 +76,11 @@ public boolean equals(Object other) { && Objects.deepEquals(this.value, otherKv.value); } - /** Orders the {@link KV} by the key. A null key is less than any non-null key. */ + /** + * Orders the {@link KV} by the key. + * + *

A {@code null} key is less than any non-{@code null} key. + */ public static class OrderByKey, V> implements SerializableComparator> { @Override @@ -91,7 +95,11 @@ public int compare(KV a, KV b) { } } - /** Orders the {@link KV} by the value. A null value is less than any non-null value. */ + /** + * Orders the {@link KV} by the value. + * + *

A {@code null} value is less than any non-{@code null} value. + */ public static class OrderByValue> implements SerializableComparator> { @Override From dcc3fe84bb671d61fa218425294f41223c4f4af7 Mon Sep 17 00:00:00 2001 From: dhalperi Date: Tue, 22 Dec 2015 09:54:18 -0800 Subject: [PATCH 1248/1541] RetryHttpRequestInitializer: verify that SocketTimeoutExceptions are handled ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110778488 --- .../util/RetryHttpRequestInitializerTest.java | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java index a914b06f096b5..7097190802588 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializerTest.java @@ -16,7 +16,10 @@ package com.google.cloud.dataflow.sdk.util; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyInt; import static org.mockito.Matchers.anyString; @@ -36,9 +39,15 @@ import com.google.api.client.http.LowLevelHttpResponse; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; +import com.google.api.client.testing.http.MockHttpTransport; +import com.google.api.client.testing.http.MockLowLevelHttpRequest; import com.google.api.client.util.NanoClock; import com.google.api.client.util.Sleeper; +import com.google.api.services.bigquery.Bigquery; +import com.google.api.services.bigquery.model.TableReference; +import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.storage.Storage; +import com.google.common.collect.ImmutableList; import org.hamcrest.Matchers; import org.junit.After; @@ -53,8 +62,10 @@ import org.mockito.stubbing.Answer; import java.io.IOException; +import java.net.SocketTimeoutException; import java.security.PrivateKey; import java.util.Arrays; +import java.util.concurrent.atomic.AtomicLong; /** * Tests for RetryHttpRequestInitializer. @@ -240,4 +251,46 @@ public Integer answer(InvocationOnMock invocation) { verify(mockLowLevelRequest, times(retries)).execute(); verify(mockLowLevelResponse, times(retries)).getStatusCode(); } + + /** + * Tests that when RPCs fail with {@link SocketTimeoutException}, the IO exception handler + * is invoked. + */ + @Test + public void testIOExceptionHandlerIsInvokedOnTimeout() throws Exception { + // Counts the number of calls to execute the HTTP request. + final AtomicLong executeCount = new AtomicLong(); + + // 10 is a private internal constant in the Google API Client library. See + // com.google.api.client.http.HttpRequest#setNumberOfRetries + // TODO: update this test once the private internal constant is public. + final int defaultNumberOfRetries = 10; + + // A mock HTTP request that always throws SocketTimeoutException. 
+ MockHttpTransport transport = + new MockHttpTransport.Builder().setLowLevelHttpRequest(new MockLowLevelHttpRequest() { + @Override + public LowLevelHttpResponse execute() throws IOException { + executeCount.incrementAndGet(); + throw new SocketTimeoutException("Fake forced timeout exception"); + } + }).build(); + + // A sample HTTP request to BigQuery that uses both default Transport and default + // RetryHttpInitializer. + Bigquery b = new Bigquery.Builder( + transport, Transport.getJsonFactory(), new RetryHttpRequestInitializer()).build(); + BigQueryTableInserter inserter = new BigQueryTableInserter(b); + TableReference t = new TableReference() + .setProjectId("project").setDatasetId("dataset").setTableId("table"); + + try { + inserter.insertAll(t, ImmutableList.of(new TableRow())); + fail(); + } catch (Throwable e) { + assertThat(e, Matchers.instanceOf(RuntimeException.class)); + assertThat(e.getCause(), Matchers.instanceOf(SocketTimeoutException.class)); + assertEquals(1 + defaultNumberOfRetries, executeCount.get()); + } + } } From 2cb3eac4e6a05fd64a9b9550a9d9e4788df707f1 Mon Sep 17 00:00:00 2001 From: klk Date: Tue, 22 Dec 2015 10:23:51 -0800 Subject: [PATCH 1249/1541] Touch up javadoc for values package ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110780949 --- .../google/cloud/dataflow/sdk/values/KV.java | 4 +- .../cloud/dataflow/sdk/values/PBegin.java | 12 +- .../dataflow/sdk/values/PCollection.java | 118 +++++++++--------- .../dataflow/sdk/values/PCollectionList.java | 59 +++++---- .../dataflow/sdk/values/PCollectionTuple.java | 23 ++-- .../dataflow/sdk/values/PCollectionView.java | 21 ++-- .../cloud/dataflow/sdk/values/PDone.java | 9 +- .../cloud/dataflow/sdk/values/PInput.java | 15 ++- .../cloud/dataflow/sdk/values/POutput.java | 31 ++--- .../dataflow/sdk/values/POutputValueBase.java | 19 +-- .../cloud/dataflow/sdk/values/PValue.java | 9 +- .../cloud/dataflow/sdk/values/PValueBase.java | 44 +++---- .../dataflow/sdk/values/TimestampedValue.java | 8 +- .../cloud/dataflow/sdk/values/TupleTag.java | 10 +- .../dataflow/sdk/values/TupleTagList.java | 36 +++--- .../dataflow/sdk/values/TypeDescriptor.java | 12 +- .../dataflow/sdk/values/TypedPValue.java | 41 +++--- .../dataflow/sdk/values/package-info.java | 48 ++++--- 18 files changed, 272 insertions(+), 247 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java index 5143e063f2a51..23cee07cfe051 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java @@ -77,7 +77,7 @@ public boolean equals(Object other) { } /** - * Orders the {@link KV} by the key. + * A {@link Comparator} that orders {@link KV KVs} by the natural ordering of their keys. * *

A {@code null} key is less than any non-{@code null} key. */ @@ -96,7 +96,7 @@ public int compare(KV a, KV b) { } /** - * Orders the {@link KV} by the value. + * A {@link Comparator} that orders {@link KV KVs} by the natural ordering of their values. * *
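A small sketch of how such a comparator might be used locally; the element values are placeholders and only KV.of and KV.OrderByKey are taken from the class shown in this diff:

    import com.google.cloud.dataflow.sdk.values.KV;

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    // Sort a local list of KVs by their keys using KV.OrderByKey.
    List<KV<String, Integer>> pairs = new ArrayList<>();
    pairs.add(KV.of("banana", 2));
    pairs.add(KV.of("apple", 1));
    Collections.sort(pairs, new KV.OrderByKey<String, Integer>());
    // pairs now starts with KV.of("apple", 1).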

A {@code null} value is less than any non-{@code null} value. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java index 852c7084bec89..23ac3aed32d88 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java @@ -25,14 +25,14 @@ import java.util.Collections; /** - * {@code PBegin} is used as the "input" to a root {@link PTransform} that is the first - * operation in a {@link Pipeline}, such as {@link Read TextIO.Read} or {@link Create}. + * {@link PBegin} is the "input" to a root {@link PTransform}, such as {@link Read Read} or + * {@link Create}. * *
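A minimal sketch of such a root application, assuming the Pipeline, PipelineOptionsFactory, and TextIO classes referenced elsewhere in this SDK; the file path is only a placeholder:

    import com.google.cloud.dataflow.sdk.Pipeline;
    import com.google.cloud.dataflow.sdk.io.TextIO;
    import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
    import com.google.cloud.dataflow.sdk.values.PCollection;

    // The pipeline supplies the PBegin; the root transform consumes it
    // and produces the pipeline's first PCollection.
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    PCollection<String> lines =
        p.apply(TextIO.Read.from("gs://your-bucket/path/to/input-*.txt"));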

Typically created by calling {@link Pipeline#begin} on a Pipeline. */ public class PBegin implements PInput { /** - * Returns a {@code PBegin} in the given {@code Pipeline}. + * Returns a {@link PBegin} in the given {@link Pipeline}. */ public static PBegin in(Pipeline pipeline) { return new PBegin(pipeline); @@ -40,7 +40,7 @@ public static PBegin in(Pipeline pipeline) { /** * Like {@link #apply(String, PTransform)} but defaulting to the name - * of the {@code PTransform}. + * of the {@link PTransform}. */ public OutputT apply( PTransform t) { @@ -48,7 +48,7 @@ public OutputT apply( } /** - * Applies the given {@code PTransform} to this input {@code PBegin}, + * Applies the given {@link PTransform} to this input {@link PBegin}, * using {@code name} to identify this specific application of the transform. * This name is used in various places, including the monitoring UI, logging, * and to stably identify this application node in the job graph. @@ -77,7 +77,7 @@ public void finishSpecifying() { ///////////////////////////////////////////////////////////////////////////// /** - * Constructs a {@code PBegin} in the given {@code Pipeline}. + * Constructs a {@link PBegin} in the given {@link Pipeline}. */ protected PBegin(Pipeline pipeline) { this.pipeline = pipeline; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java index 15e6345b49f15..6fffddfeb9606 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java @@ -18,76 +18,76 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.BigQueryIO; +import com.google.cloud.dataflow.sdk.io.PubsubIO; +import com.google.cloud.dataflow.sdk.io.Read; +import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows; import com.google.cloud.dataflow.sdk.transforms.windowing.Window; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; /** - * A {@code PCollection} is an immutable collection of values of type - * {@code T}. A {@code PCollection} can contain either a bounded or unbounded - * number of elements. Bounded and unbounded {@code PCollection}s are produced - * as the output of {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s - * (including root PTransforms like - * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read}, - * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read} and - * {@link com.google.cloud.dataflow.sdk.transforms.Create}), and can + * A {@link PCollection PCollection<T>} is an immutable collection of values of type + * {@code T}. A {@link PCollection} can contain either a bounded or unbounded + * number of elements. Bounded and unbounded {@link PCollection PCollections} are produced + * as the output of {@link PTransform PTransforms} + * (including root PTransforms like {@link Read} and {@link Create}), and can * be passed as the inputs of other PTransforms. * *

Some root transforms produce bounded {@code PCollections} and others - * produce unbounded ones. For example, - * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read} reads a static set - * of files, so it produces a bounded {@code PCollection}. - * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read}, on the other hand, - * receives a potentially infinite stream of Pubsub messages, so it produces - * an unbounded {@code PCollection}. + * produce unbounded ones. For example, {@link TextIO.Read} reads a static set + * of files, so it produces a bounded {@link PCollection}. + * {@link PubsubIO.Read}, on the other hand, receives a potentially infinite stream + * of Pubsub messages, so it produces an unbounded {@link PCollection}. * - *

Each element in a {@code PCollection} may have an associated implicit + *

Each element in a {@link PCollection} may have an associated implicit * timestamp. Readers assign timestamps to elements when they create - * {@code PCollection}s, and other {@code PTransform}s propagate these - * timestamps from their input to their output. For example, PubsubIO.Read - * assigns pubsub message timestamps to elements, and TextIO.Read assigns - * the default value {@code Long.MIN_VALUE} to elements. User code can + * {@link PCollection PCollections}, and other {@link PTransform PTransforms} propagate these + * timestamps from their input to their output. For example, {@link PubsubIO.Read} + * assigns pubsub message timestamps to elements, and {@link TextIO.Read} assigns + * the default value {@link BoundedWindow#TIMESTAMP_MIN_VALUE} to elements. User code can * explicitly assign timestamps to elements with * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp}. * - *
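For instance, a DoFn along the following lines could stamp each element with a time parsed from the element itself; the CSV layout is only an assumption made for the sketch:

    import com.google.cloud.dataflow.sdk.transforms.DoFn;
    import org.joda.time.Instant;

    // Assigns each element a timestamp taken from its first CSV field.
    class AddEventTimestampFn extends DoFn<String, String> {
      @Override
      public void processElement(ProcessContext c) {
        long millis = Long.parseLong(c.element().split(",")[0]);
        c.outputWithTimestamp(c.element(), new Instant(millis));
      }
    }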

Additionally, a {@code PCollection} has an associated + *

Additionally, a {@link PCollection} has an associated * {@link WindowFn} and each element is assigned to a set of windows. * By default, the windowing function is {@link GlobalWindows} * and all elements are assigned into a single default window. * This default can be overridden with the {@link Window} - * {@code PTransform}. Dataflow pipelines run in classic batch MapReduce style - * with the default GlobalWindow strategy if timestamps are ignored. + * {@link PTransform}. * - *
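A brief sketch of overriding the default windowing; the one-minute duration is arbitrary and the input collection is assumed to exist already:

    import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
    import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
    import com.google.cloud.dataflow.sdk.values.PCollection;
    import org.joda.time.Duration;

    // Replace the default GlobalWindows strategy with fixed one-minute windows.
    PCollection<String> input = ...;
    PCollection<String> windowed =
        input.apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));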

See the individual {@code PTransform} subclasses for specific information + *

See the individual {@link PTransform} subclasses for specific information * on how they propagate timestamps and windowing. * - * @param the type of the elements of this PCollection + * @param the type of the elements of this {@link PCollection} */ public class PCollection extends TypedPValue { /** - * The PCollection IsBounded property. + * The enumeration of cases for whether a {@link PCollection} is bounded. */ public enum IsBounded { /** - * {@code PCollection} contains bounded data elements, such as - * {@code PCollection}s from {@code TextIO}, {@code BigQueryIO}, - * {@code Create} e.t.c. + * Indicates that a {@link PCollection} contains bounded data elements, such as + * {@link PCollection PCollections} from {@link TextIO}, {@link BigQueryIO}, + * {@link Create} e.t.c. */ BOUNDED, /** - * {@code PCollection} contains unbounded data elements, such as - * {@code PCollection}s from {@code PubsubIO}. + * Indicates that a {@link PCollection} contains unbounded data elements, such as + * {@link PCollection PCollections} from {@link PubsubIO}. */ UNBOUNDED; /** * Returns the composed IsBounded property. * - *

The composed property is BOUNDED only if all components are BOUNDED. - * Otherwise, it is UNBOUNDED. + *

The composed property is {@link #BOUNDED} only if all components are {@link #BOUNDED}. + * Otherwise, it is {@link #UNBOUNDED}. */ public IsBounded and(IsBounded that) { if (this == BOUNDED && that == BOUNDED) { @@ -99,10 +99,10 @@ public IsBounded and(IsBounded that) { } /** - * Returns the name of this PCollection. + * Returns the name of this {@link PCollection}. * - *

By default, the name of a PCollection is based on the name of the - * PTransform that produces it. It can be specified explicitly by + *

By default, the name of a {@link PCollection} is based on the name of the + * {@link PTransform} that produces it. It can be specified explicitly by * calling {@link #setName}. * * @throws IllegalStateException if the name hasn't been set yet @@ -113,11 +113,11 @@ public String getName() { } /** - * Sets the name of this PCollection. Returns {@code this}. + * Sets the name of this {@link PCollection}. Returns {@code this}. * - * @throws IllegalStateException if this PCollection has already been - * finalized and is no longer settable, e.g., by having - * {@code apply()} called on it + * @throws IllegalStateException if this {@link PCollection} has already been + * finalized and may no longer be set. + * Once {@link #apply} has been called, this will be the case. */ @Override public PCollection setName(String name) { @@ -126,11 +126,11 @@ public PCollection setName(String name) { } /** - * Returns the Coder used by this PCollection to encode and decode + * Returns the {@link Coder} used by this {@link PCollection} to encode and decode * the values stored in it. * - * @throws IllegalStateException if the Coder hasn't been set, and - * couldn't be inferred + * @throws IllegalStateException if the {@link Coder} hasn't been set, and + * couldn't be inferred. */ @Override public Coder getCoder() { @@ -138,12 +138,12 @@ public Coder getCoder() { } /** - * Sets the Coder used by this PCollection to encode and decode the - * values stored in it. Returns {@code this}. + * Sets the {@link Coder} used by this {@link PCollection} to encode and decode the + * values stored in it. Returns {@code this}. * - * @throws IllegalStateException if this PCollection has already - * been finalized and is no longer settable, e.g., by having - * {@code apply()} called on it + * @throws IllegalStateException if this {@link PCollection} has already + * been finalized and may no longer be set. + * Once {@link #apply} has been called, this will be the case. */ @Override public PCollection setCoder(Coder coder) { @@ -154,16 +154,20 @@ public PCollection setCoder(Coder coder) { /** * Like {@link IsBounded#apply(String, PTransform)} but defaulting to the name * of the {@link PTransform}. + * + * @return the output of the applied {@link PTransform} */ public OutputT apply(PTransform, OutputT> t) { return Pipeline.applyTransform(this, t); } /** - * Applies the given {@code PTransform} to this input {@code PCollection}, + * Applies the given {@link PTransform} to this input {@link PCollection}, * using {@code name} to identify this specific application of the transform. * This name is used in various places, including the monitoring UI, logging, * and to stably identify this application node in the job graph. + * + * @return the output of the applied {@link PTransform} */ public OutputT apply( String name, PTransform, OutputT> t) { @@ -171,7 +175,7 @@ public OutputT apply( } /** - * Returns the {@link WindowingStrategy} of this {@code PCollection}. + * Returns the {@link WindowingStrategy} of this {@link PCollection}. */ public WindowingStrategy getWindowingStrategy() { return windowingStrategy; @@ -186,7 +190,7 @@ public IsBounded isBounded() { /** * {@link WindowingStrategy} that will be used for merging windows and triggering output in this - * {@code PCollection} and subsequence {@code PCollection}s produced from this one. + * {@link PCollection} and subsequence {@link PCollection PCollections} produced from this one. * *

By default, no merging is performed. */ @@ -199,10 +203,10 @@ private PCollection(Pipeline p) { } /** - * Sets the {@code TypeDescriptor} for this {@code PCollection}, so that - * the enclosing {@code PCollectionTuple}, {@code PCollectionList}, - * or {@code PTransform>}, etc., can provide - * more detailed reflective information. + * Sets the {@link TypeDescriptor TypeDescriptor<T>} for this + * {@link PCollection PCollection<T>}. This may allow the enclosing + * {@link PCollectionTuple}, {@link PCollectionList}, or {@code PTransform>}, + * etc., to provide more detailed reflective information. */ @Override public PCollection setTypeDescriptorInternal(TypeDescriptor typeDescriptor) { @@ -211,7 +215,7 @@ public PCollection setTypeDescriptorInternal(TypeDescriptor typeDescriptor } /** - * Sets the {@link WindowingStrategy} of this {@code PCollection}. + * Sets the {@link WindowingStrategy} of this {@link PCollection}. * *

For use by primitive transformations only. */ @@ -221,7 +225,7 @@ public PCollection setWindowingStrategyInternal(WindowingStrategy windo } /** - * Sets the {@link PCollection.IsBounded} of this {@code PCollection}. + * Sets the {@link PCollection.IsBounded} of this {@link PCollection}. * *

For use by internal transformations only. */ @@ -231,7 +235,7 @@ public PCollection setIsBoundedInternal(IsBounded isBounded) { } /** - * Creates and returns a new PCollection for a primitive output. + * Creates and returns a new {@link PCollection} for a primitive output. * *

For use by primitive transformations only. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java index 867ca5100aef1..b99af020bfc8d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java @@ -18,7 +18,9 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; +import com.google.cloud.dataflow.sdk.transforms.Flatten; import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.Partition; import com.google.common.collect.ImmutableList; import java.util.ArrayList; @@ -28,12 +30,10 @@ import java.util.List; /** - * A {@code PCollectionList} is an immutable list of homogeneously - * typed {@code PCollection}s. A PCollectionList is used, for + * A {@link PCollectionList PCollectionList<T>} is an immutable list of homogeneously + * typed {@link PCollection PCollection<T>s}. A {@link PCollectionList} is used, for * instance, as the input to - * {@link com.google.cloud.dataflow.sdk.transforms.Flatten} or the - * output of - * {@link com.google.cloud.dataflow.sdk.transforms.Partition}. + * {@link Flatten} or the output of {@link Partition}. * *

PCollectionLists can be created and accessed like follows: *

 {@code
@@ -57,13 +57,13 @@
  * List> allPcs = pcs.getAll();
  * } 
* - * @param the type of the elements of all the PCollections in this list + * @param the type of the elements of all the {@link PCollection PCollections} in this list */ public class PCollectionList implements PInput, POutput { /** - * Returns an empty PCollectionList that is part of the given Pipeline. + * Returns an empty {@link PCollectionList} that is part of the given {@link Pipeline}. * - *

Longer PCollectionLists can be created by calling + *

Longer {@link PCollectionList PCollectionLists} can be created by calling * {@link #and} on the result. */ public static PCollectionList empty(Pipeline pipeline) { @@ -71,9 +71,9 @@ public static PCollectionList empty(Pipeline pipeline) { } /** - * Returns a singleton PCollectionList containing the given PCollection. + * Returns a singleton {@link PCollectionList} containing the given {@link PCollection}. * - *

Longer PCollectionLists can be created by calling + *

Longer {@link PCollectionList PCollectionLists} can be created by calling * {@link #and} on the result. */ public static PCollectionList of(PCollection pc) { @@ -81,12 +81,13 @@ public static PCollectionList of(PCollection pc) { } /** - * Returns a PCollectionList containing the given PCollections, in order. + * Returns a {@link PCollectionList} containing the given {@link PCollection PCollections}, + * in order. * *

The argument list cannot be empty. * - *

All the PCollections in the resulting PCollectionList must be - * part of the same Pipeline. + *

All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be + * part of the same {@link Pipeline}. * *

Longer PCollectionLists can be created by calling * {@link #and} on the result. @@ -102,11 +103,11 @@ public static PCollectionList of(Iterable> pcs) { } /** - * Returns a new PCollectionList that has all the PCollections of - * this PCollectionList plus the given PCollection appended to the end. + * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of + * this {@link PCollectionList} plus the given {@link PCollection} appended to the end. * - *

All the PCollections in the resulting PCollectionList must be - * part of the same Pipeline. + *

All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be + * part of the same {@link Pipeline}. */ public PCollectionList and(PCollection pc) { if (pc.getPipeline() != pipeline) { @@ -121,12 +122,12 @@ public PCollectionList and(PCollection pc) { } /** - * Returns a new PCollectionList that has all the PCollections of - * this PCollectionList plus the given PCollections appended to the end, - * in order. + * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of + * this {@link PCollectionList} plus the given {@link PCollection PCollections} appended to the + * end, in order. * - *

All the PCollections in the resulting PCollectionList must be - * part of the same Pipeline. + *

All the {@link PCollections} in the resulting {@link PCollectionList} must be + * part of the same {@link Pipeline}. */ public PCollectionList and(Iterable> pcs) { List> copy = new ArrayList<>(pcollections); @@ -141,15 +142,16 @@ public PCollectionList and(Iterable> pcs) { } /** - * Returns the number of PCollections in this PCollectionList. + * Returns the number of {@link PCollection PCollections} in this {@link PCollectionList}. */ public int size() { return pcollections.size(); } /** - * Returns the PCollection at the given index (origin zero). Throws - * IndexOutOfBounds if the index is out of the range + * Returns the {@link PCollection} at the given index (origin zero). + * + * @throws IndexOutOfBoundsException if the index is out of the range * {@code [0..size()-1]}. */ public PCollection get(int index) { @@ -157,7 +159,8 @@ public PCollection get(int index) { } /** - * Returns an immutable List of all the PCollections in this PCollectionList. + * Returns an immutable List of all the {@link PCollection PCollections} in this + * {@link PCollectionList}. */ public List> getAll() { return pcollections; @@ -173,10 +176,12 @@ public OutputT apply( } /** - * Applies the given {@code PTransform} to this input {@code PCollectionList}, + * Applies the given {@link PTransform} to this input {@link PCollectionList}, * using {@code name} to identify this specific application of the transform. * This name is used in various places, including the monitoring UI, logging, * and to stably identify this application node in the job graph. + * + * @return the output of the applied {@link PTransform} */ public OutputT apply( String name, PTransform, OutputT> t) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java index 04926392f98c0..58550e4182c74 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java @@ -19,6 +19,7 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded; import com.google.common.collect.ImmutableMap; @@ -29,17 +30,17 @@ import java.util.Map; /** - * A {@code PCollectionTuple} is an immutable tuple of - * heterogeneously-typed {@link PCollection}s, "keyed" by - * {@link TupleTag}s. A PCollectionTuple can be used as the input or + * A {@link PCollectionTuple} is an immutable tuple of + * heterogeneously-typed {@link PCollection PCollections}, "keyed" by + * {@link TupleTag TupleTags}. A {@link PCollectionTuple} can be used as the input or * output of a - * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} taking + * {@link PTransform} taking * or producing multiple PCollection inputs or outputs that can be of * different types, for instance a - * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} with side + * {@link ParDo} with side * outputs. * - *

A {@code PCollectionTuple} can be created and accessed like follows: + *

A {@link PCollectionTuple} can be created and accessed as follows: *

 {@code
  * PCollection pc1 = ...;
  * PCollection pc2 = ...;
@@ -74,7 +75,7 @@
  */
 public class PCollectionTuple implements PInput, POutput {
   /**
-   * Returns an empty {@code PCollectionTuple} that is part of the given {@link Pipeline}.
+   * Returns an empty {@link PCollectionTuple} that is part of the given {@link Pipeline}.
    *
    * 

A {@link PCollectionTuple} containing additional elements can be created by calling * {@link #and} on the result. @@ -87,7 +88,7 @@ public static PCollectionTuple empty(Pipeline pipeline) { * Returns a singleton {@link PCollectionTuple} containing the given * {@link PCollection} keyed by the given {@link TupleTag}. * - *

A {@code PCollectionTuple} containing additional elements can be created by calling + *

A {@link PCollectionTuple} containing additional elements can be created by calling * {@link #and} on the result. */ public static PCollectionTuple of(TupleTag tag, PCollection pc) { @@ -152,6 +153,8 @@ public Map, PCollection> getAll() { /** * Like {@link #apply(String, PTransform)} but defaulting to the name * of the {@link PTransform}. + * + * @return the output of the applied {@link PTransform} */ public OutputT apply( PTransform t) { @@ -159,10 +162,12 @@ public OutputT apply( } /** - * Applies the given {@code PTransform} to this input {@code PCollectionTuple}, + * Applies the given {@link PTransform} to this input {@link PCollectionTuple}, * using {@code name} to identify this specific application of the transform. * This name is used in various places, including the monitoring UI, logging, * and to stably identify this application node in the job graph. + * + * @return the output of the applied {@link PTransform} */ public OutputT apply( String name, PTransform t) { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java index e57a9fbc659d5..515e21ba6df97 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java @@ -17,30 +17,29 @@ package com.google.cloud.dataflow.sdk.values; import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.transforms.ParDo; +import com.google.cloud.dataflow.sdk.transforms.View; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; import java.io.Serializable; /** - * A {@link PCollectionView PCollectionView<T>} is an immutable view of a - * {@link PCollection} as a value of type {@code T} that can be accessed e.g. as - * a side input to a {@link com.google.cloud.dataflow.sdk.transforms.DoFn}. + * A {@link PCollectionView PCollectionView<T>} is an immutable view of a {@link PCollection} + * as a value of type {@code T} that can be accessed + * as a side input to a {@link ParDo} transform. * - *

A {@code PCollectionView} should always be the output of a + *

A {@link PCollectionView} should always be the output of a * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}. It is the joint responsibility of * this transform and each {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to implement * the view in a runner-specific manner. * - *

The most common case is using the {@link com.google.cloud.dataflow.sdk.transforms.View} - * transforms to prepare a {@link PCollection} for use as a side input to - * {@link com.google.cloud.dataflow.sdk.transforms.ParDo}. See - * {@link com.google.cloud.dataflow.sdk.transforms.View#asSingleton()}, - * {@link com.google.cloud.dataflow.sdk.transforms.View#asIterable()}, and - * {@link com.google.cloud.dataflow.sdk.transforms.View#asMap()} for more detail on specific views + *

The most common case is using the {@link View} transforms to prepare a {@link PCollection} + * for use as a side input to {@link ParDo}. See {@link View#asSingleton()}, + * {@link View#asIterable()}, and {@link View#asMap()} for more detail on specific views * available in the SDK. * - * @param the type of the value(s) accessible via this {@code PCollectionView} + * @param the type of the value(s) accessible via this {@link PCollectionView} */ public interface PCollectionView extends PValue, Serializable { /** diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java index f6a2af3e4cf3a..39a00616bf715 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java @@ -17,20 +17,19 @@ package com.google.cloud.dataflow.sdk.values; import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.transforms.PTransform; import java.util.Collection; import java.util.Collections; /** - * {@code PDone} is the output of a - * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} that - * doesn't have a non-trival result, e.g., a Write. No more - * transforms can be applied to it. + * {@link PDone} is the output of a {@link PTransform} that has a trivial result, + * such as a {@link Write}. */ public class PDone extends POutputValueBase { /** - * Creates a {@code PDone} in the given {@code Pipeline}. + * Creates a {@link PDone} in the given {@link Pipeline}. */ public static PDone in(Pipeline pipeline) { return new PDone(pipeline); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java index 39cfaabc735e5..89b097a65318f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java @@ -26,17 +26,20 @@ */ public interface PInput { /** - * Returns the owning {@link Pipeline} of this {@code PInput}. + * Returns the owning {@link Pipeline} of this {@link PInput}. */ public Pipeline getPipeline(); /** - * Expands this {@code PInput} into a list of its component input {@link PValue}s. + * Expands this {@link PInput} into a list of its component output + * {@link PValue PValues}. * - *

A {@link PValue} expands to itself. - * - *

A tuple or list of {@link PValue}s (e.g., {@link PCollectionTuple}, - * and {@link PCollectionList}) expands to its component {@link PValue}s. + *

    + *
  • A {@link PValue} expands to itself.
  • + *
  • A tuple or list of {@link PValue PValues} (such as + * {@link PCollectionTuple} or {@link PCollectionList}) + * expands to its component {@code PValue PValues}.
  • + *
* *

Not intended to be invoked directly by user code. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java index 8e9ccbe34323e..f99bc0b09ddae 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java @@ -28,19 +28,20 @@ public interface POutput { /** - * Returns the owning {@link Pipeline} of this {@code POutput}. + * Returns the owning {@link Pipeline} of this {@link POutput}. */ public Pipeline getPipeline(); /** - * Expands this {@code POutput} into a list of its component output - * {@code PValue}s. + * Expands this {@link POutput} into a list of its component output + * {@link PValue PValues}. * - *

A {@link PValue} expands to itself. - * - *

A tuple or list of {@code PValue}s (e.g., - * {@link PCollectionTuple}, and - * {@link PCollectionList}) expands to its component {@code PValue}s. + *

    + *
  • A {@link PValue} expands to itself.
  • + *
  • A tuple or list of {@link PValue PValues} (such as + * {@link PCollectionTuple} or {@link PCollectionList}) + * expands to its component {@code PValue PValues}.
  • + *
* *

Not intended to be invoked directly by user code. */ @@ -55,20 +56,20 @@ public interface POutput { * *

This is not intended to be invoked by user code, but * is automatically invoked as part of applying the - * producing {@code PTransform}. + * producing {@link PTransform}. */ public void recordAsOutput(AppliedPTransform transform); /** - * As part of applying the producing {@code PTransform}, finalizes this + * As part of applying the producing {@link PTransform}, finalizes this * output to make it ready for being used as an input and for running. * - *

This includes ensuring that all {@code PCollection}s - * have {@code Coder}s specified or defaulted. + *

This includes ensuring that all {@link PCollection PCollections} + * have {@link Coder Coders} specified or defaulted. * - *

Automatically invoked whenever this {@code POutput} is used - * as a {@code PInput} to another {@code PTransform}, or if never - * used as a {@code PInput}, when {@link Pipeline#run} + *

Automatically invoked whenever this {@link POutput} is used + * as a {@link PInput} to another {@link PTransform}, or if never + * used as a {@link PInput}, when {@link Pipeline#run} * is called, so users do not normally call this explicitly. */ public void finishSpecifyingOutput(); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java index 3f28677b3c683..69e04c3436423 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java @@ -18,13 +18,14 @@ import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; +import com.google.cloud.dataflow.sdk.transforms.PTransform; /** - * A {@code POutputValueBase} is the abstract base class of + * A {@link POutputValueBase} is the abstract base class of * {@code PTransform} outputs. * - *

A {@code PValueBase} that adds tracking of its producing - * {@code AppliedPTransform}. + *

A {@link PValueBase} that adds tracking of its producing + * {@link AppliedPTransform}. * *

For internal use. */ @@ -38,7 +39,7 @@ protected POutputValueBase(Pipeline pipeline) { /** * No-arg constructor for Java serialization only. - * The resulting {@code POutputValueBase} is unlikely to be + * The resulting {@link POutputValueBase} is unlikely to be * valid. */ protected POutputValueBase() { @@ -51,7 +52,7 @@ public Pipeline getPipeline() { } /** - * Returns the {@code AppliedPTransform} that this {@code POutputValueBase} + * Returns the {@link AppliedPTransform} that this {@link POutputValueBase} * is an output of. * *

For internal use only. @@ -61,8 +62,8 @@ public Pipeline getPipeline() { } /** - * Records that this {@code POutputValueBase} is an output with the - * given name of the given {@code AppliedPTransform}. + * Records that this {@link POutputValueBase} is an output with the + * given name of the given {@link AppliedPTransform}. * *

To be invoked only by {@link POutput#recordAsOutput} * implementations. Not to be invoked directly by user code. @@ -87,7 +88,7 @@ public void recordAsOutput(AppliedPTransform transform) { } /** - * Default behavior for {@code finishSpecifyingOutput()} is + * Default behavior for {@link #finishSpecifyingOutput()} is * to do nothing. Override if your {@link PValue} requires * finalization. */ @@ -95,7 +96,7 @@ public void recordAsOutput(AppliedPTransform transform) { public void finishSpecifyingOutput() { } /** - * The {@code PTransform} that produces this {@code POutputValueBase}. + * The {@link PTransform} that produces this {@link POutputValueBase}. */ private AppliedPTransform producingTransform; } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java index 5196f965f48ee..eb95a23f50f43 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java @@ -17,21 +17,20 @@ package com.google.cloud.dataflow.sdk.values; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; +import com.google.cloud.dataflow.sdk.transforms.PTransform; /** - * A {@code PValue} is the interface to values that can be - * input and output from {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s. + * The interface for values that can be input to and output from {@link PTransform PTransforms}. */ public interface PValue extends POutput, PInput { /** - * Returns the name of this {@code PValue}. + * Returns the name of this {@link PValue}. */ public String getName(); /** - * Returns the {@code AppliedPTransform} that this {@code POutputValueBase} - * is an output of. + * Returns the {@link AppliedPTransform} that this {@link PValue} is an output of. * *

For internal use only. */ diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java index 99cb08c7b2468..7e57204f33060 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java @@ -24,26 +24,24 @@ import java.util.Collections; /** - * A {@code PValueBase} is an abstract base class that provides + * A {@link PValueBase} is an abstract base class that provides * sensible default implementations for methods of {@link PValue}. * In particular, this includes functionality for getting/setting: * *

    - *
  • The {@code Pipeline} that the {@code PValue} is - * part of. - *
  • Whether the {@code PValue} has bee finalized (as an input - * or an output), after which its properties can - * no longer be changed. + *
  • The {@link Pipeline} that the {@link PValue} is part of.
  • + *
  • Whether the {@link PValue} has bee finalized (as an input + * or an output), after which its properties can no longer be changed.
  • *
* *

For internal use. */ public abstract class PValueBase extends POutputValueBase implements PValue { /** - * Returns the name of this {@code PValueBase}. + * Returns the name of this {@link PValueBase}. * - *

By default, the name of a {@code PValueBase} is based on the - * name of the {@code PTransform} that produces it. It can be + *

By default, the name of a {@link PValueBase} is based on the + * name of the {@link PTransform} that produces it. It can be * specified explicitly by calling {@link #setName}. * * @throws IllegalStateException if the name hasn't been set yet @@ -57,11 +55,10 @@ public String getName() { } /** - * Sets the name of this {@code PValueBase}. Returns {@code this}. + * Sets the name of this {@link PValueBase}. Returns {@code this}. * - * @throws IllegalStateException if this {@code PValueBase} has - * already been finalized and is no longer settable, e.g., by having - * {@code apply()} called on it + * @throws IllegalStateException if this {@link PValueBase} has + * already been finalized and may no longer be set. */ public PValueBase setName(String name) { if (finishedSpecifying) { @@ -80,7 +77,7 @@ protected PValueBase(Pipeline pipeline) { /** * No-arg constructor for Java serialization only. - * The resulting {@code PValueBase} is unlikely to be + * The resulting {@link PValueBase} is unlikely to be * valid. */ protected PValueBase() { @@ -88,12 +85,12 @@ protected PValueBase() { } /** - * The name of this {@code PValueBase}, or null if not yet set. + * The name of this {@link PValueBase}, or null if not yet set. */ private String name; /** - * Whether this {@code PValueBase} has been finalized, and its core + * Whether this {@link PValueBase} has been finalized, and its core * properties, e.g., name, can no longer be changed. */ private boolean finishedSpecifying = false; @@ -104,9 +101,9 @@ public void recordAsOutput(AppliedPTransform transform) { } /** - * Records that this {@code POutputValueBase} is an output with the - * given name of the given {@code AppliedPTransform} in the given - * {@code Pipeline}. + * Records that this {@link POutputValueBase} is an output with the + * given name of the given {@link AppliedPTransform} in the given + * {@link Pipeline}. * *

To be invoked only by {@link POutput#recordAsOutput} * implementations. Not to be invoked directly by user code. @@ -120,7 +117,7 @@ protected void recordAsOutput(AppliedPTransform transform, } /** - * Returns whether this {@code PValueBase} has been finalized, and + * Returns whether this {@link PValueBase} has been finalized, and * its core properties, e.g., name, can no longer be changed. * *

For internal use only. @@ -147,11 +144,10 @@ public String toString() { } /** - * Returns a {@code String} capturing the kind of this - * {@code PValueBase}. + * Returns a {@link String} capturing the kind of this + * {@link PValueBase}. * - *

By default, uses the base name of this {@code PValueBase}'s - * class as its kind string. + *

By default, uses the base name of the current class as its kind string. */ protected String getKindString() { return StringUtils.approximateSimpleName(getClass()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java index 58447933059da..1085d44b135c5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java @@ -37,10 +37,10 @@ import java.util.Objects; /** - * An immutable (value, timestamp) pair. + * An immutable pair of a value and a timestamp. * - *

Used for assigning initial timestamps to values inserted into a pipeline - * with {@link com.google.cloud.dataflow.sdk.transforms.Create#timestamped}. + *

The timestamp of a value determines many properties, such as its assignment to + * windows and whether the value is late (with respect to the watermark of a {@link PCollection}). * * @param the type of the value */ @@ -83,7 +83,7 @@ public String toString() { ///////////////////////////////////////////////////////////////////////////// /** - * A {@link Coder} for {@code TimestampedValue}. + * A {@link Coder} for {@link TimestampedValue}. */ public static class TimestampedValueCoder extends StandardCoder> { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java index e52de62319d75..74949211325c7 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java @@ -31,16 +31,16 @@ import java.util.Random; /** - * A {@code TupleTag} is a typed tag to use as the key of a + * A {@link TupleTag} is a typed tag to use as the key of a * heterogeneously typed tuple, like {@link PCollectionTuple}. * Its generic type parameter allows tracking * the static type of things stored in tuples. * - *

To aid in assigning default {@code Coder}s for results of - * side outputs of {@code ParDo}, an output - * {@code TupleTag} should be instantiated with an extra {@code {}} so + *

To aid in assigning default {@link Coder Coders} for results of + * side outputs of {@link ParDo}, an output + * {@link TupleTag} should be instantiated with an extra {@code {}} so * it is an instance of an anonymous subclass without generic type - * parameters. Input {@code TupleTag}s require no such extra + * parameters. Input {@link TupleTag TupleTags} require no such extra * instantiation (although it doesn't hurt). For example: * *

 {@code
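A minimal, self-contained sketch of the instantiation pattern described above; the class name, tag names, and element types are illustrative only, not taken from the patch:

import com.google.cloud.dataflow.sdk.values.TupleTag;

public class TupleTagInstantiationSketch {
  public static void main(String[] args) {
    // Output tags: the trailing {} creates an anonymous subclass, so the element
    // type survives erasure and can drive default Coder inference for side outputs.
    TupleTag<String> mainOutput = new TupleTag<String>("main") {};
    TupleTag<Integer> errorCounts = new TupleTag<Integer>("errors") {};

    // Input tags need no such extra instantiation (although it doesn't hurt).
    TupleTag<String> inputTag = new TupleTag<>("input");

    System.out.println(mainOutput.getId() + " " + errorCounts.getId() + " " + inputTag.getId());
  }
}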
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
index b7e7ae26a249a..f019fc26e4cb0 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
@@ -16,6 +16,7 @@
 
 package com.google.cloud.dataflow.sdk.values;
 
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
 import com.google.common.collect.ImmutableList;
 
 import java.io.Serializable;
@@ -24,12 +25,12 @@
 import java.util.List;
 
 /**
- * A {@code TupleTagList} is an immutable list of heterogeneously
- * typed {@link TupleTag}s.  A TupleTagList is used, for instance, to
+ * A {@link TupleTagList} is an immutable list of heterogeneously
+ * typed {@link TupleTag TupleTags}. A {@link TupleTagList} is used, for instance, to
  * specify the tags of the side outputs of a
- * {@link com.google.cloud.dataflow.sdk.transforms.ParDo}.
+ * {@link ParDo}.
  *
- * <p> TupleTagLists can be created and accessed like follows:
+ * <p> A {@link TupleTagList} can be created and accessed like follows:
  * <pre> {@code
  * TupleTag tag1 = ...;
  * TupleTag tag2 = ...;
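To make the purpose of a TupleTagList concrete, here is a small, self-contained sketch of a multi-output ParDo that routes elements to a main and a side output; the tag names, DoFn logic, and element values are illustrative only:

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.ParDo;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
import com.google.cloud.dataflow.sdk.values.TupleTag;
import com.google.cloud.dataflow.sdk.values.TupleTagList;

public class SideOutputTagsSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // Output tags are anonymous subclasses (note the trailing {}).
    final TupleTag<Integer> evens = new TupleTag<Integer>("evens") {};
    final TupleTag<Integer> odds = new TupleTag<Integer>("odds") {};

    PCollectionTuple results = p
        .apply(Create.of(1, 2, 3, 4))
        .apply(ParDo
            .withOutputTags(evens, TupleTagList.of(odds))
            .of(new DoFn<Integer, Integer>() {
              @Override
              public void processElement(ProcessContext c) {
                if (c.element() % 2 == 0) {
                  c.output(c.element());           // main output
                } else {
                  c.sideOutput(odds, c.element()); // side output, keyed by its tag
                }
              }
            }));

    PCollection<Integer> evenElements = results.get(evens);
    PCollection<Integer> oddElements = results.get(odds);
    p.run();
  }
}

The same building blocks (withOutputTags plus TupleTagList.of) appear in the ParDoTest additions later in this patch series.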
@@ -53,9 +54,9 @@
  */
 public class TupleTagList implements Serializable {
   /**
-   * Returns an empty TupleTagList.
+   * Returns an empty {@link TupleTagList}.
    *
-   * <p> Longer TupleTagLists can be created by calling
+   * <p>
Longer {@link TupleTagList TupleTagLists} can be created by calling * {@link #and} on the result. */ public static TupleTagList empty() { @@ -63,9 +64,9 @@ public static TupleTagList empty() { } /** - * Returns a singleton TupleTagList containing the given TupleTag. + * Returns a singleton {@link TupleTagList} containing the given {@link TupleTag}. * - *

Longer TupleTagLists can be created by calling + *

Longer {@link TupleTagList TupleTagLists} can be created by calling * {@link #and} on the result. */ public static TupleTagList of(TupleTag tag) { @@ -73,9 +74,9 @@ public static TupleTagList of(TupleTag tag) { } /** - * Returns a TupleTagList containing the given TupleTags, in order. + * Returns a {@link TupleTagList} containing the given {@link TupleTag TupleTags}, in order. * - *

Longer TupleTagLists can be created by calling + *

Longer {@link TupleTagList TupleTagLists} can be created by calling * {@link #and} on the result. */ public static TupleTagList of(List> tags) { @@ -83,8 +84,8 @@ public static TupleTagList of(List> tags) { } /** - * Returns a new TupleTagList that has all the TupleTags of - * this TupleTagList plus the given TupleTag appended to the end. + * Returns a new {@link TupleTagList} that has all the {@link TupleTag TupleTags} of + * this {@link TupleTagList} plus the given {@link TupleTag} appended to the end. */ public TupleTagList and(TupleTag tag) { return new TupleTagList( @@ -95,8 +96,8 @@ public TupleTagList and(TupleTag tag) { } /** - * Returns a new TupleTagList that has all the TupleTags of - * this TupleTagList plus the given TupleTags appended to the end, + * Returns a new {@link TupleTagList} that has all the {@link TupleTag TupleTags} of + * this {@link TupleTagList} plus the given {@link TupleTag TupleTags} appended to the end, * in order. */ public TupleTagList and(List> tags) { @@ -115,8 +116,9 @@ public int size() { } /** - * Returns the TupleTag at the given index (origin zero). Throws - * IndexOutOfBounds if the index is out of the range + * Returns the {@link TupleTag} at the given index (origin zero). + * + * @throws IndexOutOfBoundsException if the index is out of the range * {@code [0..size()-1]}. */ public TupleTag get(int index) { @@ -124,7 +126,7 @@ public TupleTag get(int index) { } /** - * Returns an immutable List of all the TupleTags in this TupleTagList. + * Returns an immutable List of all the {@link TupleTag TupleTags} in this {@link TupleTagList}. */ public List> getAll() { return tupleTags; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java index 09008c69fcee6..47d2cd5a5f96d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java @@ -49,7 +49,7 @@ public abstract class TypeDescriptor implements Serializable { private final TypeToken token; /** - * Creates a TypeDescriptor wrapping the provided token. + * Creates a {@link TypeDescriptor} wrapping the provided token. * This constructor is private so Guava types do not leak. */ private TypeDescriptor(TypeToken token) { @@ -93,14 +93,14 @@ public static TypeDescriptor of(Type type) { } /** - * Returns the {@code Type} represented by this {@link TypeDescriptor}. + * Returns the {@link Type} represented by this {@link TypeDescriptor}. */ public Type getType() { return token.getType(); } /** - * Returns the {@code Class} underlying the {@code Type} represented by + * Returns the {@link Class} underlying the {@link Type} represented by * this {@link TypeDescriptor}. */ public Class getRawType() { @@ -130,8 +130,8 @@ public final boolean isArray() { } /** - * Returns a {@code TypeVariable} for the named type parameter. Throws - * {@code IllegalArgumentException} if a type variable by the requested type parameter is not + * Returns a {@link TypeVariable} for the named type parameter. Throws + * {@link IllegalArgumentException} if a type variable by the requested type parameter is not * found. * *

For example, {@code new TypeDescriptor(){}.getTypeParameter("T")} returns a @@ -186,7 +186,7 @@ public List> getArgumentTypes(Method method) { } /** - * Returns a {@code TypeDescriptor} representing the given + * Returns a {@link TypeDescriptor} representing the given * type, with type variables resolved according to the specialization * in this type. * diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java index 433a6baba38fd..9b210b20b265a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java @@ -26,21 +26,22 @@ import com.google.cloud.dataflow.sdk.transforms.ParDo; /** - * A {@code TypedPValue} is the abstract base class of things that - * store some number of values of type {@code T}. Because we know - * the type {@code T}, this is the layer of the inheritance hierarchy where - * we store a coder for objects of type {@code T} + * A {@link TypedPValue TypedPValue<T>} is the abstract base class of things that + * store some number of values of type {@code T}. * - * @param the type of the values stored in this {@code TypedPValue} + *

Because we know the type {@code T}, this is the layer of the inheritance hierarchy where + * we store a coder for objects of type {@code T}. + * + * @param the type of the values stored in this {@link TypedPValue} */ public abstract class TypedPValue extends PValueBase implements PValue { /** - * Returns the Coder used by this TypedPValue to encode and decode + * Returns the {@link Coder} used by this {@link TypedPValue} to encode and decode * the values stored in it. * - * @throws IllegalStateException if the Coder hasn't been set, and - * couldn't be inferred + * @throws IllegalStateException if the {@link Coder} hasn't been set, and + * couldn't be inferred. */ public Coder getCoder() { if (coder == null) { @@ -50,10 +51,10 @@ public Coder getCoder() { } /** - * Sets the Coder used by this TypedPValue to encode and decode the - * values stored in it. Returns {@code this}. + * Sets the {@link Coder} used by this {@link TypedPValue} to encode and decode the + * values stored in it. Returns {@code this}. * - * @throws IllegalStateException if this TypedPValue has already + * @throws IllegalStateException if this {@link TypedPValue} has already * been finalized and is no longer settable, e.g., by having * {@code apply()} called on it */ @@ -71,10 +72,10 @@ public TypedPValue setCoder(Coder coder) { } /** - * After building, finalizes this PValue to make it ready for - * running. Automatically invoked whenever the PValue is "used" + * After building, finalizes this {@link PValue} to make it ready for + * running. Automatically invoked whenever the {@link PValue} is "used" * (e.g., when apply() is called on it) and when the Pipeline is - * run (useful if this is a PValue with no consumers). + * run (useful if this is a {@link PValue} with no consumers). */ @Override public void finishSpecifying() { @@ -88,7 +89,7 @@ public void finishSpecifying() { // Internal details below here. /** - * The Coder used by this TypedPValue to encode and decode the + * The {@link Coder} used by this {@link TypedPValue} to encode and decode the * values stored in it, or null if not specified nor inferred yet. */ private Coder coder; @@ -100,7 +101,7 @@ protected TypedPValue(Pipeline p) { private TypeDescriptor typeDescriptor; /** - * Returns a {@code TypeDescriptor} with some reflective information + * Returns a {@link TypeDescriptor TypeDescriptor<T>} with some reflective information * about {@code T}, if possible. May return {@code null} if no information * is available. Subclasses may override this to enable better * {@code Coder} inference. @@ -110,8 +111,8 @@ public TypeDescriptor getTypeDescriptor() { } /** - * Sets the {@code TypeDescriptor} associated with this class. Better - * reflective type information will lead to better {@code Coder} + * Sets the {@link TypeDescriptor TypeDescriptor<T>} associated with this class. Better + * reflective type information will lead to better {@link Coder} * inference. */ public TypedPValue setTypeDescriptorInternal(TypeDescriptor typeDescriptor) { @@ -121,8 +122,8 @@ public TypedPValue setTypeDescriptorInternal(TypeDescriptor typeDescriptor /** * If the coder is not explicitly set, this sets the coder for - * this {@code TypedPValue} to the best coder that can be inferred - * based upon the known {@code TypeDescriptor}. By default, this is null, + * this {@link TypedPValue} to the best coder that can be inferred + * based upon the known {@link TypeDescriptor}. By default, this is null, * but can and should be improved by subclasses. 
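As a concrete illustration of the getCoder()/setCoder() contract documented above, a short sketch (class name and element values illustrative only); the coder must be set before the value is finalized, otherwise it is inferred from the TypeDescriptor:

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.values.PCollection;

public class CoderOnTypedPValueSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    PCollection<String> words = p.apply(Create.of("alpha", "beta"));

    // Explicitly set the Coder; this must happen before the PCollection is
    // finalized (e.g., before apply() is called on it), per the
    // IllegalStateException documented above.
    words.setCoder(StringUtf8Coder.of());

    // Otherwise the Coder would be inferred from the TypeDescriptor.
    System.out.println(words.getCoder());

    p.run();
  }
}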
*/ @SuppressWarnings({"unchecked", "rawtypes"}) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java index 6a3bf003bcce0..b8ca756f0ab45 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java @@ -18,25 +18,35 @@ * Defines {@link com.google.cloud.dataflow.sdk.values.PCollection} and other classes for * representing data in a {@link com.google.cloud.dataflow.sdk.Pipeline}. * - *

A {@link com.google.cloud.dataflow.sdk.values.PCollection} is an immutable collection of - * values of type {@code T} and is the main representation for data. - * A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is a tuple of PCollections - * used in cases where PTransforms take or return multiple PCollections. - * - *

A {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} is an immutable tuple of - * heterogeneously-typed {@link com.google.cloud.dataflow.sdk.values.PCollection}s, "keyed" by - * {@link com.google.cloud.dataflow.sdk.values.TupleTag}s. - * A PCollectionTuple can be used as the input or - * output of a - * {@link com.google.cloud.dataflow.sdk.transforms.PTransform} taking - * or producing multiple PCollection inputs or outputs that can be of - * different types, for instance a - * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} with side - * outputs. - * - *

- * <p>A {@link com.google.cloud.dataflow.sdk.values.PCollectionView} is an immutable view of a
- * PCollection that can be accessed from a DoFn and other user Fns
- * as a side input.
+ * <p>In particular, see these collection abstractions:
 *
+ * <ul>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollection} - an immutable collection of
+ *     values of type {@code T} and the main representation for data in Dataflow.</li>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionView} - an immutable view of a
+ *     {@link com.google.cloud.dataflow.sdk.values.PCollection} that can be accessed as a
+ *     side input of a {@link com.google.cloud.dataflow.sdk.transforms.ParDo}
+ *     {@link com.google.cloud.dataflow.sdk.transforms.PTransform}.</li>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} - a heterogeneous tuple of
+ *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections}
+ *     used in cases where a {@link com.google.cloud.dataflow.sdk.transforms.PTransform} takes
+ *     or returns multiple
+ *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections}.</li>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionList} - a homogeneous list of
+ *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections} used, for example,
+ *     as input to {@link com.google.cloud.dataflow.sdk.transforms.Flatten}.</li>
+ * </ul>

+ * <p>And these classes for individual values play particular roles in Dataflow:
 *
+ * <ul>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.KV} - a key/value pair that is used by
+ *     keyed transforms, most notably {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}.</li>
+ *   <li>{@link com.google.cloud.dataflow.sdk.values.TimestampedValue} - a timestamp/value pair
+ *     that is used for windowing and handling out-of-order data in streaming execution.</li>
+ * </ul>
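A compact sketch that exercises several of the classes listed above in a single pipeline; the step names and element values are illustrative only, and Create.timestamped is the entry point already referenced by the TimestampedValue javadoc in this patch:

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.Flatten;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollectionList;
import com.google.cloud.dataflow.sdk.values.TimestampedValue;

import org.joda.time.Instant;

import java.util.Arrays;

public class ValuesPackageTourSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    // PCollection of KV pairs: the input shape expected by keyed transforms.
    PCollection<KV<String, Long>> counts =
        p.apply("Counts", Create.of(KV.of("a", 1L), KV.of("b", 2L)));
    PCollection<KV<String, Long>> moreCounts =
        p.apply("MoreCounts", Create.of(KV.of("c", 3L)));

    // PCollectionList: a homogeneous list of PCollections, e.g. as input to Flatten.
    PCollection<KV<String, Long>> merged = PCollectionList.of(counts).and(moreCounts)
        .apply(Flatten.<KV<String, Long>>pCollections());

    // TimestampedValue: assign initial timestamps to elements injected into the pipeline.
    PCollection<String> stamped = p.apply("Stamped", Create.timestamped(Arrays.asList(
        TimestampedValue.of("x", new Instant(0)),
        TimestampedValue.of("y", new Instant(1000)))));

    p.run();
  }
}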

For further details, see the documentation for each class in this package. */ package com.google.cloud.dataflow.sdk.values; From 7f7f377251f81249ee3ac863d210c9efefac76c1 Mon Sep 17 00:00:00 2001 From: hdeist Date: Tue, 22 Dec 2015 11:18:44 -0800 Subject: [PATCH 1250/1541] Add more tests to DoFnTesterTest ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110785121 --- .../sdk/transforms/DoFnTesterTest.java | 136 ++++++++++++++++-- 1 file changed, 126 insertions(+), 10 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/DoFnTesterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/DoFnTesterTest.java index f2cb40c2e84f8..32b38acffbd5f 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/DoFnTesterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/DoFnTesterTest.java @@ -15,23 +15,127 @@ */ package com.google.cloud.dataflow.sdk.transforms; +import static org.hamcrest.CoreMatchers.hasItems; import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import java.util.List; + /** * Tests for {@link DoFnTester}. */ @RunWith(JUnit4.class) public class DoFnTesterTest { + @Test - public void getAggregatorValuesShouldGetValueOfCounter() { + public void processElement() { CounterDoFn counterDoFn = new CounterDoFn(); + DoFnTester tester = DoFnTester.of(counterDoFn); + + tester.processElement(1L); - DoFnTester tester = DoFnTester.of(counterDoFn); + List take = tester.takeOutputElements(); + + assertThat(take, hasItems("1")); + + // Following takeOutputElements(), neither takeOutputElements() + // nor peekOutputElements() return anything. + assertTrue(tester.takeOutputElements().isEmpty()); + assertTrue(tester.peekOutputElements().isEmpty()); + + // processElement() caused startBundle() to be called, but finishBundle() was never called. + CounterDoFn deserializedDoFn = (CounterDoFn) tester.fn; + assertTrue(deserializedDoFn.wasStartBundleCalled()); + assertFalse(deserializedDoFn.wasFinishBundleCalled()); + } + + @Test + public void processElementsWithPeeks() { + CounterDoFn counterDoFn = new CounterDoFn(); + DoFnTester tester = DoFnTester.of(counterDoFn); + + // Explicitly call startBundle(). + tester.startBundle(); + + // verify startBundle() was called but not finishBundle(). + CounterDoFn deserializedDoFn = (CounterDoFn) tester.fn; + assertTrue(deserializedDoFn.wasStartBundleCalled()); + assertFalse(deserializedDoFn.wasFinishBundleCalled()); + + // process a couple of elements. + tester.processElement(1L); + tester.processElement(2L); + + // peek the first 2 outputs. + List peek = tester.peekOutputElements(); + assertThat(peek, hasItems("1", "2")); + + // process a couple more. + tester.processElement(3L); + tester.processElement(4L); + + // peek all the outputs so far. + peek = tester.peekOutputElements(); + assertThat(peek, hasItems("1", "2", "3", "4")); + // take the outputs. + List take = tester.takeOutputElements(); + assertThat(take, hasItems("1", "2", "3", "4")); + + // Following takeOutputElements(), neither takeOutputElements() + // nor peekOutputElements() return anything. + assertTrue(tester.peekOutputElements().isEmpty()); + assertTrue(tester.takeOutputElements().isEmpty()); + + // verify finishBundle() hasn't been called yet. 
+ assertTrue(deserializedDoFn.wasStartBundleCalled()); + assertFalse(deserializedDoFn.wasFinishBundleCalled()); + + // process a couple more. + tester.processElement(5L); + tester.processElement(6L); + + // peek and take now have only the 2 last outputs. + peek = tester.peekOutputElements(); + assertThat(peek, hasItems("5", "6")); + take = tester.takeOutputElements(); + assertThat(take, hasItems("5", "6")); + + tester.finishBundle(); + + // verify finishBundle() was called. + assertTrue(deserializedDoFn.wasStartBundleCalled()); + assertTrue(deserializedDoFn.wasFinishBundleCalled()); + } + + @Test + public void processBatch() { + CounterDoFn counterDoFn = new CounterDoFn(); + DoFnTester tester = DoFnTester.of(counterDoFn); + + // processBatch() returns all the output like takeOutputElements(). + List take = tester.processBatch(1L, 2L, 3L, 4L); + + assertThat(take, hasItems("1", "2", "3", "4")); + + // peek now returns nothing. + assertTrue(tester.peekOutputElements().isEmpty()); + + // verify startBundle() and finishBundle() were both called. + CounterDoFn deserializedDoFn = (CounterDoFn) tester.fn; + assertTrue(deserializedDoFn.wasStartBundleCalled()); + assertTrue(deserializedDoFn.wasFinishBundleCalled()); + } + + @Test + public void getAggregatorValuesShouldGetValueOfCounter() { + CounterDoFn counterDoFn = new CounterDoFn(); + DoFnTester tester = DoFnTester.of(counterDoFn); tester.processBatch(1L, 2L, 4L, 8L); Long aggregatorVal = tester.getAggregatorValue(counterDoFn.agg); @@ -42,8 +146,7 @@ public void getAggregatorValuesShouldGetValueOfCounter() { @Test public void getAggregatorValuesWithEmptyCounterShouldSucceed() { CounterDoFn counterDoFn = new CounterDoFn(); - - DoFnTester tester = DoFnTester.of(counterDoFn); + DoFnTester tester = DoFnTester.of(counterDoFn); tester.processBatch(); Long aggregatorVal = tester.getAggregatorValue(counterDoFn.agg); // empty bundle @@ -53,7 +156,7 @@ public void getAggregatorValuesWithEmptyCounterShouldSucceed() { @Test public void getAggregatorValuesInStartFinishBundleShouldGetValues() { CounterDoFn fn = new CounterDoFn(1L, 2L); - DoFnTester tester = DoFnTester.of(fn); + DoFnTester tester = DoFnTester.of(fn); tester.processBatch(0L, 0L); Long aggValue = tester.getAggregatorValue(fn.agg); @@ -61,12 +164,14 @@ public void getAggregatorValuesInStartFinishBundleShouldGetValues() { } /** - * A DoFn that adds values to an aggregator in processElement. + * A DoFn that adds values to an aggregator and converts input to String in processElement. 
*/ - private static class CounterDoFn extends DoFn { + private static class CounterDoFn extends DoFn { Aggregator agg = createAggregator("ctr", new Sum.SumLongFn()); private final long startBundleVal; private final long finishBundleVal; + private boolean startBundleCalled; + private boolean finishBundleCalled; public CounterDoFn() { this(0L, 0L); @@ -78,18 +183,29 @@ public CounterDoFn(long start, long finish) { } @Override - public void startBundle(DoFn.Context c) { + public void startBundle(Context c) { agg.addValue(startBundleVal); + startBundleCalled = true; } @Override - public void processElement(DoFn.ProcessContext c) throws Exception { + public void processElement(ProcessContext c) throws Exception { agg.addValue(c.element()); + c.output(c.element().toString()); } @Override - public void finishBundle(DoFn.Context c) { + public void finishBundle(Context c) { agg.addValue(finishBundleVal); + finishBundleCalled = true; + } + + boolean wasStartBundleCalled() { + return startBundleCalled; + } + + boolean wasFinishBundleCalled() { + return finishBundleCalled; } } } From d23aff317d275fb0c3ca491b9c3c91fcd6f0476a Mon Sep 17 00:00:00 2001 From: sgmc Date: Tue, 22 Dec 2015 13:14:46 -0800 Subject: [PATCH 1251/1541] Add OAuth2 addresses to GcpOptions ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=110793208 --- .../dataflow/sdk/options/GcpOptions.java | 23 +++++++++++++++++++ .../cloud/dataflow/sdk/util/Credentials.java | 3 +++ 2 files changed, 26 insertions(+) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java index ef36cda04ae09..1ab544809b916 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java @@ -17,6 +17,7 @@ package com.google.cloud.dataflow.sdk.options; import com.google.api.client.auth.oauth2.Credential; +import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants; import com.google.cloud.dataflow.sdk.util.CredentialFactory; import com.google.cloud.dataflow.sdk.util.GcpCredentialFactory; import com.google.cloud.dataflow.sdk.util.InstanceBuilder; @@ -259,4 +260,26 @@ public Credential create(PipelineOptions options) { } } } + + /** + * The token server URL to use for OAuth 2 authentication. Normally, the default is sufficient, + * but some specialized use cases may want to override this value. + */ + @Description("The token server URL to use for OAuth 2 authentication. Normally, the default " + + "is sufficient, but some specialized use cases may want to override this value.") + @Default.String(GoogleOAuthConstants.TOKEN_SERVER_URL) + @Hidden + String getTokenServerUrl(); + void setTokenServerUrl(String value); + + /** + * The authorization server URL to use for OAuth 2 authentication. Normally, the default is + * sufficient, but some specialized use cases may want to override this value. + */ + @Description("The authorization server URL to use for OAuth 2 authentication. 
Normally, the " + + "default is sufficient, but some specialized use cases may want to override this value.") + @Default.String(GoogleOAuthConstants.AUTHORIZATION_SERVER_URL) + @Hidden + String getAuthorizationServerEncodedUrl(); + void setAuthorizationServerEncodedUrl(String value); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java index 1af539163bdf8..671b131554ead 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java @@ -24,6 +24,7 @@ import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants; import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; +import com.google.api.client.http.GenericUrl; import com.google.api.client.http.HttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.json.jackson2.JacksonFactory; @@ -175,6 +176,8 @@ private static Credential getCredentialFromClientSecrets( GoogleAuthorizationCodeFlow flow = new GoogleAuthorizationCodeFlow.Builder( httpTransport, jsonFactory, clientSecrets, scopes) .setDataStoreFactory(dataStoreFactory) + .setTokenServerUrl(new GenericUrl(options.getTokenServerUrl())) + .setAuthorizationServerEncodedUrl(options.getAuthorizationServerEncodedUrl()) .build(); // The credentialId identifies the credential if we're using a persistent From 2cec5a7e4e7acd0c3e09723f29081899c07c61be Mon Sep 17 00:00:00 2001 From: lcwik Date: Mon, 28 Dec 2015 16:45:38 -0800 Subject: [PATCH 1252/1541] Make ParDo.withSideInputs cumulative Previously when building a ParDo, withSideInputs would repace the currently built set of side inputs with the newly passed in set. This change makes the builder cumulative so ParDo.withSideInputs(A).withSideInputs(B) is equivalent to ParDo.withSideInputs({A, B}). ----Release Notes---- Modified ParDo.withSideInputs such that successive calls are cumulative. [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111039573 --- .../cloud/dataflow/sdk/transforms/ParDo.java | 34 ++++-- .../dataflow/sdk/transforms/ParDoTest.java | 112 +++++++++++++++++- 2 files changed, 134 insertions(+), 12 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java index 245dae01a7123..43644d4e737db 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java @@ -607,7 +607,7 @@ public Unbound named(String name) { /** * Returns a new {@link ParDo} transform that's like this - * transform but with the specified side inputs. + * transform but with the specified additional side inputs. * Does not modify this transform. The resulting transform is * still incomplete. * @@ -620,7 +620,7 @@ public Unbound withSideInputs(PCollectionView... sideInputs) { /** * Returns a new {@link ParDo} transform that is like this - * transform but with the specified side inputs. Does not modify + * transform but with the specified additional side inputs. Does not modify * this transform. The resulting transform is still incomplete. * *

See the discussion of Side Inputs above and on @@ -628,7 +628,10 @@ public Unbound withSideInputs(PCollectionView... sideInputs) { */ public Unbound withSideInputs( Iterable> sideInputs) { - return new Unbound(name, ImmutableList.copyOf(sideInputs)); + ImmutableList.Builder> builder = ImmutableList.builder(); + builder.addAll(this.sideInputs); + builder.addAll(sideInputs); + return new Unbound(name, builder.build()); } /** @@ -710,7 +713,7 @@ public Bound named(String name) { /** * Returns a new {@link ParDo} {@link PTransform} that's like this - * {@link PTransform} but with the specified side inputs. Does not + * {@link PTransform} but with the specified additional side inputs. Does not * modify this {@link PTransform}. * *
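A minimal sketch of the cumulative behavior this patch introduces: chained withSideInputs() calls now accumulate views rather than replacing them. The view names, DoFn body, and element values are illustrative only.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.ParDo;
import com.google.cloud.dataflow.sdk.transforms.View;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollectionView;

public class CumulativeSideInputsSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

    final PCollectionView<Integer> offset =
        p.apply("Offset", Create.of(100)).apply(View.<Integer>asSingleton());
    final PCollectionView<Integer> scale =
        p.apply("Scale", Create.of(3)).apply(View.<Integer>asSingleton());

    PCollection<Integer> out = p
        .apply("Input", Create.of(1, 2, 3))
        // After this change, both views are available to the DoFn:
        // equivalent to .withSideInputs(offset, scale).
        .apply(ParDo.withSideInputs(offset)
                    .withSideInputs(scale)
                    .of(new DoFn<Integer, Integer>() {
                      @Override
                      public void processElement(ProcessContext c) {
                        c.output(c.element() * c.sideInput(scale) + c.sideInput(offset));
                      }
                    }));
    p.run();
  }
}

This mirrors the new testParDoWithSideInputsIsCumulative test added to ParDoTest below.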

See the discussion of Side Inputs above and on @@ -722,7 +725,7 @@ public Bound withSideInputs(PCollectionView... sideInputs) { /** * Returns a new {@link ParDo} {@link PTransform} that's like this - * {@link PTransform} but with the specified side inputs. Does not + * {@link PTransform} but with the specified additional side inputs. Does not * modify this {@link PTransform}. * *

See the discussion of Side Inputs above and on @@ -730,7 +733,10 @@ public Bound withSideInputs(PCollectionView... sideInputs) { */ public Bound withSideInputs( Iterable> sideInputs) { - return new Bound<>(name, ImmutableList.copyOf(sideInputs), fn); + ImmutableList.Builder> builder = ImmutableList.builder(); + builder.addAll(this.sideInputs); + builder.addAll(sideInputs); + return new Bound<>(name, builder.build(), fn); } /** @@ -839,7 +845,7 @@ public UnboundMulti withSideInputs( /** * Returns a new multi-output {@link ParDo} transform that's like - * this transform but with the specified side inputs. Does not + * this transform but with the specified additional side inputs. Does not * modify this transform. The resulting transform is still * incomplete. * @@ -848,8 +854,11 @@ public UnboundMulti withSideInputs( */ public UnboundMulti withSideInputs( Iterable> sideInputs) { + ImmutableList.Builder> builder = ImmutableList.builder(); + builder.addAll(this.sideInputs); + builder.addAll(sideInputs); return new UnboundMulti<>( - name, ImmutableList.copyOf(sideInputs), + name, builder.build(), mainOutputTag, sideOutputTags); } @@ -924,7 +933,7 @@ public BoundMulti named(String name) { /** * Returns a new multi-output {@link ParDo} {@link PTransform} - * that's like this {@link PTransform} but with the specified side + * that's like this {@link PTransform} but with the specified additional side * inputs. Does not modify this {@link PTransform}. * *

See the discussion of Side Inputs above and on @@ -937,7 +946,7 @@ public BoundMulti withSideInputs( /** * Returns a new multi-output {@link ParDo} {@link PTransform} - * that's like this {@link PTransform} but with the specified side + * that's like this {@link PTransform} but with the specified additional side * inputs. Does not modify this {@link PTransform}. * *

See the discussion of Side Inputs above and on @@ -945,8 +954,11 @@ public BoundMulti withSideInputs( */ public BoundMulti withSideInputs( Iterable> sideInputs) { + ImmutableList.Builder> builder = ImmutableList.builder(); + builder.addAll(this.sideInputs); + builder.addAll(sideInputs); return new BoundMulti<>( - name, ImmutableList.copyOf(sideInputs), + name, builder.build(), mainOutputTag, sideOutputTags, fn); } diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java index 50afbf993418e..f3f9bde92d6d1 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/ParDoTest.java @@ -600,7 +600,7 @@ public void testParDoWithSideInputs() { PCollectionView sideInputUnread = pipeline .apply("CreateSideInputUnread", Create.of(-3333)) .apply("ViewSideInputUnread", View.asSingleton()); - PCollectionView sideInput2 = pipeline + PCollectionView sideInput2 = pipeline .apply("CreateSideInput2", Create.of(222)) .apply("ViewSideInput2", View.asSingleton()); @@ -619,6 +619,116 @@ public void testParDoWithSideInputs() { pipeline.run(); } + @Test + @Category(RunnableOnService.class) + public void testParDoWithSideInputsIsCumulative() { + Pipeline pipeline = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + PCollectionView sideInput1 = pipeline + .apply("CreateSideInput1", Create.of(11)) + .apply("ViewSideInput1", View.asSingleton()); + PCollectionView sideInputUnread = pipeline + .apply("CreateSideInputUnread", Create.of(-3333)) + .apply("ViewSideInputUnread", View.asSingleton()); + PCollectionView sideInput2 = pipeline + .apply("CreateSideInput2", Create.of(222)) + .apply("ViewSideInput2", View.asSingleton()); + + PCollection output = pipeline + .apply(Create.of(inputs)) + .apply(ParDo.withSideInputs(sideInput1) + .withSideInputs(sideInputUnread) + .withSideInputs(sideInput2) + .of(new TestDoFn( + Arrays.asList(sideInput1, sideInput2), + Arrays.>asList()))); + + DataflowAssert.that(output) + .satisfies(ParDoTest.HasExpectedOutput + .forInput(inputs) + .andSideInputs(11, 222)); + + pipeline.run(); + } + + @Test + @Category(RunnableOnService.class) + public void testMultiOutputParDoWithSideInputs() { + Pipeline pipeline = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + final TupleTag mainOutputTag = new TupleTag("main"){}; + final TupleTag sideOutputTag = new TupleTag("sideOutput"){}; + + PCollectionView sideInput1 = pipeline + .apply("CreateSideInput1", Create.of(11)) + .apply("ViewSideInput1", View.asSingleton()); + PCollectionView sideInputUnread = pipeline + .apply("CreateSideInputUnread", Create.of(-3333)) + .apply("ViewSideInputUnread", View.asSingleton()); + PCollectionView sideInput2 = pipeline + .apply("CreateSideInput2", Create.of(222)) + .apply("ViewSideInput2", View.asSingleton()); + + PCollectionTuple outputs = pipeline + .apply(Create.of(inputs)) + .apply(ParDo.withSideInputs(sideInput1) + .withSideInputs(sideInputUnread) + .withSideInputs(sideInput2) + .withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)) + .of(new TestDoFn( + Arrays.asList(sideInput1, sideInput2), + Arrays.>asList()))); + + DataflowAssert.that(outputs.get(mainOutputTag)) + .satisfies(ParDoTest.HasExpectedOutput + .forInput(inputs) + .andSideInputs(11, 222)); + + pipeline.run(); + } + + @Test + @Category(RunnableOnService.class) + public void 
testMultiOutputParDoWithSideInputsIsCumulative() { + Pipeline pipeline = TestPipeline.create(); + + List inputs = Arrays.asList(3, -42, 666); + + final TupleTag mainOutputTag = new TupleTag("main"){}; + final TupleTag sideOutputTag = new TupleTag("sideOutput"){}; + + PCollectionView sideInput1 = pipeline + .apply("CreateSideInput1", Create.of(11)) + .apply("ViewSideInput1", View.asSingleton()); + PCollectionView sideInputUnread = pipeline + .apply("CreateSideInputUnread", Create.of(-3333)) + .apply("ViewSideInputUnread", View.asSingleton()); + PCollectionView sideInput2 = pipeline + .apply("CreateSideInput2", Create.of(222)) + .apply("ViewSideInput2", View.asSingleton()); + + PCollectionTuple outputs = pipeline + .apply(Create.of(inputs)) + .apply(ParDo.withSideInputs(sideInput1) + .withSideInputs(sideInputUnread) + .withSideInputs(sideInput2) + .withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)) + .of(new TestDoFn( + Arrays.asList(sideInput1, sideInput2), + Arrays.>asList()))); + + DataflowAssert.that(outputs.get(mainOutputTag)) + .satisfies(ParDoTest.HasExpectedOutput + .forInput(inputs) + .andSideInputs(11, 222)); + + pipeline.run(); + } + @Test public void testParDoReadingFromUnknownSideInput() { Pipeline pipeline = TestPipeline.create(); From 441cbe22eb8b399eba6990793e1f5efa9d0c169e Mon Sep 17 00:00:00 2001 From: klk Date: Tue, 29 Dec 2015 11:45:34 -0800 Subject: [PATCH 1253/1541] Tidy some worker code ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111082740 --- .../sdk/runners/worker/AssignWindowsParDoFn.java | 6 +++--- .../sdk/runners/worker/CombineValuesFn.java | 16 ++++++++-------- .../runners/worker/DefaultParDoFnFactory.java | 2 +- .../worker/GroupAlsoByWindowsParDoFn.java | 2 +- .../sdk/runners/worker/InMemoryReader.java | 2 +- .../sdk/runners/worker/NormalParDoFn.java | 4 ++-- .../sdk/runners/worker/ParDoFnFactory.java | 2 +- .../worker/ReifyTimestampAndWindowsParDoFn.java | 2 +- .../sdk/runners/worker/InMemoryReaderTest.java | 2 +- .../worker/WindmillStateInternalsTest.java | 2 ++ 10 files changed, 21 insertions(+), 19 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java index 43492792ce221..153fcf5f3085f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/AssignWindowsParDoFn.java @@ -50,7 +50,7 @@ static AssignWindowsParDoFn of( AssignWindowsDoFn fn, String stepName, String transformName, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { @@ -72,7 +72,7 @@ public ParDoFn create( @Nullable List sideInputInfos, @Nullable List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { @@ -119,7 +119,7 @@ private AssignWindowsParDoFn( AssignWindowsDoFn fn, String stepName, String transformName, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { super( diff --git 
a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java index 28e42cb3fc1d2..8b7b53c6e0ba3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/CombineValuesFn.java @@ -68,7 +68,7 @@ static CombineValuesFn of( String phase, String stepName, String transformName, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { @@ -90,7 +90,7 @@ public ParDoFn create( @Nullable List sideInputInfos, @Nullable List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { @@ -126,19 +126,19 @@ public ParDoFn create( @Override protected DoFnInfo getDoFnInfo() { - DoFn doFn = null; + DoFn doFn = null; switch (phase) { case CombinePhase.ALL: - doFn = new CombineValuesDoFn(combineFn); + doFn = new CombineValuesDoFn<>(combineFn); break; case CombinePhase.ADD: - doFn = new AddInputsDoFn(combineFn); + doFn = new AddInputsDoFn<>(combineFn); break; case CombinePhase.MERGE: - doFn = new MergeAccumulatorsDoFn(combineFn); + doFn = new MergeAccumulatorsDoFn<>(combineFn); break; case CombinePhase.EXTRACT: - doFn = new ExtractOutputDoFn(combineFn); + doFn = new ExtractOutputDoFn<>(combineFn); break; default: throw new IllegalArgumentException( @@ -156,7 +156,7 @@ private CombineValuesFn( String phase, String stepName, String transformName, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { super( diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DefaultParDoFnFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DefaultParDoFnFactory.java index 0595ee1f37d28..b11d8327a5ea5 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DefaultParDoFnFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/DefaultParDoFnFactory.java @@ -55,7 +55,7 @@ public ParDoFn create( List sideInputInfos, List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java index 4bb468f290c14..8203b08b0886a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/GroupAlsoByWindowsParDoFn.java @@ -88,7 +88,7 @@ public ParDoFn create( @Nullable List sideInputInfos, @Nullable List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java 
b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java index b9a60c16ee498..650719047d618 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReader.java @@ -100,7 +100,7 @@ public InMemoryReaderIterator() { @Override protected boolean hasNextImpl() { - return tracker.tryReturnRecordAt(true, (long) nextIndex); + return tracker.tryReturnRecordAt(true, nextIndex); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java index 2df5a1e8ce91d..7155580c8309d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/NormalParDoFn.java @@ -55,7 +55,7 @@ static NormalParDoFn of( List outputTags, String stepName, String transformName, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) { return new NormalParDoFn( @@ -84,7 +84,7 @@ public ParDoFn create( @Nullable List sideInputInfos, @Nullable List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java index b80709f2142eb..cfb96645c79fd 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ParDoFnFactory.java @@ -44,7 +44,7 @@ ParDoFn create( List sideInputInfos, List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception; diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java index c5edfefebbdaa..3e9e1a6e54f23 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/ReifyTimestampAndWindowsParDoFn.java @@ -67,7 +67,7 @@ public ParDoFn create( @Nullable List sideInputInfos, @Nullable List multiOutputInfos, int numOutputs, - DataflowExecutionContext executionContext, + DataflowExecutionContext executionContext, CounterSet.AddCounterMutator addCounterMutator, StateSampler stateSampler) throws Exception { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java index f69d95a5d1cf6..dc02053a9c03b 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/InMemoryReaderTest.java @@ -196,7 +196,7 @@ public void testParallelism() throws Exception { InMemoryReader inMemoryReader = new InMemoryReader<>(encodedElements(elements, coder), 1L, 4L, 
coder); int count = 0; - for (Reader.ReaderIterator iterator = inMemoryReader.iterator(); + for (Reader.ReaderIterator iterator = inMemoryReader.iterator(); iterator.hasNext(); iterator.next()) { assertTrue(iterator.getRemainingParallelism() >= 1); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java index febc376647545..58afedb55419e 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/WindmillStateInternalsTest.java @@ -978,6 +978,7 @@ public void testCachedCombining() throws Exception { private void disableCompactOnWrite() { WindmillStateInternals.COMPACT_NOW.set( new Supplier() { + @Override public Boolean get() { return false; } @@ -987,6 +988,7 @@ public Boolean get() { private void forceCompactOnWrite() { WindmillStateInternals.COMPACT_NOW.set( new Supplier() { + @Override public Boolean get() { return true; } From ae47163147e12c783a2bfff027b9c21a948df232 Mon Sep 17 00:00:00 2001 From: davor Date: Tue, 29 Dec 2015 15:12:23 -0800 Subject: [PATCH 1254/1541] DataflowWorkProgressUpdaterTest: fix data races in the test itself ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111093921 --- .../DataflowWorkProgressUpdaterTest.java | 79 +++++++++---------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java index a8c0315cbd22f..9f600eabb2680 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/DataflowWorkProgressUpdaterTest.java @@ -85,14 +85,17 @@ public class DataflowWorkProgressUpdaterTest { static class TestMapTaskExecutor extends MapTaskExecutor { ApproximateReportedProgress progress = null; + List> metrics = new ArrayList<>(); + CounterSet counters; public TestMapTaskExecutor(CounterSet counters) { super(new ArrayList(), counters, new StateSampler("test", counters.getAddCounterMutator())); + this.counters = counters; } @Override - public Reader.Progress getWorkerProgress() { + public synchronized Reader.Progress getWorkerProgress() { return cloudProgressToReaderProgress(progress); } @@ -107,9 +110,28 @@ public Reader.DynamicSplitResult requestDynamicSplit(Reader.DynamicSplitRequest cloudPositionToReaderPosition(split.getPosition())); } - public void setWorkerProgress(ApproximateReportedProgress progress) { + public synchronized void setWorkerProgress(ApproximateReportedProgress progress) { this.progress = progress; } + + @Override + public synchronized Collection> getOutputMetrics() { + return metrics; + } + + public synchronized void setUpMetrics(int n) { + metrics = new ArrayList<>(); + for (int i = 0; i < n; i++) { + metrics.add(makeMetric(i)); + } + } + + public synchronized void setUpCounters(int n) { + counters.clear(); + for (int i = 0; i < n; i++) { + counters.add(makeCounter(i)); + } + } } private static final String PROJECT_ID = "TEST_PROJECT_ID"; @@ -127,8 +149,6 @@ public void setWorkerProgress(ApproximateReportedProgress progress) { @Mock private DataflowWorker.WorkUnitClient workUnitClient; - private CounterSet 
counters; - private List> metrics; private TestMapTaskExecutor worker; private WorkItem workItem; private DataflowWorkerHarnessOptions options; @@ -143,14 +163,7 @@ public void initMocksAndWorkflowServiceAndWorkerAndWork() { options.setJobId(JOB_ID); options.setWorkerId(WORKER_ID); - metrics = new ArrayList<>(); - counters = new CounterSet(); - worker = new TestMapTaskExecutor(counters) { - @Override - public Collection> getOutputMetrics() { - return metrics; - } - }; + worker = new TestMapTaskExecutor(new CounterSet()); workItem = new WorkItem(); workItem.setProjectId(PROJECT_ID); @@ -181,9 +194,9 @@ protected long getLeaseRenewalLatencyMargin() { public void workProgressUpdaterUpdates() throws Exception { when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) .thenReturn(generateServiceState(System.currentTimeMillis() + 2000, 1000, null, 2L)); - setUpCounters(2); - setUpMetrics(3); - setUpProgress(approximateProgressAtIndex(1L)); + worker.setUpCounters(2); + worker.setUpMetrics(3); + worker.setWorkerProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after 300. verify(workUnitClient, timeout(400)) @@ -197,9 +210,9 @@ public void workProgressUpdaterUpdates() throws Exception { // and actual dynamic split result. @Test(timeout = 10000) public void workProgressUpdaterAdaptsProgressInterval() throws Exception { - setUpCounters(3); - setUpMetrics(2); - setUpProgress(approximateProgressAtIndex(1L)); + worker.setUpCounters(3); + worker.setUpMetrics(2); + worker.setWorkerProgress(approximateProgressAtIndex(1L)); // In tests below, we allow 500ms leeway. @@ -216,9 +229,9 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { new ExpectedDataflowWorkItemStatus().withCounters(3).withMetrics(2).withProgress( approximateProgressAtIndex(1L)).withReportIndex(1L))); - setUpCounters(5); - setUpMetrics(6); - setUpProgress(approximateProgressAtIndex(2L)); + worker.setUpCounters(5); + worker.setUpMetrics(6); + worker.setWorkerProgress(approximateProgressAtIndex(2L)); when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) .thenReturn(generateServiceState(System.currentTimeMillis() + 3000, 2000, null, 3L)); // The second update should be sent after ~1000ms (previous requested report interval). @@ -233,7 +246,7 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { // After the request is sent, reset cached dynamic split result to null. 
assertNull(progressUpdater.getDynamicSplitResultToReport()); - setUpProgress(approximateProgressAtIndex(3L)); + worker.setWorkerProgress(approximateProgressAtIndex(3L)); when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) .thenReturn(generateServiceState(System.currentTimeMillis() + 1000, 3000, null, 4L)); @@ -242,7 +255,7 @@ public void workProgressUpdaterAdaptsProgressInterval() throws Exception { new ExpectedDataflowWorkItemStatus().withProgress(approximateProgressAtIndex(3L)) .withReportIndex(3L))); - setUpProgress(approximateProgressAtIndex(4L)); + worker.setWorkerProgress(approximateProgressAtIndex(4L)); when(workUnitClient.reportWorkItemStatus(any(WorkItemStatus.class))) .thenReturn(generateServiceState(System.currentTimeMillis() + 4000, 3000, null, 5L)); @@ -266,7 +279,7 @@ public void workProgressUpdaterSendsLastPendingUpdateWhenStopped() throws Except .thenReturn(generateServiceState( System.currentTimeMillis() + 2000, 1000, positionAtIndex(2L), 2L)); - setUpProgress(approximateProgressAtIndex(1L)); + worker.setWorkerProgress(approximateProgressAtIndex(1L)); progressUpdater.startReportingProgress(); // The initial update should be sent after 300 msec. @@ -298,13 +311,6 @@ public void workProgressUpdaterSendsLastPendingUpdateWhenStopped() throws Except verifyNoMoreInteractions(workUnitClient); } - private void setUpCounters(int n) { - counters.clear(); - for (int i = 0; i < n; i++) { - counters.add(makeCounter(i)); - } - } - private static Counter makeCounter(int i) { if (i % 3 == 0) { return Counter.longs(COUNTER_NAME + i, COUNTER_KINDS[0]) @@ -325,17 +331,6 @@ private static Metric makeMetric(int i) { return new DoubleMetric(String.valueOf(i), i); } - private void setUpMetrics(int n) { - metrics = new ArrayList<>(); - for (int i = 0; i < n; i++) { - metrics.add(makeMetric(i)); - } - } - - private void setUpProgress(ApproximateReportedProgress progress) { - worker.setWorkerProgress(progress); - } - private WorkItemServiceState generateServiceState(long leaseExpirationTimestamp, int progressReportIntervalMs, Position suggestedStopPosition, long nextReportIndex) { From 31a8b88b627f2bdc0eb128c4874f70175fb906ef Mon Sep 17 00:00:00 2001 From: klk Date: Tue, 29 Dec 2015 20:42:44 -0800 Subject: [PATCH 1255/1541] Tidy some utility code ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111107107 --- .../cloud/dataflow/sdk/util/DoFnRunner.java | 21 ++++++++++++------- .../cloud/dataflow/sdk/util/VarInt.java | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java index 4a8c105edb4da..b501e90208767 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java @@ -224,7 +224,7 @@ private static class DoFnContext final TupleTag mainOutputTag; final StepContext stepContext; final CounterSet.AddCounterMutator addCounterMutator; - final WindowFn windowFn; + final WindowFn windowFn; /** * The set of known output tags, some of which may be undeclared, so we can throw an @@ -240,7 +240,7 @@ public DoFnContext(PipelineOptions options, List> sideOutputTags, StepContext stepContext, CounterSet.AddCounterMutator addCounterMutator, - WindowFn windowFn) { + WindowFn windowFn) { fn.super(); this.options = options; this.fn = fn; @@ -267,8 +267,8 @@ public PipelineOptions getPipelineOptions() { 
return options; } - WindowedValue makeWindowedValue( - T output, Instant timestamp, Collection windows, PaneInfo pane) { + WindowedValue makeWindowedValue( + T output, Instant timestamp, Collection windows, PaneInfo pane) { final Instant inputTimestamp = timestamp; if (timestamp == null) { @@ -277,7 +277,11 @@ WindowedValue makeWindowedValue( if (windows == null) { try { - windows = windowFn.assignWindows(windowFn.new AssignContext() { + // The windowFn can never succeed at accessing the element, so its type does not + // matter here + @SuppressWarnings("unchecked") + WindowFn objectWindowFn = (WindowFn) windowFn; + windows = objectWindowFn.assignWindows(objectWindowFn.new AssignContext() { @Override public Object element() { throw new UnsupportedOperationException( @@ -562,7 +566,7 @@ public void writePCollectionViewData( Iterable> data, Coder elemCoder) throws IOException { @SuppressWarnings("unchecked") - Coder windowCoder = context.windowFn.windowCoder(); + Coder windowCoder = (Coder) context.windowFn.windowCoder(); context.stepContext.writePCollectionViewData( tag, data, IterableCoder.of(WindowedValue.getFullCoder(elemCoder, windowCoder)), @@ -577,8 +581,9 @@ public StateInternals stateInternals() { } @Override - protected Aggregator createAggregatorInternal(String name, - CombineFn combiner) { + protected Aggregator + createAggregatorInternal( + String name, CombineFn combiner) { return context.createAggregatorInternal(name, combiner); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java index f2e9c8bd74088..af039112eabac 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java @@ -32,7 +32,7 @@ public class VarInt { private static long convertIntToLongNoSignExtend(int v) { - return ((long) v) & 0xFFFFFFFFL; + return v & 0xFFFFFFFFL; } /** From c837233cee865a7fb3bf1a232edf447aa2fa8fda Mon Sep 17 00:00:00 2001 From: lcwik Date: Wed, 30 Dec 2015 13:32:55 -0800 Subject: [PATCH 1256/1541] Update the overview page for the javadoc Added relevant links to various docs and talked about the @Experimental API ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111148749 --- javadoc/overview.html | 31 +++++++++++++++++++++++++++++++ sdk/pom.xml | 4 ++-- 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 javadoc/overview.html diff --git a/javadoc/overview.html b/javadoc/overview.html new file mode 100644 index 0000000000000..4ffd33f22fcad --- /dev/null +++ b/javadoc/overview.html @@ -0,0 +1,31 @@ + + + + Google Cloud Dataflow Java SDK + + +

The Google Cloud Dataflow SDK for Java provides a simple and elegant + programming model to express your data processing pipelines; + see our product page + for more information and getting started instructions.

+ +

The easiest way to use the Google Cloud Dataflow SDK for Java is via + one of the released artifacts from the + + Maven Central Repository. + See our + release notes for more information about each released version.

+ +

+    <p>Version numbers use the form major.minor.incremental
+    and are incremented as follows:
+
+    <ul>
+      <li>major version for incompatible API changes</li>
+      <li>minor version for new functionality added in a backward-compatible manner</li>
+      <li>incremental version for forward-compatible bug fixes</li>
+    </ul>

Please note that APIs marked + {@link com.google.cloud.dataflow.sdk.annotations.Experimental @Experimental} + may change at any point and are not guaranteed to remain compatible across versions.

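 + +

                                  For illustration only, a Maven project could pull in one of those released + artifacts with a dependency declaration along these lines (the artifact + coordinates and version shown here are assumptions, not taken from this file):

                                  +
                                  +  <dependency>
                                  +    <groupId>com.google.cloud.dataflow</groupId>
                                  +    <artifactId>google-cloud-dataflow-java-sdk-all</artifactId>
                                  +    <version>1.x.y</version>
                                  +  </dependency>
                                  +  
                                  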
+ + diff --git a/sdk/pom.xml b/sdk/pom.xml index 002b2d8e4f727..3a083ade1e75a 100644 --- a/sdk/pom.xml +++ b/sdk/pom.xml @@ -218,8 +218,8 @@ maven-javadoc-plugin Google Cloud Dataflow SDK ${project.version} API - Google Cloud Dataflow SDK ${project.version} API - ../overview.html + Google Cloud Dataflow SDK for Java, version ${project.version} + ../javadoc/overview.html com.google.cloud.dataflow.sdk -exclude com.google.cloud.dataflow.sdk.runners.worker:com.google.cloud.dataflow.sdk.runners.dataflow:com.google.cloud.dataflow.sdk.util ${dataflow.javadoc_opts} From 1b0d4173e6d8d933d3fc300ccefa70e1cf95e33f Mon Sep 17 00:00:00 2001 From: jeremiele Date: Wed, 30 Dec 2015 15:08:43 -0800 Subject: [PATCH 1257/1541] Ensures that BoundedSource and UnboundedSource are Serializable. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111153948 --- .../google/cloud/dataflow/sdk/io/Read.java | 5 +- .../dataflow/sdk/io/FileBasedSourceTest.java | 2 +- .../cloud/dataflow/sdk/io/ReadTest.java | 144 ++++++++++++++++++ 3 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ReadTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java index f4936a2375fa7..710fd643b3633 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java @@ -22,6 +22,7 @@ import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner; import com.google.cloud.dataflow.sdk.runners.dataflow.CustomSources; import com.google.cloud.dataflow.sdk.transforms.PTransform; +import com.google.cloud.dataflow.sdk.util.SerializableUtils; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded; @@ -100,7 +101,7 @@ public static class Bounded extends PTransform> { private Bounded(@Nullable String name, BoundedSource source) { super(name); - this.source = source; + this.source = SerializableUtils.ensureSerializable(source); } /** @@ -165,7 +166,7 @@ public static class Unbounded extends PTransform> { private Unbounded(@Nullable String name, UnboundedSource source) { super(name); - this.source = source; + this.source = SerializableUtils.ensureSerializable(source); } /** diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java index 914bfcd4f76b2..d50c6a63b9a17 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/FileBasedSourceTest.java @@ -79,7 +79,7 @@ public class FileBasedSourceTest { *
 
E.g., if {@code splitHeader} is "h" and the lines of the file are: h, a, b, h, h, c, then * the records in this source are a,b,c, and records a and c are split points. */ - class TestFileBasedSource extends FileBasedSource { + static class TestFileBasedSource extends FileBasedSource { final String splitHeader; diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ReadTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ReadTest.java new file mode 100644 index 0000000000000..8dc517a8e8f6a --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/io/ReadTest.java @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.io; + +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.io.UnboundedSource.CheckpointMark; +import com.google.cloud.dataflow.sdk.options.PipelineOptions; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.IOException; +import java.util.List; + +import javax.annotation.Nullable; + +/** + * Tests for {@link Read}. + */ +@RunWith(JUnit4.class) +public class ReadTest { + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void failsWhenCustomBoundedSourceIsNotSerializable() { + thrown.expect(IllegalArgumentException.class); + Read.from(new NotSerializableBoundedSource()); + } + + @Test + public void succeedsWhenCustomBoundedSourceIsSerializable() { + Read.from(new SerializableBoundedSource()); + } + + @Test + public void failsWhenCustomUnboundedSourceIsNotSerializable() { + thrown.expect(IllegalArgumentException.class); + Read.from(new NotSerializableUnboundedSource()); + } + + @Test + public void succeedsWhenCustomUnboundedSourceIsSerializable() { + Read.from(new SerializableUnboundedSource()); + } + + private abstract static class CustomBoundedSource extends BoundedSource { + @Override + public List> splitIntoBundles( + long desiredBundleSizeBytes, PipelineOptions options) throws Exception { + return null; + } + + @Override + public long getEstimatedSizeBytes(PipelineOptions options) throws Exception { + return 0; + } + + @Override + public boolean producesSortedKeys(PipelineOptions options) throws Exception { + return false; + } + + @Override + public BoundedReader createReader(PipelineOptions options) throws IOException { + return null; + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return null; + } + } + + private static class NotSerializableBoundedSource extends CustomBoundedSource { + @SuppressWarnings("unused") + private final NotSerializableClass notSerializableClass = new NotSerializableClass(); + } + + private static class SerializableBoundedSource extends CustomBoundedSource {} + + private abstract static class CustomUnboundedSource + extends UnboundedSource { + @Override + public 
List> generateInitialSplits( + int desiredNumSplits, PipelineOptions options) throws Exception { + return null; + } + + @Override + public UnboundedReader createReader( + PipelineOptions options, NoOpCheckpointMark checkpointMark) { + return null; + } + + @Override + @Nullable + public Coder getCheckpointMarkCoder() { + return null; + } + + @Override + public void validate() {} + + @Override + public Coder getDefaultOutputCoder() { + return null; + } + } + + private static class NoOpCheckpointMark implements CheckpointMark { + @Override + public void finalizeCheckpoint() throws IOException {} + } + + private static class NotSerializableUnboundedSource extends CustomUnboundedSource { + @SuppressWarnings("unused") + private final NotSerializableClass notSerializableClass = new NotSerializableClass(); + } + + private static class SerializableUnboundedSource extends CustomUnboundedSource {} + + private static class NotSerializableClass {} +} From d19f99f3ce95ddf41b47973119cb0ad09f77cf61 Mon Sep 17 00:00:00 2001 From: malo Date: Wed, 30 Dec 2015 16:59:35 -0800 Subject: [PATCH 1258/1541] Always pass maxNumWorkers in job settings When submitting a job request, always pass the user defined value of maxNumWorkers, even when no autoscaling algorithm has been explicitely chosen. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111159069 --- .../DataflowPipelineWorkerPoolOptions.java | 10 ++-- .../runners/DataflowPipelineTranslator.java | 6 +- .../DataflowPipelineTranslatorTest.java | 55 ++++++++++++++++++- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java index d089491c3a156..633b2707b4f2f 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java @@ -85,12 +85,14 @@ public String getAlgorithm() { void setAutoscalingAlgorithm(AutoscalingAlgorithmType value); /** - * The maximum number of workers to use when using workerpool autoscaling. + * The maximum number of workers to use for the workerpool. This options limits the size of the + * workerpool for the lifetime of the job, including + * pipeline updates. * If left unspecified, the Dataflow service will compute a ceiling. */ - @Description("[Experimental] The maximum number of workers to use when using workerpool " - + "autoscaling. If left unspecified, the Dataflow service will compute a ceiling.") - @Experimental(Experimental.Kind.AUTOSCALING) + @Description("The maximum number of workers to use for the workerpool. This options limits the " + + "size of the workerpool for the lifetime of the job, including pipeline updates. 
" + + "If left unspecified, the Dataflow service will compute a ceiling.") int getMaxNumWorkers(); void setMaxNumWorkers(int value); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java index 7956efdfb2e78..f6618f98393fb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java @@ -440,12 +440,12 @@ public Job translate(List packages) { if (options.getDiskSizeGb() > 0) { workerPool.setDiskSizeGb(options.getDiskSizeGb()); } + AutoscalingSettings settings = new AutoscalingSettings(); if (options.getAutoscalingAlgorithm() != null) { - AutoscalingSettings settings = new AutoscalingSettings(); settings.setAlgorithm(options.getAutoscalingAlgorithm().getAlgorithm()); - settings.setMaxNumWorkers(options.getMaxNumWorkers()); - workerPool.setAutoscalingSettings(settings); } + settings.setMaxNumWorkers(options.getMaxNumWorkers()); + workerPool.setAutoscalingSettings(settings); List workerPools = new LinkedList<>(); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java index 5c6ad9285bc48..272329f950905 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslatorTest.java @@ -225,12 +225,23 @@ public void testScalingAlgorithmMissing() throws IOException { .getJob(); assertEquals(1, job.getEnvironment().getWorkerPools().size()); + // Autoscaling settings are always set. 
assertNull( job .getEnvironment() .getWorkerPools() .get(0) - .getAutoscalingSettings()); + .getAutoscalingSettings() + .getAlgorithm()); + assertEquals( + 0, + job + .getEnvironment() + .getWorkerPools() + .get(0) + .getAutoscalingSettings() + .getMaxNumWorkers() + .intValue()); } @Test @@ -257,6 +268,48 @@ public void testScalingAlgorithmNone() throws IOException { .get(0) .getAutoscalingSettings() .getAlgorithm()); + assertEquals( + 0, + job + .getEnvironment() + .getWorkerPools() + .get(0) + .getAutoscalingSettings() + .getMaxNumWorkers() + .intValue()); + } + + @Test + public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException { + final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling = null; + DataflowPipelineOptions options = buildPipelineOptions(); + options.setMaxNumWorkers(42); + options.setAutoscalingAlgorithm(noScaling); + + Pipeline p = buildPipeline(options); + p.traverseTopologically(new RecordingPipelineVisitor()); + Job job = + DataflowPipelineTranslator.fromOptions(options) + .translate(p, Collections.emptyList()) + .getJob(); + + assertEquals(1, job.getEnvironment().getWorkerPools().size()); + assertNull( + job + .getEnvironment() + .getWorkerPools() + .get(0) + .getAutoscalingSettings() + .getAlgorithm()); + assertEquals( + 42, + job + .getEnvironment() + .getWorkerPools() + .get(0) + .getAutoscalingSettings() + .getMaxNumWorkers() + .intValue()); } @Test From caec17bae26bbbb7dfeee901b449257dd2867c6c Mon Sep 17 00:00:00 2001 From: sgmc Date: Wed, 30 Dec 2015 19:17:38 -0800 Subject: [PATCH 1259/1541] Factor AvroUtils into a separate class ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111163754 --- .../cloud/dataflow/sdk/io/AvroSource.java | 93 +--------- .../cloud/dataflow/sdk/util/AvroUtils.java | 137 ++++++++++++++ .../dataflow/sdk/util/AvroUtilsTest.java | 170 ++++++++++++++++++ 3 files changed, 316 insertions(+), 84 deletions(-) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AvroUtilsTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java index 421b9b365f031..6d9279ad82929 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java @@ -20,7 +20,8 @@ import com.google.cloud.dataflow.sdk.coders.AvroCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.runners.PipelineRunner; -import com.google.cloud.dataflow.sdk.util.IOChannelUtils; +import com.google.cloud.dataflow.sdk.util.AvroUtils; +import com.google.cloud.dataflow.sdk.util.AvroUtils.AvroMetadata; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.base.Preconditions; @@ -45,7 +46,6 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; -import java.util.Arrays; import java.util.Collection; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; @@ -241,81 +241,6 @@ public void validate() { super.validate(); } - /** - * Avro file metadata. Visible for testing. 
- */ - static class Metadata { - byte[] syncMarker; - String codec; - String schema; - - public Metadata(byte[] syncMarker, String codec, String schema) { - this.syncMarker = syncMarker; - this.codec = codec; - this.schema = schema; - } - } - - /** - * Reads the {@link Metadata} from the header of an Avro file. Throws an IOException if the file - * is an invalid format. - * - *
 
This method parses the header of an Avro - * - * Object Container File. - */ - static Metadata readMetadataFromFile(String fileName) throws IOException { - String codec = null; - String schema = null; - byte[] syncMarker; - try (InputStream stream = - Channels.newInputStream(IOChannelUtils.getFactory(fileName).open(fileName))) { - BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(stream, null); - - // The header of an object container file begins with a four-byte magic number, followed - // by the file metadata (including the schema and codec), encoded as a map. Finally, the - // header ends with the file's 16-byte sync marker. - // See https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files for details on - // the encoding of container files. - - // Read the magic number. - byte[] magic = new byte[DataFileConstants.MAGIC.length]; - decoder.readFixed(magic); - if (!Arrays.equals(magic, DataFileConstants.MAGIC)) { - throw new IOException("Missing Avro file signature: " + fileName); - } - - // Read the metadata to find the codec and schema. - ByteBuffer valueBuffer = ByteBuffer.allocate(512); - long numRecords = decoder.readMapStart(); - while (numRecords > 0) { - for (long recordIndex = 0; recordIndex < numRecords; recordIndex++) { - String key = decoder.readString(); - // readBytes() clears the buffer and returns a buffer where: - // - position is the start of the bytes read - // - limit is the end of the bytes read - valueBuffer = decoder.readBytes(valueBuffer); - byte[] bytes = new byte[valueBuffer.remaining()]; - valueBuffer.get(bytes); - if (key.equals(DataFileConstants.CODEC)) { - codec = new String(bytes, "UTF-8"); - } else if (key.equals(DataFileConstants.SCHEMA)) { - schema = new String(bytes, "UTF-8"); - } - } - numRecords = decoder.mapNext(); - } - if (codec == null) { - codec = DataFileConstants.NULL_CODEC; - } - - // Finally, read the sync marker. - syncMarker = new byte[DataFileConstants.SYNC_SIZE]; - decoder.readFixed(syncMarker); - } - return new Metadata(syncMarker, codec, schema); - } - @Override public AvroSource createForSubrangeOfFile(String fileName, long start, long end) { byte[] syncMarker = this.syncMarker; @@ -327,21 +252,21 @@ public AvroSource createForSubrangeOfFile(String fileName, long start, long e // for a subrange of a file, we can initialize these values. When the resulting AvroSource // is further split, they do not need to be read again. if (codec == null || syncMarker == null || fileSchemaString == null) { - Metadata metadata; + AvroMetadata metadata; try { Collection files = FileBasedSource.expandFilePattern(fileName); Preconditions.checkArgument(files.size() <= 1, "More than 1 file matched %s"); - metadata = readMetadataFromFile(fileName); + metadata = AvroUtils.readMetadataFromFile(fileName); } catch (IOException e) { throw new RuntimeException("Error reading metadata from file " + fileName, e); } - codec = metadata.codec; - syncMarker = metadata.syncMarker; - fileSchemaString = metadata.schema; + codec = metadata.getCodec(); + syncMarker = metadata.getSyncMarker(); + fileSchemaString = metadata.getSchemaString(); // If the source was created with a null schema, use the schema that we read from the file's // metadata. 
if (readSchemaString == null) { - readSchemaString = metadata.schema; + readSchemaString = metadata.getSchemaString(); } } return new AvroSource(fileName, getMinBundleSize(), start, end, readSchemaString, type, @@ -551,7 +476,7 @@ public AvroReader(AvroSource source) { } @Override - public AvroSource getCurrentSource() { + public synchronized AvroSource getCurrentSource() { return (AvroSource) super.getCurrentSource(); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java new file mode 100644 index 0000000000000..b50fe8a7805bf --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.util; + +import org.apache.avro.file.DataFileConstants; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.DecoderFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.util.Arrays; + +/** + * A set of utilities for working with Avro files. + * + *
 
These utilities are based on the Avro 1.7.7 specification. + */ +public class AvroUtils { + + /** + * Avro file metadata. + */ + public static class AvroMetadata { + private byte[] syncMarker; + private String codec; + private String schemaString; + + AvroMetadata(byte[] syncMarker, String codec, String schemaString) { + this.syncMarker = syncMarker; + this.codec = codec; + this.schemaString = schemaString; + } + + /** + * The JSON-encoded schema + * string for the file. + */ + public String getSchemaString() { + return schemaString; + } + + /** + * The codec of the + * file. + */ + public String getCodec() { + return codec; + } + + /** + * The 16-byte sync marker for the file. See the documentation for + * Object + * Container File for more information. + */ + public byte[] getSyncMarker() { + return syncMarker; + } + } + + /** + * Reads the {@link AvroMetadata} from the header of an Avro file. + * + *
 
This method parses the header of an Avro + * + * Object Container File. + * + * @throws IOException if the file is an invalid format. + */ + public static AvroMetadata readMetadataFromFile(String fileName) throws IOException { + String codec = null; + String schemaString = null; + byte[] syncMarker; + try (InputStream stream = + Channels.newInputStream(IOChannelUtils.getFactory(fileName).open(fileName))) { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(stream, null); + + // The header of an object container file begins with a four-byte magic number, followed + // by the file metadata (including the schema and codec), encoded as a map. Finally, the + // header ends with the file's 16-byte sync marker. + // See https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files for details on + // the encoding of container files. + + // Read the magic number. + byte[] magic = new byte[DataFileConstants.MAGIC.length]; + decoder.readFixed(magic); + if (!Arrays.equals(magic, DataFileConstants.MAGIC)) { + throw new IOException("Missing Avro file signature: " + fileName); + } + + // Read the metadata to find the codec and schema. + ByteBuffer valueBuffer = ByteBuffer.allocate(512); + long numRecords = decoder.readMapStart(); + while (numRecords > 0) { + for (long recordIndex = 0; recordIndex < numRecords; recordIndex++) { + String key = decoder.readString(); + // readBytes() clears the buffer and returns a buffer where: + // - position is the start of the bytes read + // - limit is the end of the bytes read + valueBuffer = decoder.readBytes(valueBuffer); + byte[] bytes = new byte[valueBuffer.remaining()]; + valueBuffer.get(bytes); + if (key.equals(DataFileConstants.CODEC)) { + codec = new String(bytes, "UTF-8"); + } else if (key.equals(DataFileConstants.SCHEMA)) { + schemaString = new String(bytes, "UTF-8"); + } + } + numRecords = decoder.mapNext(); + } + if (codec == null) { + codec = DataFileConstants.NULL_CODEC; + } + + // Finally, read the sync marker. + syncMarker = new byte[DataFileConstants.SYNC_SIZE]; + decoder.readFixed(syncMarker); + } + return new AvroMetadata(syncMarker, codec, schemaString); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AvroUtilsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AvroUtilsTest.java new file mode 100644 index 0000000000000..68570bfd7a873 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/AvroUtilsTest.java @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.dataflow.sdk.coders.AvroCoder; +import com.google.cloud.dataflow.sdk.coders.DefaultCoder; +import com.google.cloud.dataflow.sdk.util.AvroUtils.AvroMetadata; +import com.google.common.base.MoreObjects; + +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileConstants; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.io.DatumWriter; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Random; + +/** + * Tests for AvroUtils. + */ +@RunWith(JUnit4.class) +public class AvroUtilsTest { + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + private static final int DEFAULT_RECORD_COUNT = 10000; + + /** + * Generates an input Avro file containing the given records in the temporary directory and + * returns the full path of the file. + */ + @SuppressWarnings("deprecation") // test of internal functionality + private String generateTestFile(String filename, List elems, AvroCoder coder, + String codec) throws IOException { + File tmpFile = tmpFolder.newFile(filename); + String path = tmpFile.toString(); + + FileOutputStream os = new FileOutputStream(tmpFile); + DatumWriter datumWriter = coder.createDatumWriter(); + try (DataFileWriter writer = new DataFileWriter<>(datumWriter)) { + writer.setCodec(CodecFactory.fromString(codec)); + writer.create(coder.getSchema(), os); + for (T elem : elems) { + writer.append(elem); + } + } + return path; + } + + @Test + public void testReadMetadataWithCodecs() throws Exception { + // Test reading files generated using all codecs. + String codecs[] = {DataFileConstants.NULL_CODEC, DataFileConstants.BZIP2_CODEC, + DataFileConstants.DEFLATE_CODEC, DataFileConstants.SNAPPY_CODEC, + DataFileConstants.XZ_CODEC}; + List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + + for (String codec : codecs) { + String filename = generateTestFile( + codec, expected, AvroCoder.of(Bird.class), codec); + AvroMetadata metadata = AvroUtils.readMetadataFromFile(filename); + assertEquals(codec, metadata.getCodec()); + } + } + + @Test + public void testReadSchemaString() throws Exception { + List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + String codec = DataFileConstants.NULL_CODEC; + String filename = generateTestFile( + codec, expected, AvroCoder.of(Bird.class), codec); + AvroMetadata metadata = AvroUtils.readMetadataFromFile(filename); + // By default, parse validates the schema, which is what we want. + Schema schema = new Schema.Parser().parse(metadata.getSchemaString()); + assertEquals(4, schema.getFields().size()); + } + + /** + * Class used as the record type in tests. 
+ */ + @DefaultCoder(AvroCoder.class) + static class Bird { + long number; + String species; + String quality; + long quantity; + + public Bird() {} + + public Bird(long number, String species, String quality, long quantity) { + this.number = number; + this.species = species; + this.quality = quality; + this.quantity = quantity; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(Bird.class) + .addValue(number) + .addValue(species) + .addValue(quantity) + .addValue(quality) + .toString(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Bird) { + Bird other = (Bird) obj; + return Objects.equals(species, other.species) && Objects.equals(quality, other.quality) + && quantity == other.quantity && number == other.number; + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(number, species, quality, quantity); + } + } + + /** + * Create a list of n random records. + */ + private static List createRandomRecords(long n) { + String[] qualities = { + "miserable", "forelorn", "fidgity", "squirrelly", "fanciful", "chipper", "lazy"}; + String[] species = {"pigeons", "owls", "gulls", "hawks", "robins", "jays"}; + Random random = new Random(0); + + List records = new ArrayList<>(); + for (long i = 0; i < n; i++) { + Bird bird = new Bird(); + bird.quality = qualities[random.nextInt(qualities.length)]; + bird.species = species[random.nextInt(species.length)]; + bird.number = i; + bird.quantity = random.nextLong(); + records.add(bird); + } + return records; + } +} From 52e6b4db2545d14bd5e8fa46a74dcc126e9965e5 Mon Sep 17 00:00:00 2001 From: amyu Date: Mon, 4 Jan 2016 08:06:38 -0800 Subject: [PATCH 1260/1541] Change the 'gaming' injector credentials check logic In the 'gaming' injector, change the credentials 'exit' logic to check client auth. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111322434 --- .../dataflow/examples/complete/game/injector/InjectorUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java index 06c38646274f2..55982df933e3a 100644 --- a/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java +++ b/examples/src/main/java8/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java @@ -49,7 +49,7 @@ public static Pubsub getClient(final HttpTransport httpTransport, if (credential.createScopedRequired()) { credential = credential.createScoped(PubsubScopes.all()); } - if (credential.getServiceAccountId() == null) { + if (credential.getClientAuthentication() != null) { System.out.println("\n***Warning! 
You are not using service account credentials to " + "authenticate.\nYou need to use service account credentials for this example," + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run " From 454f221fff5a352a6dbbbcbae59d13b75cb2ecac Mon Sep 17 00:00:00 2001 From: vladum Date: Mon, 4 Jan 2016 10:51:41 -0800 Subject: [PATCH 1261/1541] Increment environment major version to 4 ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111336513 --- .../cloud/dataflow/sdk/runners/DataflowPipelineRunner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java index b1a53446ba3ac..25084474114a9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java @@ -145,7 +145,7 @@ public class DataflowPipelineRunner extends PipelineRunner private DataflowPipelineRunnerHooks hooks; // Environment version information - private static final String ENVIRONMENT_MAJOR_VERSION = "3"; + private static final String ENVIRONMENT_MAJOR_VERSION = "4"; // The limit of CreateJob request size. private static final int CREATE_JOB_REQUEST_LIMIT_BYTES = 10 * 1024 * 1024; From 47b397ca3019e902ea59183704ca0e4676b512f4 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 4 Jan 2016 13:24:38 -0800 Subject: [PATCH 1262/1541] Tidy PCollectionViews ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111350932 --- .../cloud/dataflow/sdk/util/PCollectionViews.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java index b81732a37cf60..e5308aa58a966 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java @@ -96,7 +96,7 @@ public static PCollectionView> mapView WindowingStrategy windowingStrategy, Coder> valueCoder) { - return new MapPCollectionView(pipeline, windowingStrategy, valueCoder); + return new MapPCollectionView(pipeline, windowingStrategy, valueCoder); } /** @@ -107,7 +107,7 @@ public static PCollectionView Pipeline pipeline, WindowingStrategy windowingStrategy, Coder> valueCoder) { - return new MultimapPCollectionView(pipeline, windowingStrategy, valueCoder); + return new MultimapPCollectionView(pipeline, windowingStrategy, valueCoder); } /** @@ -229,7 +229,7 @@ private static class MultimapPCollectionView extends PCollectionViewBase, Map>, W> { public MultimapPCollectionView( Pipeline pipeline, - WindowingStrategy, W> windowingStrategy, + WindowingStrategy windowingStrategy, Coder> valueCoder) { super(pipeline, windowingStrategy, valueCoder); } @@ -258,7 +258,7 @@ private static class MapPCollectionView extends PCollectionViewBase, Map, W> { public MapPCollectionView( Pipeline pipeline, - WindowingStrategy, W> windowingStrategy, + WindowingStrategy windowingStrategy, Coder> valueCoder) { super(pipeline, windowingStrategy, valueCoder); } @@ -362,7 +362,7 @@ public TupleTag>> getTagInternal() { // Safe cast: It is required that the rest of the SDK maintain the invariant that // this tag is only used to access 
the contents of an appropriately typed underlying // PCollection - @SuppressWarnings({"rawtypes, unchecked"}) + @SuppressWarnings({"rawtypes", "unchecked"}) TupleTag>> untypedTag = (TupleTag) tag; return untypedTag; } From 9262be99f0c8d848ed7a26cdccb2040a30e28413 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 4 Jan 2016 14:01:31 -0800 Subject: [PATCH 1263/1541] Tidy some type variables ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111354348 --- .../dataflow/sdk/transforms/IntraBundleParallelization.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java index 2bcf28a7915a4..b6497b71c4ef1 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java @@ -160,7 +160,8 @@ public Bound withMaxParallelism(int maxParallelism) { * *
 
Note that the specified {@code doFn} needs to be thread safe. */ - public Bound of(DoFn doFn) { + public Bound + of(DoFn doFn) { return new Bound<>(doFn, maxParallelism); } From 3bdb31bc6dd0ce92568895cbbf0960d92cd40632 Mon Sep 17 00:00:00 2001 From: klk Date: Mon, 4 Jan 2016 14:44:45 -0800 Subject: [PATCH 1264/1541] Tidy some tests and support code ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111358603 --- .../cookbook/MaxPerKeyExamplesTest.java | 13 +++---- .../dataflow/sdk/transforms/DoFnTester.java | 34 ++++++++++++++----- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java index 539012d6fbcbf..3deff2a2e3541 100644 --- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java +++ b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java @@ -21,6 +21,7 @@ import com.google.cloud.dataflow.examples.cookbook.MaxPerKeyExamples.FormatMaxesFn; import com.google.cloud.dataflow.sdk.transforms.DoFnTester; import com.google.cloud.dataflow.sdk.values.KV; +import com.google.common.collect.ImmutableList; import org.hamcrest.CoreMatchers; import org.junit.Assert; @@ -46,17 +47,13 @@ public class MaxPerKeyExamplesTest { .set("month", "6").set("day", "18") .set("year", "2014").set("mean_temp", "45.3") .set("tornado", true); - private static final TableRow[] ROWS_ARRAY = new TableRow[] { - row1, row2, row3 - }; + private static final List TEST_ROWS = ImmutableList.of(row1, row2, row3); private static final KV kv1 = KV.of(6, 85.3); private static final KV kv2 = KV.of(6, 45.3); private static final KV kv3 = KV.of(7, 75.4); - static final KV[] TUPLES_ARRAY = new KV[] { - kv1, kv2, kv3 - }; + private static final List> TEST_KVS = ImmutableList.of(kv1, kv2, kv3); private static final TableRow resultRow1 = new TableRow() .set("month", 6) @@ -70,7 +67,7 @@ public class MaxPerKeyExamplesTest { public void testExtractTempFn() { DoFnTester> extractTempFn = DoFnTester.of(new ExtractTempFn()); - List> results = extractTempFn.processBatch(ROWS_ARRAY); + List> results = extractTempFn.processBatch(TEST_ROWS); Assert.assertThat(results, CoreMatchers.hasItem(kv1)); Assert.assertThat(results, CoreMatchers.hasItem(kv2)); Assert.assertThat(results, CoreMatchers.hasItem(kv3)); @@ -80,7 +77,7 @@ public void testExtractTempFn() { public void testFormatMaxesFn() { DoFnTester, TableRow> formatMaxesFnFn = DoFnTester.of(new FormatMaxesFn()); - List results = formatMaxesFnFn.processBatch(TUPLES_ARRAY); + List results = formatMaxesFnFn.processBatch(TEST_KVS); Assert.assertThat(results, CoreMatchers.hasItem(resultRow1)); Assert.assertThat(results, CoreMatchers.hasItem(resultRow2)); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java index 99fc0a31d0e6a..e24d99448a307 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java @@ -35,6 +35,7 @@ import com.google.common.collect.Lists; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -152,11 +153,11 @@ public void setSideOutputTags(TupleTagList 
sideOutputTags) { /** * A convenience operation that first calls {@link #startBundle}, - * then calls {@link #processElement} on each of the arguments, then + * then calls {@link #processElement} on each of the input elements, then * calls {@link #finishBundle}, then returns the result of * {@link #takeOutputElements}. */ - public List processBatch(InputT... inputElements) { + public List processBatch(Iterable inputElements) { startBundle(); for (InputT inputElement : inputElements) { processElement(inputElement); @@ -165,6 +166,22 @@ public List processBatch(InputT... inputElements) { return takeOutputElements(); } + /** + * A convenience method for testing {@link DoFn DoFns} with bundles of elements. + * Logic proceeds as follows: + * + *
 + *
                                    + *
                                  1. Calls {@link #startBundle}.
                                  2. + *
                                  3. Calls {@link #processElement} on each of the arguments.
                                  4. + *
                                  5. Calls {@link #finishBundle}.
                                  6. + *
                                  7. Returns the result of {@link #takeOutputElements}.
                                  8. + *
                                  
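 + * + *

                                  A hypothetical usage sketch (the {@code WordLengthFn} transform and the + * input values below are illustrative assumptions, not part of this change): + *

                                  {@code
                                  + * DoFnTester fnTester = DoFnTester.of(new WordLengthFn());
                                  + * List lengths = fnTester.processBatch("alpha", "beta", "gamma");
                                  + * }
                                  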
+ */ + @SafeVarargs + public final List processBatch(InputT... inputElements) { + return processBatch(Arrays.asList(inputElements)); + } + /** * Calls {@link DoFn#startBundle} on the {@code DoFn} under test. * @@ -276,10 +293,10 @@ public List peekSideOutputElements(TupleTag tag) { // TODO: Should we return an unmodifiable list? return Lists.transform( outputManager.getOutput(tag), - new Function() { + new Function, T>() { @Override - public T apply(Object input) { - return ((WindowedValue) input).getValue(); + public T apply(WindowedValue input) { + return input.getValue(); }}); } @@ -308,10 +325,11 @@ public List takeSideOutputElements(TupleTag tag) { /** * Returns the value of the provided {@link Aggregator}. */ - public OutputT getAggregatorValue(Aggregator agg) { + public AggregateT getAggregatorValue(Aggregator agg) { @SuppressWarnings("unchecked") - Counter counter = - (Counter) counterSet.getExistingCounter("user-" + STEP_NAME + "-" + agg.getName()); + Counter counter = + (Counter) + counterSet.getExistingCounter("user-" + STEP_NAME + "-" + agg.getName()); return counter.getAggregate(); } From 486b6419135cb2d2091ae72521fdece6c28848f9 Mon Sep 17 00:00:00 2001 From: jdobry Date: Mon, 4 Jan 2016 14:51:31 -0800 Subject: [PATCH 1265/1541] Setting table creation definition in WindowedWordCount example ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111359232 --- .../google/cloud/dataflow/examples/WindowedWordCount.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java index 207d58629c588..fcda15eeca8f7 100644 --- a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java +++ b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java @@ -244,7 +244,11 @@ public static void main(String[] args) throws IOException { * The BigQuery output source supports both bounded and unbounded data. */ wordCounts.apply(ParDo.of(new FormatAsTableRowFn())) - .apply(BigQueryIO.Write.to(getTableReference(options)).withSchema(getSchema())); + .apply(BigQueryIO.Write + .to(getTableReference(options)) + .withSchema(getSchema()) + .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) + .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE)); PipelineResult result = pipeline.run(); From 9a3deebd51c17827a148664fb7039ff42001e79d Mon Sep 17 00:00:00 2001 From: dhalperi Date: Mon, 4 Jan 2016 14:53:57 -0800 Subject: [PATCH 1266/1541] Add ByteCoder ByteCoder encodes a Java Byte directly as a byte, thus a PCollection will be encoded using consumes 1 byte of storage per element. It is deterministic. Also add ByteCoder to the CoderRegistry as the default encoder/decoder for PCollection. 
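 For illustration, a pipeline could produce such a collection and rely on the registered default coder, or set it explicitly; a minimal sketch, assuming a hypothetical input PCollection and ExtractFirstByteFn (neither is part of this change): PCollection bytes = input .apply(ParDo.of(new ExtractFirstByteFn())) // emits one Byte per input element .setCoder(ByteCoder.of()); // explicit, though the registry default would also apply 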
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111359449 --- .../cloud/dataflow/sdk/coders/ByteCoder.java | 111 ++++++++++++++++++ .../dataflow/sdk/coders/CoderRegistry.java | 1 + .../dataflow/sdk/coders/ByteCoderTest.java | 91 ++++++++++++++ 3 files changed, 203 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java new file mode 100644 index 0000000000000..9f17497d8dc4a --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UTFDataFormatException; + +/** + * A {@link ByteCoder} encodes {@link Byte} values in 1 byte using Java serialization. + */ +public class ByteCoder extends AtomicCoder { + + @JsonCreator + public static ByteCoder of() { + return INSTANCE; + } + + ///////////////////////////////////////////////////////////////////////////// + + private static final ByteCoder INSTANCE = new ByteCoder(); + + private ByteCoder() {} + + @Override + public void encode(Byte value, OutputStream outStream, Context context) + throws IOException, CoderException { + if (value == null) { + throw new CoderException("cannot encode a null Byte"); + } + outStream.write(value.byteValue()); + } + + @Override + public Byte decode(InputStream inStream, Context context) + throws IOException, CoderException { + try { + // value will be between 0-255, -1 for EOF + int value = inStream.read(); + if (value == -1) { + throw new EOFException("EOF encountered decoding 1 byte from input stream"); + } + return (byte) value; + } catch (EOFException | UTFDataFormatException exn) { + // These exceptions correspond to decoding problems, so change + // what kind of exception they're branded as. + throw new CoderException(exn); + } + } + + /** + * {@inheritDoc} + * + * {@link ByteCoder} will never throw a {@link Coder.NonDeterministicException}; bytes can always + * be encoded deterministically. + */ + @Override + public void verifyDeterministic() {} + + /** + * {@inheritDoc} + * + * @return {@code true}. This coder is injective. + */ + @Override + public boolean consistentWithEquals() { + return true; + } + + /** + * {@inheritDoc} + * + * @return {@code true}. {@link ByteCoder#getEncodedElementByteSize} returns a constant. 
+ */ + @Override + public boolean isRegisterByteSizeObserverCheap(Byte value, Context context) { + return true; + } + + /** + * {@inheritDoc} + * + * @return {@code 1}, the byte size of a {@link Byte} encoded using Java serialization. + */ + @Override + protected long getEncodedElementByteSize(Byte value, Context context) + throws Exception { + if (value == null) { + throw new CoderException("cannot estimate size for unsupported null value"); + } + return 1; + } +} diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java index 82c3a4980e140..a5f77b9429d88 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java @@ -88,6 +88,7 @@ public CoderRegistry() { * Registers standard Coders with this CoderRegistry. */ public void registerStandardCoders() { + registerCoder(Byte.class, ByteCoder.class); registerCoder(Double.class, DoubleCoder.class); registerCoder(Instant.class, InstantCoder.class); registerCoder(Integer.class, VarIntCoder.class); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java new file mode 100644 index 0000000000000..6cb852e236139 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.google.cloud.dataflow.sdk.coders; + +import com.google.cloud.dataflow.sdk.testing.CoderProperties; +import com.google.cloud.dataflow.sdk.util.CoderUtils; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Arrays; +import java.util.List; + +/** + * Test case for {@link ByteCoder}. + */ +@RunWith(JUnit4.class) +public class ByteCoderTest { + + private static final Coder TEST_CODER = ByteCoder.of(); + + private static final List TEST_VALUES = Arrays.asList( + (byte) 1, + (byte) 4, + (byte) 6, + (byte) 50, + (byte) 124, + Byte.MAX_VALUE, + Byte.MIN_VALUE); + + @Test + public void testDecodeEncodeEqual() throws Exception { + for (Byte value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value); + } + } + + // This should never change. The format is fixed by Java. + private static final String EXPECTED_ENCODING_ID = ""; + + @Test + public void testEncodingId() throws Exception { + CoderProperties.coderHasEncodingId(TEST_CODER, EXPECTED_ENCODING_ID); + } + + /** + * Generated data to check that the wire format has not changed. To regenerate, see + * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}. 
+ */ + private static final List TEST_ENCODINGS = Arrays.asList( + "AQ", + "BA", + "Bg", + "Mg", + "fA", + "fw", + "gA"); + + @Test + public void testWireFormatEncode() throws Exception { + CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS); + } + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void encodeNullThrowsCoderException() throws Exception { + thrown.expect(CoderException.class); + thrown.expectMessage("cannot encode a null Byte"); + + CoderUtils.encodeToBase64(TEST_CODER, null); + } +} From e02f2e38874bb2250d952c1130b3c9862db2a4a2 Mon Sep 17 00:00:00 2001 From: markshields Date: Tue, 5 Jan 2016 12:24:20 -0800 Subject: [PATCH 1267/1541] Eagerly merge new windows into existing windows If a new element introduces a new window, and that window can be merged into an existing window, make sure we avoid creating any intermediate state for the new window. Instead, redirect all state to the existing window. In the common case of input elements arriving in (roughly) event time order, and using session windows, will ensure state space and state merging cost is O(1) instead of O(n) for n elements per final session. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111439148 --- .../sdk/transforms/windowing/AfterPane.java | 28 +- .../windowing/AfterProcessingTime.java | 33 +- .../AfterSynchronizedProcessingTime.java | 32 +- .../transforms/windowing/AfterWatermark.java | 32 +- .../dataflow/sdk/util/ActiveWindowSet.java | 111 +++- .../sdk/util/MergingActiveWindowSet.java | 421 +++++++++++--- .../dataflow/sdk/util/NonEmptyPanes.java | 4 +- .../sdk/util/NonMergingActiveWindowSet.java | 49 +- .../cloud/dataflow/sdk/util/ReduceFn.java | 34 +- .../sdk/util/ReduceFnContextFactory.java | 42 +- .../dataflow/sdk/util/ReduceFnRunner.java | 544 ++++++++++-------- .../dataflow/sdk/util/SystemReduceFn.java | 4 +- .../sdk/util/TriggerContextFactory.java | 2 +- .../dataflow/sdk/util/TriggerRunner.java | 56 +- .../dataflow/sdk/util/WatermarkHold.java | 99 ++-- .../sdk/util/state/MergingStateInternals.java | 18 +- .../windowing/AfterProcessingTimeTest.java | 2 +- .../sdk/util/MergingActiveWindowSetTest.java | 175 ++++++ .../dataflow/sdk/util/ReduceFnTester.java | 8 +- .../state/InMemoryStateInternalsTest.java | 6 +- 20 files changed, 1154 insertions(+), 546 deletions(-) create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSetTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java index 7e56d0f11ca74..bdc04c2c178bb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java @@ -59,6 +59,11 @@ public static AfterPane elementCountAtLeast(int cou return new AfterPane<>(countElems); } + @Override + public void prefetchOnElement(StateContext state) { + state.access(ELEMENTS_IN_PANE_TAG).get(); + } + @Override public TriggerResult onElement(OnElementContext c) throws Exception { CombiningValueState elementsInPane = c.state().access(ELEMENTS_IN_PANE_TAG); @@ -71,6 +76,11 @@ public TriggerResult onElement(OnElementContext c) throws Exception { return count >= countElems ? 
TriggerResult.FIRE_AND_FINISH : TriggerResult.CONTINUE; } + @Override + public void prefetchOnMerge(MergingStateContext state) { + state.mergingAccess(ELEMENTS_IN_PANE_TAG).get(); + } + @Override public MergeResult onMerge(OnMergeContext c) throws Exception { // If we've already received enough elements and finished in some window, then this trigger @@ -80,8 +90,12 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { } // Otherwise, compute the sum of elements in all the active panes - CombiningValueState elementsInPane = - c.state().accessAcrossMergingWindows(ELEMENTS_IN_PANE_TAG); + CombiningValueState elementsInPane = c.state().mergingAccess(ELEMENTS_IN_PANE_TAG); + // Both InMemoryStateInternals and WindmillStateInternals implement merging access using + // MergeCombiningValue. The implementation of StateContents.read returned by get will + // eagerly compact state on read. Thus after the following we are guaranteed all state from + // merged windows will have been compacted away. + // TODO: Make this more explicit and less fragile with a 'compact' API? long count = elementsInPane.get().read(); return count >= countElems ? MergeResult.FIRE_AND_FINISH : MergeResult.CONTINUE; } @@ -91,16 +105,6 @@ public TriggerResult onTimer(OnTimerContext c) { return TriggerResult.CONTINUE; } - @Override - public void prefetchOnElement(StateContext state) { - state.access(ELEMENTS_IN_PANE_TAG).get(); - } - - @Override - public void prefetchOnMerge(MergingStateContext state) { - state.accessAcrossMergingWindows(ELEMENTS_IN_PANE_TAG).get(); - } - @Override public void prefetchOnTimer(StateContext state) { } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java index 5fe0eb9942c28..a64bb3437a35d 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java @@ -58,6 +58,12 @@ protected AfterProcessingTime newWith( return new AfterProcessingTime(transforms); } + @Override + public void prefetchOnElement(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + + @Override public TriggerResult onElement(OnElementContext c) throws Exception { @@ -72,6 +78,11 @@ public TriggerResult onElement(OnElementContext c) return TriggerResult.CONTINUE; } + @Override + public void prefetchOnMerge(MergingStateContext state) { + state.mergingAccess(DELAYED_UNTIL_TAG).get(); + } + @Override public MergeResult onMerge(OnMergeContext c) throws Exception { // If the processing time timer has fired in any of the windows being merged, it would have @@ -83,7 +94,7 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { // Determine the earliest point across all the windows, and delay to that. 
CombiningValueState mergingDelays = - c.state().accessAcrossMergingWindows(DELAYED_UNTIL_TAG); + c.state().mergingAccess(DELAYED_UNTIL_TAG); Instant earliestTimer = mergingDelays.get().read(); if (earliestTimer != null) { mergingDelays.clear(); @@ -94,6 +105,11 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { return MergeResult.CONTINUE; } + @Override + public void prefetchOnTimer(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + @Override public TriggerResult onTimer(OnTimerContext c) throws Exception { if (c.timeDomain() != TimeDomain.PROCESSING_TIME) { @@ -108,21 +124,6 @@ public TriggerResult onTimer(OnTimerContext c) throws Exception { return TriggerResult.FIRE_AND_FINISH; } - @Override - public void prefetchOnElement(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnMerge(MergingStateContext state) { - state.accessAcrossMergingWindows(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnTimer(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - @Override public void clear(TriggerContext c) throws Exception { CombiningValueState delayed = c.state().access(DELAYED_UNTIL_TAG); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java index 8d7ce10b5dc0b..d38b6f8ad49bf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java @@ -34,6 +34,11 @@ public AfterSynchronizedProcessingTime() { super(null); } + @Override + public void prefetchOnElement(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + @Override public TriggerResult onElement(OnElementContext c) throws Exception { @@ -48,6 +53,11 @@ public TriggerResult onElement(OnElementContext c) return TriggerResult.CONTINUE; } + @Override + public void prefetchOnMerge(MergingStateContext state) { + state.mergingAccess(DELAYED_UNTIL_TAG).get(); + } + @Override public MergeResult onMerge(OnMergeContext c) throws Exception { // If the processing time timer has fired in any of the windows being merged, it would have @@ -59,7 +69,7 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { // Otherwise, determine the earliest delay for all of the windows, and delay to that point. 
CombiningValueState mergingDelays = - c.state().accessAcrossMergingWindows(DELAYED_UNTIL_TAG); + c.state().mergingAccess(DELAYED_UNTIL_TAG); Instant earliestTimer = mergingDelays.get().read(); if (earliestTimer != null) { mergingDelays.clear(); @@ -70,6 +80,11 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { return MergeResult.CONTINUE; } + @Override + public void prefetchOnTimer(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + @Override public TriggerResult onTimer(OnTimerContext c) throws Exception { if (c.timeDomain() != TimeDomain.SYNCHRONIZED_PROCESSING_TIME) { @@ -84,21 +99,6 @@ public TriggerResult onTimer(OnTimerContext c) throws Exception { return TriggerResult.FIRE_AND_FINISH; } - @Override - public void prefetchOnElement(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnMerge(MergingStateContext state) { - state.accessAcrossMergingWindows(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnTimer(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - @Override public void clear(TriggerContext c) throws Exception { CombiningValueState delayed = c.state().access(DELAYED_UNTIL_TAG); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java index 8758768486047..5a7e9e782165a 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java @@ -94,6 +94,11 @@ private FromFirstElementInPane( super(delayFunction); } + @Override + public void prefetchOnElement(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + @Override public TriggerResult onElement(OnElementContext c) throws Exception { CombiningValueState delayUntilState = c.state().access(DELAYED_UNTIL_TAG); @@ -107,6 +112,11 @@ public TriggerResult onElement(OnElementContext c) throws Exception { return TriggerResult.CONTINUE; } + @Override + public void prefetchOnMerge(MergingStateContext state) { + state.mergingAccess(DELAYED_UNTIL_TAG).get(); + } + @Override public MergeResult onMerge(OnMergeContext c) throws Exception { // If the watermark time timer has fired in any of the windows being merged, it would have @@ -121,7 +131,7 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { // of this first element in each pane). // Determine the earliest point across all the windows, and delay to that. 
CombiningValueState mergingDelays = - c.state().accessAcrossMergingWindows(DELAYED_UNTIL_TAG); + c.state().mergingAccess(DELAYED_UNTIL_TAG); Instant earliestTimer = mergingDelays.get().read(); if (earliestTimer != null) { mergingDelays.clear(); @@ -132,6 +142,11 @@ public MergeResult onMerge(OnMergeContext c) throws Exception { return MergeResult.CONTINUE; } + @Override + public void prefetchOnTimer(StateContext state) { + state.access(DELAYED_UNTIL_TAG).get(); + } + @Override public TriggerResult onTimer(OnTimerContext c) throws Exception { if (c.timeDomain() != TimeDomain.EVENT_TIME) { @@ -146,21 +161,6 @@ public TriggerResult onTimer(OnTimerContext c) throws Exception { return TriggerResult.FIRE_AND_FINISH; } - @Override - public void prefetchOnElement(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnMerge(MergingStateContext state) { - state.accessAcrossMergingWindows(DELAYED_UNTIL_TAG).get(); - } - - @Override - public void prefetchOnTimer(StateContext state) { - state.access(DELAYED_UNTIL_TAG).get(); - } - @Override public void clear(TriggerContext c) throws Exception { CombiningValueState delayed = c.state().access(DELAYED_UNTIL_TAG); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java index f0b6a88e2b7f4..c5854ea4ea679 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java @@ -19,65 +19,128 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import java.util.Collection; +import java.util.Set; + +import javax.annotation.Nullable; /** - * Tracks the windows that are active. + * Track which active windows have their state associated with merged-away windows. + * + * When windows are merged we must track which state previously associated with the merged windows + * must now be associated with the result window. Some of that state may be combined eagerly when + * the windows are merged. The rest is combined lazily when the final state is actually + * required when emitting a pane. We keep track of this using an {@link ActiveWindowSet}. + * + *

An element may belong to one or more windows. Each key may have zero or more windows + * corresponding to elements with that key. A window can be in one of five states: + *

    + *
  1. NEW: We have just encountered the window on an incoming element and do not yet know if + * it should be merged into an ACTIVE window since we have not yet called + * {@link WindowFn#mergeWindows}. + *
  2. EPHEMERAL: A NEW window has been merged into an ACTIVE window before any state has been + * associated with that window. Thus the window is neither ACTIVE nor MERGED. These windows + * are not persistently represented since if they reappear the merge function should again + * redirect them to an ACTIVE window. (We could collapse EPHEMERAL into MERGED, but keeping them + * separate cuts down on the number of windows we need to keep track of in the common case + * of SessionWindows over in-order events.) + *
  3. ACTIVE: A NEW window has state associated with it and has not itself been merged away. + * The window may have one or more 'state address' windows under which its non-empty state is + * stored. The true state for an ACTIVE window must be derived by reading all of the state in its + * state address windows. + *
  4. MERGED: An ACTIVE window has been merged into another ACTIVE window after it had state + * associated with it. The window will thus appear as a state address window for exactly one + * ACTIVE window. + *
  5. GARBAGE: The window has been garbage collected. No new elements (even late elements) will + * ever be assigned to that window. These windows are not explicitly represented anywhere. + * (Garbage collection is performed by {@link ReduceFnRunner#onTimer}). + *
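For orientation, a minimal sketch (not part of this patch) of the five-state lifecycle described above, written as a plain Java enum. The SDK tracks these states implicitly via the maps in MergingActiveWindowSet rather than with an explicit enum.

// Hypothetical illustration only; not an SDK type.
enum WindowLifecycleState {
  NEW,        // seen on an incoming element; WindowFn.mergeWindows not yet consulted
  EPHEMERAL,  // merged into an ACTIVE window before any state was written; never persisted
  ACTIVE,     // carries state under one or more state address windows
  MERGED,     // merged away after acquiring state; now a state address window of one ACTIVE window
  GARBAGE     // past garbage collection; never represented explicitly
}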
* - * @param the types of windows being managed + *

If no windows will ever be merged we can use the dummy implementation {@link + * NonMergingActiveWindowSet}. Otherwise, the actual implementation of this data structure is in + * {@link MergingActiveWindowSet}. + * + * @param the type of window being managed */ public interface ActiveWindowSet { - /** * Callback for {@link #merge}. */ public interface MergeCallback { - void onMerge(Collection mergedWindows, W resultWindow, boolean isResultNew) throws Exception; + /** + * Called when windows are about to be merged. + * + * @param toBeMerged the windows about to be merged. + * @param activeToBeMerged the subset of {@code toBeMerged} corresponding to windows which + * are currently ACTIVE (and about to be merged). The remaining windows have been deemed + * EPHEMERAL. + * @param mergeResult the result window, either a member of {@code toBeMerged} or new. + */ + void onMerge(Collection toBeMerged, Collection activeToBeMerged, W mergeResult) + throws Exception; } + /** + * Remove EPHEMERAL windows since we only need to know about them while processing new elements. + */ + void removeEphemeralWindows(); + /** * Save any state changes needed. */ void persist(); /** - * Add a window to the {@code ActiveWindowSet}. - * - * @return false if the window was definitely not-active before being added, true if it either - * was already active, or the implementation doesn't have enough information to know. + * Return the ACTIVE window into which {@code window} has been merged. + * Return {@code window} itself if it is ACTIVE. Return null if {@code window} has not + * yet been seen. + */ + @Nullable + W representative(W window); + + /** + * Return (a view of) the set of currently ACTIVE windows. + */ + Set getActiveWindows(); + + /** + * Return {@code true} if {@code window} is ACTIVE. + */ + boolean isActive(W window); + + /** + * If {@code window} is not already known to be ACTIVE, MERGED or EPHEMERAL then add it + * as NEW. All NEW windows will be accounted for as ACTIVE, MERGED or EPHEMERAL by a call + * to {@link #merge}. */ - boolean add(W window); + void addNew(W window); /** - * Return true if the window is active. + * If {@code window} is not already known to be ACTIVE, MERGED or EPHEMERAL then add it + * as ACTIVE. */ - boolean contains(W window); + void addActive(W window); /** - * Remove a window from the {@code ActiveWindowSet}. + * Remove {@code window} from the set. */ void remove(W window); /** * Invoke {@link WindowFn#mergeWindows} on the {@code WindowFn} associated with this window set, * merging as many of the active windows as possible. {@code mergeCallback} will be invoked for - * each group of windows that are merged. + * each group of windows that are merged. After this no NEW windows will remain. */ void merge(MergeCallback mergeCallback) throws Exception; /** - * Return the set of windows that were merged to produce {@code window}. If the associated - * {@code WindowFn} never merges windows, then this should return the singleton list containing - * {@code window}. - */ - Iterable sourceWindows(W window); - - /** - * Return the subset of {@code windows} that existed in the original merge tree. + * Return the state address windows for ACTIVE {@code window} from which all state associated + * should be read and merged. */ - Collection originalWindows(Collection windows); + Set readStateAddresses(W window); /** - * Return the number of windows that are currently active. + * Return the state address window of ACTIVE {@code window} into which all new state should be + * written. 
Always one of the results of {@link #readStateAddresses}. */ - int size(); + W writeStateAddress(W window); } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java index 2d2ba5f370615..ebf1540e6e4bb 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java @@ -24,86 +24,184 @@ import com.google.cloud.dataflow.sdk.util.state.StateTag; import com.google.cloud.dataflow.sdk.util.state.StateTags; import com.google.cloud.dataflow.sdk.util.state.ValueState; -import com.google.common.base.Predicates; -import com.google.common.collect.Collections2; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; +import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; + /** - * Implementation of {@link ActiveWindowSet} used with {@link WindowFn WindowFns} that support - * merging. + * Implementation of {@link ActiveWindowSet} for use with {@link WindowFn}s that support + * merging. In effect maintains an equivalence class of windows (where two windows which have + * been merged are in the same class), but also manages which windows contain state which + * must be merged when a pane is fired. + * + *

Note that this object must be serialized and stored when work units are committed such + * that subsequent work units can recover the equivalence classes etc. * - * @param the types of windows being managed + * @param the type of window being managed */ -public class MergingActiveWindowSet - implements ActiveWindowSet { - +public class MergingActiveWindowSet implements ActiveWindowSet { private final WindowFn windowFn; /** - * A map of live windows to windows that were merged into them. + * A map from ACTIVE windows to their state address windows. Writes to the ACTIVE window + * state can be redirected to any one of the state address windows. Reads need to merge + * from all state address windows. If the set is empty then the window is NEW. * - *

The keys of the map correspond to the set of (merged) windows and the values - * are the no-longer-present windows that were merged into the keys. A given - * window can appear in both the key and value of a single entry, but other at - * most once across all keys and values. + *

    + *
  • The state address windows will be empty if the window is NEW: we don't yet know what other + * windows it may be merged into, and the window does not yet have any state associated with it. + * In this way we can distinguish between MERGED and EPHEMERAL windows when merging.
  • The state address windows will contain just the window itself if it has never been merged + * but has state.
  • It is possible none of the state address windows correspond to the window itself. For + * example, two windows W1 and W2 with state may be merged to form W12. From then on additional + * state can be added to just W1 or W2. Thus the state address windows for W12 do not need to + * include W12. + *
  • If W1 is in the set for W2 then W1 is not a state address window of any other active + * window. Furthermore W1 will map to W2 in {@link #windowToActiveWindow}. + *
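A minimal sketch (not part of this patch) of the lazy-merge reading discipline these state address windows imply, using only the ActiveWindowSet methods introduced above. The per-window counts map is a hypothetical stand-in for real persisted state.

import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.util.ActiveWindowSet;

import java.util.Map;

/** Hypothetical helper, not SDK code: combine per-window counts lazily on read. */
class LazyMergedReadExample {
  static <W extends BoundedWindow> long readMergedCount(
      ActiveWindowSet<W> activeWindows, W window, Map<W, Long> countPerStateAddressWindow) {
    long total = 0L;
    // Reads must visit every state address window of the ACTIVE window ...
    for (W stateAddressWindow : activeWindows.readStateAddresses(window)) {
      Long count = countPerStateAddressWindow.get(stateAddressWindow);
      if (count != null) {
        total += count;
      }
    }
    // ... whereas any new state would be written only under
    // activeWindows.writeStateAddress(window).
    return total;
  }
}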
*/ - private final Map> mergeTree; + @Nullable + private Map> activeWindowToStateAddressWindows; /** - * Used to determine if writing the mergeTree (which is relatively stable) - * is necessary. + * As above, but only for EPHEMERAL windows. Does not need to be persisted. */ - private final Map> originalMergeTree; + private final Map> activeWindowToEphemeralWindows; - private final ValueState>> mergeTreeValue; + /** + * A map from window to the ACTIVE window it has been merged into. + * + *

Does not need to be persisted. + * + *

    + *
  • Key window may be ACTIVE, MERGED or EPHEMERAL. + *
  • ACTIVE windows map to themselves. + *
  • If W1 maps to W2 then W2 is in {@link #activeWindowToStateAddressWindows}. + *
  • If W1 = W2 then W1 is ACTIVE. If W1 is in the state address window set for W2 then W1 is + * MERGED. Otherwise W1 is EPHEMERAL. + *
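A minimal sketch (not part of this patch) of how a window's state follows from the two maps just described; the classifier is hypothetical but mirrors the bullet points above.

import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;

import java.util.Map;
import java.util.Set;

/** Hypothetical classifier, not SDK code. */
class WindowClassificationExample {
  static <W extends BoundedWindow> String classify(
      Map<W, Set<W>> activeWindowToStateAddressWindows,
      Map<W, W> windowToActiveWindow,
      W window) {
    W active = windowToActiveWindow.get(window);
    if (active == null) {
      return "unknown: never seen, or already garbage collected";
    }
    if (window.equals(active)) {
      return "ACTIVE";  // ACTIVE windows map to themselves
    }
    // MERGED windows appear in the state address set of their representative;
    // EPHEMERAL windows are known only via windowToActiveWindow.
    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(active);
    return stateAddressWindows != null && stateAddressWindows.contains(window)
        ? "MERGED" : "EPHEMERAL";
  }
}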
+ */ + @Nullable + private Map windowToActiveWindow; + + /** + * Deep clone of {@link #activeWindowToStateAddressWindows} as of last commit. + * + *

Used to avoid writing to state if no changes have been made during the work unit. + */ + @Nullable + private Map> originalActiveWindowToStateAddressWindows; + + /** + * Handle representing our state in the backend. + */ + private final ValueState>> valueState; public MergingActiveWindowSet(WindowFn windowFn, StateInternals state) { this.windowFn = windowFn; - StateTag>>> mergeTreeAddr = StateTags.makeSystemTagInternal( - StateTags.value("tree", - MapCoder.of(windowFn.windowCoder(), SetCoder.of(windowFn.windowCoder())))); - this.mergeTreeValue = state.state(StateNamespaces.global(), mergeTreeAddr); - this.mergeTree = emptyIfNull(mergeTreeValue.get().read()); + StateTag>>> mergeTreeAddr = + StateTags.makeSystemTagInternal(StateTags.value( + "tree", MapCoder.of(windowFn.windowCoder(), SetCoder.of(windowFn.windowCoder())))); + valueState = state.state(StateNamespaces.global(), mergeTreeAddr); + // Little use trying to prefetch this state since the ReduceFnRunner is stymied until it is + // available. + activeWindowToStateAddressWindows = emptyIfNull(valueState.get().read()); + activeWindowToEphemeralWindows = new HashMap<>(); + originalActiveWindowToStateAddressWindows = deepCopy(activeWindowToStateAddressWindows); + windowToActiveWindow = invert(activeWindowToStateAddressWindows); + } - originalMergeTree = deepCopy(mergeTree); + @Override + public void removeEphemeralWindows() { + for (Map.Entry> entry : activeWindowToEphemeralWindows.entrySet()) { + for (W ephemeral : entry.getValue()) { + windowToActiveWindow.remove(ephemeral); + } + } + activeWindowToEphemeralWindows.clear(); } @Override public void persist() { - if (!mergeTree.equals(originalMergeTree)) { - mergeTreeValue.set(mergeTree); + if (activeWindowToStateAddressWindows.equals(originalActiveWindowToStateAddressWindows)) { + // No change. + return; + } + // All NEW windows must have been accounted for. + for (Map.Entry> entry : activeWindowToStateAddressWindows.entrySet()) { + Preconditions.checkState( + !entry.getValue().isEmpty(), "Cannot persist NEW window %s", entry.getKey()); } + // Should be no EPHEMERAL windows. + Preconditions.checkState( + activeWindowToEphemeralWindows.isEmpty(), "Unexpected EPHEMERAL windows before persist"); + + valueState.set(activeWindowToStateAddressWindows); + // No need to update originalActiveWindowToStateAddressWindows since this object is about to + // become garbage. 
} @Override - public boolean contains(W window) { - return mergeTree.containsKey(window); + @Nullable + public W representative(W window) { + return windowToActiveWindow.get(window); } @Override - public boolean add(W window) { - if (mergeTree.containsKey(window)) { - return false; + public Set getActiveWindows() { + return activeWindowToStateAddressWindows.keySet(); + } + + @Override + public boolean isActive(W window) { + return activeWindowToStateAddressWindows.containsKey(window); + } + + @Override + public void addNew(W window) { + if (!windowToActiveWindow.containsKey(window)) { + activeWindowToStateAddressWindows.put(window, new HashSet()); } + } - mergeTree.put(window, new HashSet()); - return true; + @Override + public void addActive(W window) { + if (!windowToActiveWindow.containsKey(window)) { + Set stateAddressWindows = new HashSet<>(); + stateAddressWindows.add(window); + activeWindowToStateAddressWindows.put(window, stateAddressWindows); + windowToActiveWindow.put(window, window); + } } @Override public void remove(W window) { - mergeTree.remove(window); + for (W stateAddressWindow : activeWindowToStateAddressWindows.get(window)) { + windowToActiveWindow.remove(stateAddressWindow); + } + activeWindowToStateAddressWindows.remove(window); + Set ephemeralWindows = activeWindowToEphemeralWindows.get(window); + if (ephemeralWindows != null) { + for (W ephemeralWindow : ephemeralWindows) { + windowToActiveWindow.remove(ephemeralWindow); + } + activeWindowToEphemeralWindows.remove(window); + } + windowToActiveWindow.remove(window); } private class MergeContextImpl extends WindowFn.MergeContext { - private MergeCallback mergeCallback; public MergeContextImpl(MergeCallback mergeCallback) { @@ -113,81 +211,250 @@ public MergeContextImpl(MergeCallback mergeCallback) { @Override public Collection windows() { - return mergeTree.keySet(); + return activeWindowToStateAddressWindows.keySet(); } @Override public void merge(Collection toBeMerged, W mergeResult) throws Exception { - boolean isResultNew = !mergeTree.containsKey(mergeResult); - recordMerge(toBeMerged, mergeResult); - mergeCallback.onMerge(toBeMerged, mergeResult, isResultNew); + recordMerge(mergeCallback, toBeMerged, mergeResult); } } @Override public void merge(MergeCallback mergeCallback) throws Exception { + // See what the window function does with the NEW and already ACTIVE windows. windowFn.mergeWindows(new MergeContextImpl(mergeCallback)); + + for (Map.Entry> entry : activeWindowToStateAddressWindows.entrySet()) { + if (entry.getValue().isEmpty()) { + // This window was NEW but since it survived merging must now become ACTIVE. + W window = entry.getKey(); + entry.getValue().add(window); + windowToActiveWindow.put(window, window); + } + } } - @Override - public Iterable sourceWindows(W window) { - Set curWindows = new HashSet<>(); - curWindows.add(window); + /** + * A {@code WindowFn.mergeWindows} call has requested {@code toBeMerged} (which must + * all be ACTIVE} be considered equivalent to {@code activeWindow} (which is either a + * member of {@code toBeMerged} or is a new window). + */ + private void recordMerge(MergeCallback mergeCallback, Collection toBeMerged, W mergeResult) + throws Exception { + Set newStateAddressWindows = new HashSet<>(); + Set existingStateAddressWindows = activeWindowToStateAddressWindows.get(mergeResult); + if (existingStateAddressWindows != null) { + // Preserve all the existing state address windows for mergeResult. 
+ newStateAddressWindows.addAll(existingStateAddressWindows); + } - Set sourceWindows = mergeTree.get(window); - if (sourceWindows != null) { - curWindows.addAll(sourceWindows); + Set newEphemeralWindows = new HashSet<>(); + Set existingEphemeralWindows = activeWindowToEphemeralWindows.get(mergeResult); + if (existingEphemeralWindows != null) { + // Preserve all the existing EPHEMERAL windows for meregResult. + newEphemeralWindows.addAll(existingEphemeralWindows); } - return curWindows; + + Collection activeToBeMerged = new ArrayList<>(); + + for (W other : toBeMerged) { + Set otherStateAddressWindows = activeWindowToStateAddressWindows.get(other); + Preconditions.checkState(otherStateAddressWindows != null, "Window %s is not ACTIVE", other); + + for (W otherStateAddressWindow : otherStateAddressWindows) { + // Since otherTarget equiv other AND other equiv mergeResult + // THEN otherTarget equiv mergeResult. + newStateAddressWindows.add(otherStateAddressWindow); + windowToActiveWindow.put(otherStateAddressWindow, mergeResult); + } + activeWindowToStateAddressWindows.remove(other); + + Set otherEphemeralWindows = activeWindowToEphemeralWindows.get(other); + if (otherEphemeralWindows != null) { + for (W otherEphemeral : otherEphemeralWindows) { + // Since otherEphemeral equiv other AND other equiv mergeResult + // THEN otherEphemeral equiv mergeResult. + newEphemeralWindows.add(otherEphemeral); + windowToActiveWindow.put(otherEphemeral, mergeResult); + } + } + activeWindowToEphemeralWindows.remove(other); + + // Now other equiv mergeResult. + if (otherStateAddressWindows.contains(other)) { + // Other was ACTIVE and is now known to be MERGED. + newStateAddressWindows.add(other); + activeToBeMerged.add(other); + } else if (otherStateAddressWindows.isEmpty()) { + // Other was NEW thus has no state. It is now EPHEMERAL. + newEphemeralWindows.add(other); + } else if (other.equals(mergeResult)) { + // Other was ACTIVE, was never used to store elements, but is still ACTIVE. + // Leave it as active. + activeToBeMerged.add(other); + } else { + // Other was ACTIVE, was never used to store element, as is no longer considered ACTIVE. + // It is now EPHEMERAL. + newEphemeralWindows.add(other); + // However, since it may have metadata state, include it in the ACTIVE to be merged set. + activeToBeMerged.add(other); + } + windowToActiveWindow.put(other, mergeResult); + } + + if (newStateAddressWindows.isEmpty()) { + // If stateAddressWindows is empty then toBeMerged must have only contained EPHEMERAL windows. + // Promote mergeResult to be active now. + newStateAddressWindows.add(mergeResult); + } + windowToActiveWindow.put(mergeResult, mergeResult); + + activeWindowToStateAddressWindows.put(mergeResult, newStateAddressWindows); + if (!newEphemeralWindows.isEmpty()) { + activeWindowToEphemeralWindows.put(mergeResult, newEphemeralWindows); + } + + mergeCallback.onMerge(toBeMerged, activeToBeMerged, mergeResult); + } + + /** + * Return the state address windows for ACTIVE {@code window} from which all state associated + * should + * be read and merged. + */ + @Override + public Set readStateAddresses(W window) { + Set stateAddressWindows = activeWindowToStateAddressWindows.get(window); + Preconditions.checkState(stateAddressWindows != null, "Window %s is not ACTIVE", window); + return stateAddressWindows; + } + + /** + * Return the state address window of ACTIVE {@code window} into which all new state should be + * written. 
+ */ + @Override + public W writeStateAddress(W window) { + Set stateAddressWindows = activeWindowToStateAddressWindows.get(window); + Preconditions.checkState(stateAddressWindows != null, "Window %s is not ACTIVE", window); + W result = Iterables.getFirst(stateAddressWindows, null); + Preconditions.checkState(result != null, "Window %s is still NEW", window); + return result; } - private void recordMerge(Collection otherWindows, W newWindow) throws Exception { - Set subWindows = mergeTree.get(newWindow); - if (subWindows == null) { - subWindows = new HashSet<>(); + @VisibleForTesting + public void checkInvariants() { + Set knownStateAddressWindows = new HashSet<>(); + for (Map.Entry> entry : activeWindowToStateAddressWindows.entrySet()) { + W active = entry.getKey(); + Preconditions.checkState(!entry.getValue().isEmpty(), + "Unexpected empty state address window set for ACTIVE window %s", active); + for (W stateAddressWindow : entry.getValue()) { + Preconditions.checkState(knownStateAddressWindows.add(stateAddressWindow), + "%s is in more than one state address window set", stateAddressWindow); + Preconditions.checkState(active.equals(windowToActiveWindow.get(stateAddressWindow)), + "%s should have %s as its ACTIVE window", stateAddressWindow, active); + } } + for (Map.Entry> entry : activeWindowToEphemeralWindows.entrySet()) { + W active = entry.getKey(); + Preconditions.checkState(activeWindowToStateAddressWindows.containsKey(active), + "%s must be ACTIVE window", active); + Preconditions.checkState( + !entry.getValue().isEmpty(), "Unexpected empty EPHEMERAL set for %s", active); + for (W ephemeralWindow : entry.getValue()) { + Preconditions.checkState(knownStateAddressWindows.add(ephemeralWindow), + "%s is EPHEMERAL/state address of more than one ACTIVE window", ephemeralWindow); + Preconditions.checkState(active.equals(windowToActiveWindow.get(ephemeralWindow)), + "%s should have %s as its ACTIVE window", ephemeralWindow, active); + } + } + for (Map.Entry entry : windowToActiveWindow.entrySet()) { + Preconditions.checkState(activeWindowToStateAddressWindows.containsKey(entry.getValue()), + "%s should be ACTIVE since representative for %s", entry.getValue(), entry.getKey()); + } + } - for (W other : otherWindows) { - if (!mergeTree.containsKey(other)) { - throw new IllegalArgumentException("Tried to merge a non-existent window: " + other); + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("MergingActiveWindowSet {\n"); + for (Map.Entry> entry : activeWindowToStateAddressWindows.entrySet()) { + W active = entry.getKey(); + Set stateAddressWindows = entry.getValue(); + if (stateAddressWindows.isEmpty()) { + sb.append(" NEW "); + sb.append(active); + sb.append('\n'); + } else { + sb.append(" ACTIVE "); + sb.append(active); + sb.append(":\n"); + for (W stateAddressWindow : stateAddressWindows) { + if (stateAddressWindow.equals(active)) { + sb.append(" ACTIVE "); + } else { + sb.append(" MERGED "); + } + sb.append(stateAddressWindow); + sb.append("\n"); + W active2 = windowToActiveWindow.get(stateAddressWindow); + Preconditions.checkState(active2.equals(active)); + } + Set ephemeralWindows = activeWindowToEphemeralWindows.get(active); + if (ephemeralWindows != null) { + for (W ephemeralWindow : ephemeralWindows) { + sb.append(" EPHEMERAL "); + sb.append(ephemeralWindow); + sb.append('\n'); + } + } } - subWindows.addAll(mergeTree.get(other)); - subWindows.add(other); - mergeTree.remove(other); } - mergeTree.put(newWindow, subWindows); + 
sb.append("}"); + return sb.toString(); } + // ====================================================================== - private static Map> emptyIfNull(Map> input) { - if (input == null) { + /** + * Replace null {@code multimap} with empty map, and replace null entries in {@code multimap} with + * empty sets. + */ + private static Map> emptyIfNull(Map> multimap) { + if (multimap == null) { return new HashMap<>(); } else { - for (Map.Entry> entry : input.entrySet()) { + for (Map.Entry> entry : multimap.entrySet()) { if (entry.getValue() == null) { entry.setValue(new HashSet()); } } - return input; + return multimap; } } - private Map> deepCopy(Map> mergeTree) { - Map> newMergeTree = new HashMap<>(); - for (Map.Entry> entry : mergeTree.entrySet()) { - newMergeTree.put(entry.getKey(), new HashSet(entry.getValue())); + /** Return a deep copy of {@code multimap}. */ + private static Map> deepCopy(Map> multimap) { + Map> newMultimap = new HashMap<>(); + for (Map.Entry> entry : multimap.entrySet()) { + newMultimap.put(entry.getKey(), new HashSet(entry.getValue())); } - return newMergeTree; - } - - @Override - public int size() { - return mergeTree.size(); + return newMultimap; } - @Override - public Collection originalWindows(Collection windows) { - return Collections.unmodifiableCollection( - Collections2.filter(windows, Predicates.in(originalMergeTree.keySet()))); + /** Return inversion of {@code multimap}, which must be invertible. */ + private static Map invert(Map> multimap) { + Map result = new HashMap<>(); + for (Map.Entry> entry : multimap.entrySet()) { + W active = entry.getKey(); + for (W target : entry.getValue()) { + W previous = result.put(target, active); + Preconditions.checkState(previous == null, + "Window %s has both %s and %s as representatives", target, previous, active); + } + } + return result; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java index c4f39a9b23813..f567888260768 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java @@ -97,7 +97,7 @@ private static class GeneralNonEmptyPanes extends NonEm @Override public void recordContent(ReduceFn.Context context) { - context.state().access(PANE_ADDITIONS_TAG).add(1L); + context.state().accessAcrossMergedWindows(PANE_ADDITIONS_TAG).add(1L); } @Override @@ -107,6 +107,8 @@ public void clearPane(ReduceFn.Context context) { @Override public StateContents isEmpty(ReduceFn.Context context) { + // Since we only check for empty element sets when a trigger fires it's unreasonable + // to require a prefetch. 
return context.state().accessAcrossMergedWindows(PANE_ADDITIONS_TAG).isEmpty(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java index 2c453ef95b934..10d7666f5dd53 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java @@ -17,9 +17,9 @@ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; +import com.google.common.collect.ImmutableSet; -import java.util.Collection; -import java.util.Collections; +import java.util.Set; /** * Implementation of {@link ActiveWindowSet} used with {@link WindowFn WindowFns} that don't support @@ -27,48 +27,51 @@ * * @param the types of windows being managed */ -public class NonMergingActiveWindowSet - implements ActiveWindowSet { +public class NonMergingActiveWindowSet implements ActiveWindowSet { + @Override + public void removeEphemeralWindows() {} + + @Override + public void persist() {} @Override - public void persist() { - // Nothing to persist. + public W representative(W window) { + // Always represented by itself. + return window; } @Override - public boolean add(W window) { - // We don't track anything, so we cannot determine if the window is new or not. - return true; + public Set getActiveWindows() { + // Only supported when merging. + throw new java.lang.UnsupportedOperationException(); } @Override - public boolean contains(W window) { + public boolean isActive(W window) { // Windows should never disappear, since we don't support merging. return true; } @Override - public void remove(W window) {} + public void addNew(W window) {} @Override - public void merge(MergeCallback reduceFnRunner) throws Exception { - // We never merge, so there is nothing to do here. - } + public void addActive(W window) {} @Override - public Iterable sourceWindows(W window) { - // There is no merging, so the only source window is the window itself. - return Collections.singleton(window); - } + public void remove(W window) {} + + @Override + public void merge(MergeCallback mergeCallback) throws Exception {} + @Override - public int size() { - throw new UnsupportedOperationException("Cannot determine size of NonMergingActiveWindowSet"); + public Set readStateAddresses(W window) { + return ImmutableSet.of(window); } @Override - public Collection originalWindows(Collection windows) { - throw new UnsupportedOperationException( - "Cannot determine original windows of NonMergingActiveWindowSet"); + public W writeStateAddress(W window) { + return window; } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java index b8a13f96e8a9b..e76868aa42f38 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java @@ -39,17 +39,24 @@ */ public abstract class ReduceFn implements Serializable { - /** Interface for interacting with persistent state. */ public interface StateContext { - /** Access the storage for the given {@code address} in the current window. */ + /** + * Access the storage for the given {@code address} in the current window. + * + *

Never accounts for merged windows. When windows are merged, any state accessed via + * this method must be eagerly combined and written into the result window. + */ StateT access(StateTag address); /** * Access the storage for the given {@code address} in all of the windows that were - * merged into the current window including the current window. + * merged into the current window. * - *

If no windows were merged, this reads from just the current window. + *

If no windows were merged, this reads and writes to just the current window. + * Otherwise, when windows merge we do not eagerly combine state, but rather defer the + * combination to reading time. Thus reads will be from all 'merged windows' for the + * current window, and writes will be to the designated 'writing window' for the current window. */ > StateT accessAcrossMergedWindows( StateTag address); @@ -58,15 +65,16 @@ public interface StateContext { /** Interface for interacting with persistent state within {@link #onMerge}. */ public interface MergingStateContext extends StateContext { /** - * Access a merged view of the storage for the given {@code address} - * in all of the windows being merged. + * Analogous to {@link #access}, but across all windows which are about to be merged. */ - public abstract > StateT accessAcrossMergingWindows( - StateTag address); + > StateT mergingAccess(StateTag address); - /** Access a map from windows being merged to the associated {@code StateT}. */ - public abstract Map accessInEachMergingWindow( - StateTag address); + /** + * Analogous to {@link #access}, but returned as a map from each window which is + * about to be merged to the corresponding state. + */ + public abstract Map + mergingAccessInEachMergingWindow(StateTag address); } /** @@ -117,7 +125,6 @@ public abstract class Context { /** Information accessible within {@link #processValue}. */ public abstract class ProcessValueContext extends Context { - /** Return the actual value being processed. */ public abstract InputT value(); @@ -141,7 +148,6 @@ public abstract class OnMergeContext extends Context { /** Information accessible within {@link #onTrigger}. */ public abstract class OnTriggerContext extends Context { - /** Returns the {@link PaneInfo} for the trigger firing being processed. */ public abstract PaneInfo paneInfo(); @@ -185,7 +191,7 @@ public abstract class OnTriggerContext extends Context { * * @param c Context to use prefetch from. */ - public void prefetchOnTrigger(StateContext c) { } + public void prefetchOnTrigger(StateContext c) {} /** * Called to clear any persisted state that the {@link ReduceFn} may be holding. 
This will be diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java index df4a853599334..643919035f407 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java @@ -124,7 +124,7 @@ static class StateContextImpl private final ActiveWindowSet activeWindows; private final W window; - protected StateNamespace namespace; + protected StateNamespace windowNamespace; protected final Coder windowCoder; private final StateInternals stateInternals; @@ -137,7 +137,7 @@ public StateContextImpl( this.windowCoder = windowCoder; this.stateInternals = stateInternals; this.window = window; - this.namespace = namespaceFor(window); + this.windowNamespace = namespaceFor(window); } protected StateNamespace namespaceFor(W window) { @@ -149,23 +149,23 @@ W window() { } StateNamespace namespace() { - return namespace; + return windowNamespace; } @Override public StorageT access(StateTag address) { - return stateInternals.state(namespace, address); + return stateInternals.state(windowNamespace, address); } @Override public > StorageT accessAcrossMergedWindows( StateTag address) { - List sourceNamespaces = new ArrayList<>(); - for (W sourceWindow : activeWindows.sourceWindows(window)) { - sourceNamespaces.add(namespaceFor(sourceWindow)); + List readNamespaces = new ArrayList<>(); + for (W readWindow : activeWindows.readStateAddresses(window)) { + readNamespaces.add(namespaceFor(readWindow)); } - - return stateInternals.mergedState(sourceNamespaces, namespace, address, window); + StateNamespace writeNamespace = namespaceFor(activeWindows.writeStateAddress(window)); + return stateInternals.mergedState(readNamespaces, writeNamespace, address, window); } } @@ -180,10 +180,6 @@ public MergingStateContextImpl(StateContextImpl delegate, Collection mergi this.mergingWindows = mergingWindows; } - StateNamespace namespace() { - return delegate.namespace; - } - W window() { return delegate.window(); } @@ -204,19 +200,17 @@ public StorageT access(StateTag address) { } @Override - public > StateT accessAcrossMergingWindows( - StateTag address) { - List mergingNamespaces = new ArrayList<>(); + public > StateT mergingAccess(StateTag address) { + List readNamespaces = new ArrayList<>(); for (W mergingWindow : mergingWindows) { - mergingNamespaces.add(delegate.namespaceFor(mergingWindow)); + readNamespaces.add(delegate.namespaceFor(mergingWindow)); } - return delegate.stateInternals.mergedState( - mergingNamespaces, delegate.namespace, address, delegate.window()); + readNamespaces, delegate.windowNamespace, address, delegate.window); } @Override - public Map accessInEachMergingWindow( + public Map mergingAccessInEachMergingWindow( StateTag address) { ImmutableMap.Builder builder = ImmutableMap.builder(); for (W mergingWindow : mergingWindows) { @@ -236,7 +230,7 @@ private class ContextImpl extends ReduceFn.Context { private ContextImpl(StateContextImpl state) { reduceFn.super(); this.state = state; - this.timers = new TimersImpl(state.namespace); + this.timers = new TimersImpl(state.namespace()); } @Override @@ -278,7 +272,7 @@ private ProcessValueContextImpl(StateContextImpl state, InputT value, Instant this.state = state; this.value = value; this.timestamp = timestamp; - this.timers = new TimersImpl(state.namespace); + this.timers = new TimersImpl(state.namespace()); } @Override @@ -331,7 
+325,7 @@ private OnTriggerContextImpl(StateContextImpl state, this.state = state; this.pane = pane; this.callbacks = callbacks; - this.timers = new TimersImpl(state.namespace); + this.timers = new TimersImpl(state.namespace()); } @Override @@ -379,7 +373,7 @@ private class OnMergeContextImpl private OnMergeContextImpl(MergingStateContextImpl state) { reduceFn.super(); this.state = state; - this.timers = new TimersImpl(state.delegate.namespace); + this.timers = new TimersImpl(state.delegate.namespace()); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java index 4d11e8a708110..d1276800f58bf 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java @@ -18,10 +18,12 @@ import com.google.cloud.dataflow.sdk.transforms.Aggregator; import com.google.cloud.dataflow.sdk.transforms.GroupByKey.GroupByKeyOnly; import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo; import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.Timing; import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerResult; import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior; +import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn; import com.google.cloud.dataflow.sdk.util.ActiveWindowSet.MergeCallback; import com.google.cloud.dataflow.sdk.util.ReduceFnContextFactory.OnTriggerCallbacks; import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData; @@ -31,21 +33,17 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Function; -import com.google.common.base.Functions; import com.google.common.base.Preconditions; -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; import com.google.common.base.Throwables; -import com.google.common.collect.FluentIterable; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.joda.time.Duration; import org.joda.time.Instant; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -79,33 +77,110 @@ public class ReduceFnRunner { public static final String DROPPED_DUE_TO_LATENESS_COUNTER = "DroppedDueToLateness"; private final WindowingStrategy windowingStrategy; - private final TimerInternals timerInternals; + private final WindowingInternals> windowingInternals; private final Aggregator droppedDueToClosedWindow; private final Aggregator droppedDueToLateness; - private final TriggerRunner triggerRunner; - private final K key; + + /** + * Track which windows are still active and which 'state address' windows contain state + * for a merged window. + * + *

In general, when windows are merged we prefer to defer merging their state until the + * overall state is needed. In other words, we prefer to merge state 'lazily' (on read) + * instead of 'eagerly' (on merge). + */ private final ActiveWindowSet activeWindows; + + /** + * User's reduce function (or {@link SystemReduceFn} for simple GroupByKey operations). + * May store its own state. + * + *

    + *
  • Merging: Uses {@link #activeWindows} to determine the 'state address' windows under which + * state is read and written. Merging may be done lazily, in which case state is merged + * only when a pane fires. + *
  • Lifetime: Possibly cleared when a pane fires. Always cleared when a window is + * garbage collected. + *
+ */ + private final ReduceFn reduceFn; + + /** + * Manage the setting and firing of timer events. + * + *
    + *
  • Merging: Timers are cancelled when windows are merged away. + *
  • Lifetime: Timers automatically disappear after they fire. + *
+ */ + private final TimerInternals timerInternals; + + /** + * Manage the execution and state for triggers. + * + *
    + *
  • Merging: All state is keyed by actual window, so does not depend on {@link #activeWindows}. + * Individual triggers know how to eagerly merge their state on merge. + *
  • Lifetime: Most trigger state is cleared when the final pane is emitted. However + * a tombstone is left behind which must be cleared when the window is garbage collected. + *
+ */ + private final TriggerRunner triggerRunner; + + /** + * Store the output watermark holds for each window. + * + *
    + *
  • Merging: Generally uses {@link #activeWindows} to maintain the 'state address' windows + * under which holds are stored, and holds are merged lazily only when a pane fires. + * However there are two special cases: + *
      + *
  • Depending on the window's {@link OutputTimeFn}, holds may need to be read, + * recalculated, cleared, and added back on merging.
    • When a pane fires it may be necessary to add (back) an end-of-window or + * garbage collection hold. If the current window is no longer active these holds will + * be associated with the current window. + *
    + *
  • Lifetime: Cleared when a pane fires or when the window is garbage collected. + *
+ */ private final WatermarkHold watermarkHold; + private final ReduceFnContextFactory contextFactory; - private final ReduceFn reduceFn; - private final PaneInfoTracker paneInfo; + + /** + * Store the previously emitted pane (if any) for each window. + * + *
    + *
  • Merging: Always keyed by actual window, so does not depend on {@link #activeWindows}. + * Cleared when window is merged away. + *
  • Lifetime: Cleared when trigger is finished or window is garbage collected. + *
+ */ + private final PaneInfoTracker paneInfoTracker; + + /** + * Store whether we've seen any elements for a window since the last pane was emitted. + * + *
    + *
  • Merging: Uses {@link #activeWindows} to determine the state address windows under which + * counts are stored. Merging is done lazily when checking if a pane needs to fire.
  • Lifetime: Cleared when pane fires or window is garbage collected. + *
+ */ private final NonEmptyPanes nonEmptyPanes; - public ReduceFnRunner( - K key, - WindowingStrategy windowingStrategy, - TimerInternals timerInternals, - WindowingInternals> windowingInternals, - Aggregator droppedDueToClosedWindow, - Aggregator droppedDueToLateness, + public ReduceFnRunner(K key, WindowingStrategy windowingStrategy, + TimerInternals timerInternals, WindowingInternals> windowingInternals, + Aggregator droppedDueToClosedWindow, Aggregator droppedDueToLateness, ReduceFn reduceFn) { this.key = key; this.timerInternals = timerInternals; - this.paneInfo = new PaneInfoTracker(timerInternals); + this.paneInfoTracker = new PaneInfoTracker(timerInternals); this.windowingInternals = windowingInternals; this.droppedDueToClosedWindow = droppedDueToClosedWindow; this.droppedDueToLateness = droppedDueToLateness; @@ -117,42 +192,67 @@ public ReduceFnRunner( this.windowingStrategy = objectWindowingStrategy; this.nonEmptyPanes = NonEmptyPanes.create(this.windowingStrategy, this.reduceFn); + // Note this may trigger a GetData request to load the existing window set. this.activeWindows = createActiveWindowSet(); - this.contextFactory = new ReduceFnContextFactory( - key, reduceFn, this.windowingStrategy, this.windowingInternals.stateInternals(), - this.activeWindows, timerInternals); + this.contextFactory = + new ReduceFnContextFactory(key, reduceFn, this.windowingStrategy, + this.windowingInternals.stateInternals(), this.activeWindows, timerInternals); this.watermarkHold = new WatermarkHold<>(timerInternals, windowingStrategy); this.triggerRunner = new TriggerRunner<>( windowingStrategy.getTrigger(), - new TriggerContextFactory<>(windowingStrategy, this.windowingInternals.stateInternals(), - activeWindows)); + new TriggerContextFactory<>( + windowingStrategy, this.windowingInternals.stateInternals(), activeWindows)); } private ActiveWindowSet createActiveWindowSet() { return windowingStrategy.getWindowFn().isNonMerging() - ? new NonMergingActiveWindowSet() - : new MergingActiveWindowSet( - windowingStrategy.getWindowFn(), windowingInternals.stateInternals()); + ? new NonMergingActiveWindowSet() : new MergingActiveWindowSet( + windowingStrategy.getWindowFn(), windowingInternals.stateInternals()); } - @VisibleForTesting boolean isFinished(W window) { + @VisibleForTesting + boolean isFinished(W window) { return triggerRunner.isClosed(contextFactory.base(window).state()); } + /** + * Incorporate {@code values} into the underlying reduce function, and manage holds, timers, + * triggers, and window merging. + * + *

The general strategy is: + *

    + *
  1. Use {@link WindowedValue#getWindows} (itself determined using + * {@link WindowFn#assignWindows}) to determine which windows each element belongs to. Some of + * those windows will already have state associated with them. The rest are considered NEW. + *
  2. Use {@link WindowFn#mergeWindows} to attempt to merge currently ACTIVE and NEW windows. + * Each NEW window will become either ACTIVE, MERGED, or EPHEMERAL. (See {@link ActiveWindowSet} + * for definitions of these terms.) + *
  3. If at all possible, eagerly substitute EPHEMERAL windows with their ACTIVE state address + * windows before any state is associated with the EPHEMERAL window. In the common case, where + * windows for new elements are merged into existing ACTIVE windows, no additional storage + * or merging overhead is incurred.
  4. Otherwise, keep track of the state address windows for ACTIVE windows so that their + * states can be merged on-demand when a pane fires. + *
  5. Process the element under each of the ACTIVE windows its original windows have been merged + * into according to {@link ActiveWindowSet}. Processing may require running triggers, setting timers, setting + * holds, and invoking {@link ReduceFn#onTrigger}.
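One step worth restating from the strategy above: element windows that are already past the garbage-collection horizon are dropped (and counted) rather than processed. A minimal standalone restatement of that check, mirroring canDropDueToExpiredWindow below; the wrapping class is hypothetical.

import org.joda.time.Duration;
import org.joda.time.Instant;

/** Hypothetical restatement of the lateness check; not additional SDK code. */
class ExpiredWindowCheckExample {
  static boolean isExpired(
      Instant windowMaxTimestamp, Duration allowedLateness, Instant inputWatermark) {
    // A window is expired once the input watermark has passed its max timestamp plus the
    // allowed lateness; elements arriving for it are counted as DroppedDueToLateness.
    return inputWatermark != null
        && windowMaxTimestamp.plus(allowedLateness).isBefore(inputWatermark);
  }
}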
+ */ public void processElements(Iterable> values) { - Function windowMapping = Functions.identity(); - + // Map from element window to the result of running its trigger. final Map results = Maps.newHashMap(); - // If windows might merge, extract the windows from all the values, and pre-merge them. if (!windowingStrategy.getWindowFn().isNonMerging()) { - windowMapping = premergeForValues(values, results); + // If an incoming element introduces a new window, attempt to merge it into an existing + // window eagerly. Otherwise track which state address windows are used to store the state + // for each merged, active window. + collectAndMergeWindows(values, results); } - // Process the elements + // Process each element, using the updated activeWindows determined by collectAndMergeWindows. for (WindowedValue value : values) { - processElement(windowMapping, results, value); + processElement(results, value); } // Trigger output from any window that was triggered by merging or processing elements. @@ -160,6 +260,20 @@ public void processElements(Iterable> values) { handleTriggerResult( contextFactory.base(result.getKey()), false/*isEndOfWindow*/, result.getValue()); } + + // We're all done with merging and emitting elements so can compress the activeWindow state. + activeWindows.removeEphemeralWindows(); + } + + public void persist() { + activeWindows.persist(); + } + + /** Is {@code window} expired w.r.t. the garbage collection watermark? */ + private boolean canDropDueToExpiredWindow(W window) { + Instant inputWM = timerInternals.currentInputWatermarkTime(); + return inputWM != null + && window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(inputWM); } /** @@ -168,34 +282,43 @@ public void processElements(Iterable> values) { * @param results an output parameter that accumulates all of the windows that have had the * trigger return FIRE or FIRE_AND_FINISH. Once present in this map, it is no longer * necessary to evaluate triggers for the given window. - * @return A function which maps the initial windows of the values to the intermediate windows - * they should be processed in. */ - private Function premergeForValues( + private void collectAndMergeWindows( Iterable> values, final Map results) { - // Add the windows from the values to the active window set, and keep track of which ones - // were not previously in the active window set. - Set newWindows = addToActiveWindows(values); + Set currentlyActiveWindows = Sets.newHashSet(activeWindows.getActiveWindows()); - // Merge all of the active windows and retain a mapping from source windows to result windows. - final Map sourceWindowsToResultWindows = mergeActiveWindows(results); + // Collect the windows from all elements (except those which are too late) and + // make sure they are already in the active window set or are added as NEW windows. + for (WindowedValue value : values) { + for (BoundedWindow untypedWindow : value.getWindows()) { + @SuppressWarnings("unchecked") + W window = (W) untypedWindow; - // For any new windows that survived merging, make sure we've scheduled cleanup - for (W window : newWindows) { - if (activeWindows.contains(window)) { - scheduleEndOfWindowOrGarbageCollectionTimer(contextFactory.base(window)); + if (canDropDueToExpiredWindow(window)) { + // This element is too late to contribute to this window. + // We will update the counter for this in the corresponding processElement call. 
+ continue; + } + + ReduceFn.Context context = contextFactory.base(window); + if (triggerRunner.isClosed(context.state())) { + // This window has already been closed. + // We will update the counter for this in the corresponding processElement call. + continue; + } + // Add this window as NEW if we've not yet seen it. + activeWindows.addNew(window); } } - // Update our window mapping function. - return new Function() { - @Override - public W apply(W input) { - W result = sourceWindowsToResultWindows.get(input); - // If null, the initial window wasn't subject to any merging. - return result == null ? input : result; - } - }; + // Merge all of the active windows and retain a mapping from source windows to result windows. + mergeActiveWindows(results); + + // Make sure we've scheduled timers for any ACTIVE windows we just introduced. + // (Timers for ACTIVE windows which are now MERGED will have been discarded above.) + for (W window : Sets.difference(activeWindows.getActiveWindows(), currentlyActiveWindows)) { + scheduleEndOfWindowOrGarbageCollectionTimer(contextFactory.base(window)); + } } /** @@ -204,41 +327,62 @@ public W apply(W input) { * @param results an output parameter that accumulates all of the windows that have had the * trigger return FIRE or FIRE_AND_FINISH. Once present in this map, it is no longer * necessary to evaluate triggers for the given window. - * @return A map from initial windows of the values to the intermediate windows they should be - * processed in. The domain will be the windows that were merged into intermediate windows - * and the range is the intermediate windows that exist in the active window set. */ - private Map mergeActiveWindows(final Map results) { - final Map sourceWindowsToResultWindows = - Maps.newHashMapWithExpectedSize(activeWindows.size()); - + private void mergeActiveWindows(final Map results) { try { activeWindows.merge(new MergeCallback() { @Override - public void onMerge(Collection mergedWindows, W resultWindow, boolean isResultNew) + public void onMerge(Collection toBeMerged, Collection activeToBeMerged, W mergeResult) throws Exception { - // We only need to call onMerge with windows that were previously persisted. - Collection originalWindows = activeWindows.originalWindows(mergedWindows); - if (!originalWindows.isEmpty()) { - TriggerResult result = - ReduceFnRunner.this.onMerge(originalWindows, resultWindow, isResultNew); - if (result.isFire()) { - results.put(resultWindow, result); - } - } else { - // If there were no windows, then merging didn't rearrange the cleanup timers. Make - // sure that we have one properly scheduled - scheduleEndOfWindowOrGarbageCollectionTimer(contextFactory.base(resultWindow)); + // At this point activeWindows has already incorporated the results of the merge. + ReduceFn.OnMergeContext mergeResultContext = + contextFactory.forMerge(toBeMerged, mergeResult); + + // Prefetch various state. + triggerRunner.prefetchForMerge(mergeResultContext.state()); + + // Run the reduceFn to perform any needed merging. + try { + reduceFn.onMerge(mergeResultContext); + } catch (Exception e) { + throw wrapMaybeUserException(e); } - for (W mergedWindow : mergedWindows) { - sourceWindowsToResultWindows.put(mergedWindow, resultWindow); + // Merge the watermark holds if the output time function is not just MIN. + // Otherwise, leave all the merging window watermark holds where they are. + watermarkHold.onMerge(mergeResultContext); + + // Have the trigger merge state as needed, and handle the result. 
+ TriggerResult result; + try { + result = triggerRunner.onMerge(mergeResultContext); + } catch (Exception e) { + Throwables.propagateIfPossible(e); + throw new RuntimeException("Failed to merge the triggers", e); + } - // If the window wasn't in the persisted original set, then we scheduled cleanup above - // but didn't pass it to merge to have the cleanup canceled. Do so here - if (!originalWindows.contains(mergedWindow)) { - cancelEndOfWindowAndGarbageCollectionTimers(contextFactory.base(mergedWindow)); + if (result.isFire()) { + results.put(mergeResult, result); + } + + for (W active : activeToBeMerged) { + if (active.equals(mergeResult)) { + // Not merged away. + continue; } + WindowTracing.debug("ReduceFnRunner.mergeActiveWindows/onMerge: Merging {} into {}", + active, mergeResult); + // Currently ACTIVE window is about to become MERGED. + ReduceFn.Context clearContext = contextFactory.base(active); + // We are going to take care of any cleanup now, so cancel timers. + cancelEndOfWindowAndGarbageCollectionTimers(clearContext); + // All the trigger state has been merged. Clear any tombstones. + triggerRunner.clearEverything(clearContext); + // We no longer care about any previous panes of merged away windows. The + // merge result window gets to start fresh if it is new. + paneInfoTracker.clear(clearContext.state()); + // Any reduceFn state, watermark holds and non-empty pane state have either been + // merged away or will be lazily merged when the next pane fires. } } }); @@ -246,78 +390,36 @@ public void onMerge(Collection mergedWindows, W resultWindow, boolean isResul Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while merging windows", e); } - return sourceWindowsToResultWindows; - } - - /** Is the {@code window} expired w.r.t. the garbage collection watermark? */ - private Predicate canDropDueToExpiredWindow = new Predicate() { - @Override - public boolean apply(W window) { - Instant inputWM = timerInternals.currentInputWatermarkTime(); - return inputWM != null - && window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(inputWM); - } - }; - - /** - * Add the initial windows from each of the values to the active window set. Returns the set of - * new windows. - */ - private Set addToActiveWindows(Iterable> values) { - Set newWindows = new HashSet<>(); - for (WindowedValue value : values) { - - for (BoundedWindow untypedWindow : value.getWindows()) { - @SuppressWarnings("unchecked") - W window = (W) untypedWindow; - - if (canDropDueToExpiredWindow.apply(window)) { - // This value will be dropped (and reported in a counter) by processElement. - // Hence it won't contribute to any new window. - continue; - } - - ReduceFn.Context context = contextFactory.base(window); - if (!triggerRunner.isClosed(context.state())) { - if (activeWindows.add(window)) { - newWindows.add(window); - } - } - } - } - return newWindows; } /** - * @param windowMapping a function which maps windows associated with the value to the window that - * it was merged into, and in which we should actually process the element * @param results a record of all of the windows that have had the trigger return FIRE or * FIRE_AND_FINISH. Once present in this map, it is no longer necessary to evaluate triggers * for the given result. 
* @param value the value being processed */ - private void processElement( - Function windowMapping, Map results, WindowedValue value) { - - // Only consider representative windows from among all windows in equivalence classes - // induced by window merging. + private void processElement(Map results, WindowedValue value) { + // Redirect element windows to the ACTIVE windows they have been merged into. + // It is possible two of the element's windows have been merged into the same window. + // In that case we'll process the same element for the same window twice. @SuppressWarnings("unchecked") - FluentIterable mappedWindows = - FluentIterable.from((Collection) value.getWindows()) - .transform(windowMapping); - - // Some windows may be expired - Iterable windows = mappedWindows.filter(Predicates.not(canDropDueToExpiredWindow)); - - // Count the number of elements that are dropped - for (W expiredWindow : mappedWindows.filter(canDropDueToExpiredWindow)) { + Collection windows = new ArrayList<>(); + for (BoundedWindow untypedWindow : value.getWindows()) { + @SuppressWarnings("unchecked") + W window = (W) untypedWindow; + if (canDropDueToExpiredWindow(window)) { + // The element is too late for this window. droppedDueToLateness.addValue(1L); WindowTracing.debug( - "processElement: Dropping element at {} for key:{} and window:{} since window is " - + "too far behind inputWatermark:{}; outputWatermark:{}", - value.getTimestamp(), key, expiredWindow, - timerInternals.currentInputWatermarkTime(), - timerInternals.currentOutputWatermarkTime()); + "ReduceFnRunner.processElement: Dropping element at {} for key:{}; window:{} " + + "since too far behind inputWatermark:{}; outputWatermark:{}", + value.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + } else { + W active = activeWindows.representative(window); + Preconditions.checkState(active != null, "Window %s should have been added", window); + windows.add(active); + } } // Prefetch in each of the windows if we're going to need to process triggers @@ -329,15 +431,20 @@ private void processElement( } } - // And process each of the windows + // Process the element for each (representative) window it belongs to. for (W window : windows) { ReduceFn.ProcessValueContext context = contextFactory.forValue(window, value.getValue(), value.getTimestamp()); // Check to see if the triggerRunner thinks the window is closed. If so, drop that window. if (!results.containsKey(window) && triggerRunner.isClosed(context.state())) { - droppedDueToClosedWindow.addValue(1L); - continue; + droppedDueToClosedWindow.addValue(1L); + WindowTracing.debug( + "ReduceFnRunner.processElement: Dropping element at {} for key:{}; window:{} " + + "since window is no longer active at inputWatermark:{}; outputWatermark:{}", + value.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime(), + timerInternals.currentOutputWatermarkTime()); + continue; } nonEmptyPanes.recordContent(context); @@ -376,67 +483,6 @@ private void processElement( } } - /** - * Make sure that all the state built up in this runner has been persisted. - */ - public void persist() { - activeWindows.persist(); - } - - /** - * Called when windows merge. - */ - public TriggerResult onMerge( - Collection mergedWindows, W resultWindow, boolean isResultWindowNew) { - ReduceFn.OnMergeContext resultContext = - contextFactory.forMerge(mergedWindows, resultWindow); - - // Schedule state reads for trigger execution. 
- triggerRunner.prefetchForMerge(resultContext.state()); - - // Run the reduceFn to perform any needed merging. - try { - reduceFn.onMerge(resultContext); - } catch (Exception e) { - throw wrapMaybeUserException(e); - } - - // Merge the watermark hold - watermarkHold.mergeHolds(resultContext); - - // Have the trigger merge state as needed, and handle the result. - TriggerResult triggerResult; - try { - triggerResult = triggerRunner.onMerge(resultContext); - } catch (Exception e) { - Throwables.propagateIfPossible(e); - throw new RuntimeException("Failed to merge the triggers", e); - } - - // Cleanup the trigger state in the old windows. - for (W mergedWindow : mergedWindows) { - if (!mergedWindow.equals(resultWindow)) { - try { - ReduceFn.Context mergedContext = contextFactory.base(mergedWindow); - cancelEndOfWindowAndGarbageCollectionTimers(mergedContext); - triggerRunner.clearEverything(mergedContext); - paneInfo.clear(mergedContext.state()); - } catch (Exception e) { - Throwables.propagateIfPossible(e); - throw new RuntimeException("Exception while clearing trigger state", e); - } - } - } - - // Schedule cleanup if the window is new. Do this after cleaning up the old state in case one - // of them had a timer at the same point. - if (isResultWindowNew) { - scheduleEndOfWindowOrGarbageCollectionTimer(resultContext); - } - - return triggerResult; - } - /** * Called when an end-of-window, garbage collection, or trigger-specific timer fires. */ @@ -447,10 +493,11 @@ public void onTimer(TimerData timer) { @SuppressWarnings("unchecked") WindowNamespace windowNamespace = (WindowNamespace) timer.getNamespace(); W window = windowNamespace.getWindow(); - // If the window is subject to merging then all timers should have been cleared upon merge. - Preconditions.checkState( - !windowingStrategy.getWindowFn().isNonMerging() || activeWindows.contains(window), - "Received timer %s for inactive window %s", timer, window); + + if (!activeWindows.isActive(window)) { + WindowTracing.debug( + "ReduceFnRunner.onTimer: Note that timer {} is for non-ACTIVE window {}", timer, window); + } ReduceFn.Context context = contextFactory.base(window); @@ -469,19 +516,19 @@ public void onTimer(TimerData timer) { if (isGarbageCollection) { WindowTracing.debug( - "onTimer: Cleaning up for key:{}; window:{} at {} with " + "ReduceFnRunner.onTimer: Cleaning up for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); - if (activeWindows.contains(window) && !triggerRunner.isClosed(context.state())) { + if (activeWindows.isActive(window) && !triggerRunner.isClosed(context.state())) { // We need to call onTrigger to emit the final pane if required. // The final pane *may* be ON_TIME if: // - AllowedLateness = 0 (ie the timer is at end-of-window), and; // - The trigger fires on the end-of-window timer. boolean isWatermarkTrigger = isEndOfWindowTimer && runTriggersForTimer(context, timer).isFire(); - onTrigger(context, isWatermarkTrigger, true/*isFinish*/); + onTrigger(context, isWatermarkTrigger, true/*isFinish*/, false/*willStillBeActive*/); } // Clear all the state for this window since we'll never see elements for it again. 
@@ -494,26 +541,26 @@ public void onTimer(TimerData timer) { } } else { WindowTracing.debug( - "onTimer: Triggering for key:{}; window:{} at {} with " + "ReduceFnRunner.onTimer: Triggering for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); - boolean isFinish = false; - if (activeWindows.contains(window) && !triggerRunner.isClosed(context.state())) { + if (activeWindows.isActive(window) && !triggerRunner.isClosed(context.state())) { TriggerResult result = runTriggersForTimer(context, timer); handleTriggerResult(context, isEndOfWindowTimer, result); - isFinish = result.isFinish(); } - if (isEndOfWindowTimer && !isFinish) { + if (isEndOfWindowTimer) { // Since we are processing an on-time firing we should schedule the garbage collection // timer. (If getAllowedLateness is zero then the timer event will be considered a // cleanup event and handled by the above). + // Note we must do this even if the trigger is finished so that we are sure to cleanup + // any final trigger tombstones. Preconditions.checkState( windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO), "Unexpected zero getAllowedLateness"); WindowTracing.debug( - "onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with " + "ReduceFnRunner.onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, context.window(), cleanupTime, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); @@ -545,15 +592,20 @@ private TriggerResult runTriggersForTimer( * */ private void clearAllState(ReduceFn.Context context) throws Exception { - nonEmptyPanes.clearPane(context); - try { - reduceFn.clearState(context); - } catch (Exception e) { - throw wrapMaybeUserException(e); + boolean isActive = activeWindows.isActive(context.window()); + watermarkHold.clearHolds(context, isActive); + if (isActive) { + // The trigger never finished, so make sure we clear any remaining state. + try { + reduceFn.clearState(context); + } catch (Exception e) { + throw wrapMaybeUserException(e); + } + nonEmptyPanes.clearPane(context); + activeWindows.remove(context.window()); } triggerRunner.clearEverything(context); - paneInfo.clear(context.state()); - watermarkHold.clear(context); + paneInfoTracker.clear(context.state()); } /** Should the reduce function state be cleared? */ @@ -582,37 +634,51 @@ private void handleTriggerResult(ReduceFn.Context context // If the trigger fired due to an end-of-window timer, treat it as an AfterWatermark trigger. boolean isWatermarkTrigger = isEndOfWindow; + // Will be able to clear all element state after triggering? + boolean shouldDiscard = shouldDiscardAfterFiring(result); + // Run onTrigger to produce the actual pane contents. // As a side effect it will clear all element holds, but not necessarily any // end-of-window or garbage collection holds. - onTrigger(context, isWatermarkTrigger, result.isFinish()); + onTrigger(context, isWatermarkTrigger, result.isFinish(), !shouldDiscard); // Now that we've triggered, the pane is empty. nonEmptyPanes.clearPane(context); // Cleanup buffered data if appropriate - if (shouldDiscardAfterFiring(result)) { - // Clear the reduceFn state + if (shouldDiscard) { + // Clear the reduceFn state across all windows in the equivalence class for the current + // window. 
try { reduceFn.clearState(context); } catch (Exception e) { throw wrapMaybeUserException(e); } - // Remove the window from active set -- nothing is buffered. + // Remove the window from active set. + // This will forget the equivalence class for this window. + WindowTracing.debug("ReduceFnRunner.handleTriggerResult: removing {}", context.window()); activeWindows.remove(context.window()); + + if (!result.isFinish()) { + // We still need to consider this window active since we may have had to add an + // end-of-window or garbage collection hold above. + activeWindows.addActive(context.window()); + } } if (result.isFinish()) { - // If we're finishing, clear up the trigger tree as well. - // However, we'll leave behind a tombstone so we know the trigger is finished. + // If we're finishing, eagerly clear state to reduce pressure on the backend. + // Leave behind a tombstone in the trigger runner so we know the trigger is finished. try { triggerRunner.clearState(context); - paneInfo.clear(context.state()); + paneInfoTracker.clear(context.state()); } catch (Exception e) { Throwables.propagateIfPossible(e); throw new RuntimeException("Exception while clearing trigger state", e); } + // No more watermark holds will be placed (even for end-of-window or garbage + // collection holds). } } @@ -644,12 +710,12 @@ private boolean needToEmit( * @param isFinish true if this will be the last triggering processed */ private void onTrigger(final ReduceFn.Context context, - boolean isWatermarkTrigger, boolean isFinish) { + boolean isWatermarkTrigger, boolean isFinish, boolean willStillBeActive) { // Collect state. StateContents outputTimestampFuture = - watermarkHold.extractAndRelease(context, isFinish); + watermarkHold.extractAndRelease(context, isFinish, willStillBeActive); StateContents paneFuture = - paneInfo.getNextPaneInfo(context, isWatermarkTrigger, isFinish); + paneInfoTracker.getNextPaneInfo(context, isWatermarkTrigger, isFinish); StateContents isEmptyFuture = nonEmptyPanes.isEmpty(context); reduceFn.prefetchOnTrigger(context.state()); @@ -669,7 +735,7 @@ private void onTrigger(final ReduceFn.Context context, public void output(OutputT toOutput) { // We're going to output panes, so commit the (now used) PaneInfo. // TODO: Unnecessary if isFinal? - paneInfo.storeCurrentPaneInfo(context, pane); + paneInfoTracker.storeCurrentPaneInfo(context, pane); // Output the actual value. 
windowingInternals.outputWindowedValue( @@ -696,7 +762,7 @@ private void scheduleEndOfWindowOrGarbageCollectionTimer(ReduceFn.Co "Asking to set a timer at %s behind input watermark %s", fireTime, inputWM); } WindowTracing.trace( - "scheduleTimer: Scheduling {} timer at {} for " + "ReduceFnRunner.scheduleEndOfWindowOrGarbageCollectionTimer: Scheduling {} timer at {} for " + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}", which, fireTime, key, context.window(), inputWM, timerInternals.currentOutputWatermarkTime()); @@ -705,7 +771,7 @@ private void scheduleEndOfWindowOrGarbageCollectionTimer(ReduceFn.Co private void cancelEndOfWindowAndGarbageCollectionTimers(ReduceFn.Context context) { WindowTracing.debug( - "cancelTimer: Deleting timers for " + "ReduceFnRunner.cancelEndOfWindowAndGarbageCollectionTimers: Deleting timers for " + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}", key, context.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java index ab55d3f0b306c..d7d77f9a330b9 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java @@ -98,7 +98,7 @@ public SystemReduceFn(StateTag> buffer @Override public void processValue(ProcessValueContext c) throws Exception { - c.state().access(bufferTag).add(c.value()); + c.state().accessAcrossMergedWindows(bufferTag).add(c.value()); } @Override @@ -125,6 +125,8 @@ public void clearState(Context c) throws Exception { @Override public StateContents isEmpty(StateContext state) { + // Since we only check for empty element sets when a trigger fires it's unreasonable + // to require a prefetch. return state.accessAcrossMergedWindows(bufferTag).isEmpty(); } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java index 64c6de39b36eb..4dfef33e1de16 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java @@ -252,7 +252,7 @@ public TriggerStateContextImpl(ActiveWindowSet activeWindows, // Annoyingly, since we hadn't set the triggerIndex yet (we can't do it before super) // This will would otherwise have incorporated 0 as the trigger index. - this.namespace = namespaceFor(window); + this.windowNamespace = namespaceFor(window); } @Override diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java index 34116057dc333..7397189ae94f3 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java @@ -45,8 +45,8 @@ * @param The kind of windows being processed. 
*/ public class TriggerRunner { - - @VisibleForTesting static final StateTag> FINISHED_BITS_TAG = + @VisibleForTesting + static final StateTag> FINISHED_BITS_TAG = StateTags.makeSystemTagInternal(StateTags.value("closed", BitSetCoder.of())); private final ExecutableTrigger rootTrigger; @@ -75,6 +75,13 @@ public boolean isClosed(ReduceFn.StateContext state) { return readFinishedBits(state.access(FINISHED_BITS_TAG)).get(0); } + public void prefetchForValue(ReduceFn.StateContext state) { + if (isFinishedSetNeeded()) { + state.access(FINISHED_BITS_TAG).get(); + } + rootTrigger.getSpec().prefetchOnElement(state); + } + /** * Run the trigger logic to deal with a new value. */ @@ -87,6 +94,16 @@ public TriggerResult processValue(ReduceFn.ProcessValueContext c) th return result; } + public void prefetchForMerge(ReduceFn.MergingStateContext state) { + if (isFinishedSetNeeded()) { + for (ValueState value : + state.mergingAccessInEachMergingWindow(FINISHED_BITS_TAG).values()) { + value.get(); + } + } + rootTrigger.getSpec().prefetchOnMerge(state); + } + /** * Run the trigger merging logic as part of executing the specified merge. */ @@ -97,11 +114,10 @@ public TriggerResult onMerge(ReduceFn.OnMergeContext c) throws Excep // And read the finished bits in each merging window. ImmutableMap.Builder mergingFinishedSets = ImmutableMap.builder(); Map> mergingFinishedSetState = - c.state().accessInEachMergingWindow(FINISHED_BITS_TAG); + c.state().mergingAccessInEachMergingWindow(FINISHED_BITS_TAG); for (W window : c.mergingWindows()) { // Don't need to clone these, since the trigger context doesn't allow modification - mergingFinishedSets.put(window, - readFinishedBits(mergingFinishedSetState.get(window))); + mergingFinishedSets.put(window, readFinishedBits(mergingFinishedSetState.get(window))); } Trigger.OnMergeContext mergeContext = @@ -117,6 +133,13 @@ public TriggerResult onMerge(ReduceFn.OnMergeContext c) throws Excep return result.getTriggerResult(); } + public void prefetchForTimer(ReduceFn.StateContext state) { + if (isFinishedSetNeeded()) { + state.access(FINISHED_BITS_TAG).get(); + } + rootTrigger.getSpec().prefetchOnElement(state); + } + /** * Run the trigger logic appropriate for receiving a timer with the specified destination ID. */ @@ -166,29 +189,6 @@ public void clearEverything(ReduceFn.Context c) throws Exception { } } - public void prefetchForValue(ReduceFn.StateContext state) { - if (isFinishedSetNeeded()) { - state.access(FINISHED_BITS_TAG).get(); - } - rootTrigger.getSpec().prefetchOnElement(state); - } - - public void prefetchForMerge(ReduceFn.MergingStateContext state) { - if (isFinishedSetNeeded()) { - for (ValueState value : state.accessInEachMergingWindow(FINISHED_BITS_TAG).values()) { - value.get(); - } - } - rootTrigger.getSpec().prefetchOnMerge(state); - } - - public void prefetchForTimer(ReduceFn.StateContext state) { - if (isFinishedSetNeeded()) { - state.access(FINISHED_BITS_TAG).get(); - } - rootTrigger.getSpec().prefetchOnElement(state); - } - private boolean isFinishedSetNeeded() { // TODO: If we know that no trigger in the tree will ever finish, we don't need to do the // lookup. Right now, we special case this for the DefaultTrigger. 
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java index bac3af11f7b58..0c6413eacc7d4 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java @@ -178,7 +178,7 @@ public WatermarkHold(TimerInternals timerInternals, WindowingStrategy wind */ public void addHolds(ReduceFn.ProcessValueContext context) { if (!addElementHold(context)) { - addEndOfWindowOrGarbageCollectionHolds(context); + addEndOfWindowOrGarbageCollectionHolds(context, true); } } @@ -220,7 +220,7 @@ private boolean addElementHold(ReduceFn.ProcessValueContext context) tooLate = true; } else { tooLate = false; - context.state().access(elementHoldTag).add(elementHold); + context.state().accessAcrossMergedWindows(elementHoldTag).add(elementHold); } WindowTracing.trace( "WatermarkHold.addHolds: element hold at {} is {} for " @@ -237,9 +237,10 @@ private boolean addElementHold(ReduceFn.ProcessValueContext context) *

The end-of-window hold guarantees that an empty {@code ON_TIME} pane can be given * a timestamp which will not be considered beyond allowed lateness by any downstream computation. */ - private void addEndOfWindowOrGarbageCollectionHolds(ReduceFn.Context context) { - if (!addEndOfWindowHold(context)) { - addGarbageCollectionHold(context); + private void addEndOfWindowOrGarbageCollectionHolds( + ReduceFn.Context context, boolean isActive) { + if (!addEndOfWindowHold(context, isActive)) { + addGarbageCollectionHold(context, isActive); } } @@ -249,7 +250,7 @@ private void addEndOfWindowOrGarbageCollectionHolds(ReduceFn.Context *

The end-of-window hold guarantees that any empty {@code ON_TIME} pane can be given * a timestamp which will not be considered beyond allowed lateness by any downstream computation. */ - private boolean addEndOfWindowHold(ReduceFn.Context context) { + private boolean addEndOfWindowHold(ReduceFn.Context context, boolean isActive) { // Only add an end-of-window hold if we can be sure the end-of-window timer // has not yet fired. Otherwise we risk holding up the output watermark until // the garbage collection timer fires, which may be a very long time in the future. @@ -263,13 +264,21 @@ private boolean addEndOfWindowHold(ReduceFn.Context context) { tooLate = false; Preconditions.checkState(outputWM == null || !eowHold.isBefore(outputWM), "End-of-window hold %s cannot be before output watermark %s", eowHold, outputWM); - context.state().access(EXTRA_HOLD_TAG).add(eowHold); + if (isActive) { + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).add(eowHold); + } else { + // The window is not currently ACTIVE, so we can't use accessAcrossMergedWindows + // to collect its state. Instead, store the holds under the window itself. The + // caller will be responsible for ensuring the active window set now considers this + // window ACTIVE. + context.state().access(EXTRA_HOLD_TAG).add(eowHold); + } } WindowTracing.trace( - "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is {} for " + "WatermarkHold.addEndOfWindowHold: end-of-window hold for %s at {} is {} for " + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", - eowHold, tooLate ? "too late" : "on-time", context.key(), context.window(), inputWM, - outputWM); + isActive ? "active" : "inactive", eowHold, tooLate ? "too late" : "on-time", context.key(), + context.window(), inputWM, outputWM); return !tooLate; } @@ -281,7 +290,7 @@ private boolean addEndOfWindowHold(ReduceFn.Context context) { * computation. If we are sure no empty final panes can be emitted then there's no need * for an additional hold. */ - private void addGarbageCollectionHold(ReduceFn.Context context) { + private void addGarbageCollectionHold(ReduceFn.Context context, boolean isActive) { // Only add a garbage collection hold if we are sure we need an empty final pane and // the window will be garbage collected after the end-of-window trigger. if (context.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS @@ -290,21 +299,30 @@ private void addGarbageCollectionHold(ReduceFn.Context context) { Instant outputWM = timerInternals.currentOutputWatermarkTime(); Instant inputWM = timerInternals.currentInputWatermarkTime(); WindowTracing.trace( - "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} for " + "WatermarkHold.addGarbageCollectionHold: garbage collection hold for %s at {} for " + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", - gcHold, context.key(), context.window(), inputWM, outputWM); + isActive ? "active" : "inactive", gcHold, context.key(), context.window(), inputWM, + outputWM); Preconditions.checkState(inputWM == null || !gcHold.isBefore(inputWM), "Garbage collection hold %s cannot be before input watermark %s", gcHold, inputWM); - context.state().access(EXTRA_HOLD_TAG).add(gcHold); + if (isActive) { + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).add(gcHold); + } else { + // See comment above for addEndOfWindowHold. + context.state().access(EXTRA_HOLD_TAG).add(gcHold); + } } } /** - * Updates the watermark hold when windows merge. 
For example, if the new window implies - * a later watermark hold, then earlier holds may be released. + * Updates the watermark hold when windows merge if it is possible the merged value does + * not equal all of the existing holds. For example, if the new window implies a later + * watermark hold, then earlier holds may be released. + * + *

Note that state may be left behind in merged windows. */ - public void mergeHolds(final ReduceFn.OnMergeContext context) { - WindowTracing.debug("mergeHolds: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", + public void onMerge(final ReduceFn.OnMergeContext context) { + WindowTracing.debug("onMerge: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", context.key(), context.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); // If the output hold depends only on the window, then there may not be a hold in place @@ -312,7 +330,7 @@ public void mergeHolds(final ReduceFn.OnMergeContext context) { if (windowingStrategy.getOutputTimeFn().dependsOnlyOnWindow()) { Instant arbitraryTimestamp = new Instant(0); context.state() - .access(elementHoldTag) + .accessAcrossMergedWindows(elementHoldTag) .add(windowingStrategy.getOutputTimeFn().assignOutputTime( arbitraryTimestamp, context.window())); } @@ -322,21 +340,28 @@ public void mergeHolds(final ReduceFn.OnMergeContext context) { } /** - * Return (a future for) the earliest data hold for {@code context}. Clear the data hold after - * reading. If {@code isFinal}, also clear any end-of-window or garbage collection hold. + * Return (a future for) the earliest hold for {@code context}. Clear all the holds after + * reading, but add/restore an end-of-window or garbage collection hold if required. * *

The returned timestamp is the output timestamp according to the {@link OutputTimeFn} * from the windowing strategy of this {@link WatermarkHold}, combined across all the non-late - * elements in the current pane. + * elements in the current pane. If there is no such value the timestamp is the end + * of the window. + * + *

If {@code willStillBeActive} then any end-of-window or garbage collection holds will + * be reestablished in one of the target windows alread in use for this window. Otherwise, + * the holds will be placed in this window itself. */ - public StateContents extractAndRelease( - final ReduceFn.Context context, final boolean isFinal) { + public StateContents extractAndRelease(final ReduceFn.Context context, + final boolean isFinal, final boolean willStillBeActive) { WindowTracing.debug( "extractAndRelease: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", context.key(), context.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); final WatermarkStateInternal elementHoldState = context.state().accessAcrossMergedWindows(elementHoldTag); + // Since we only extract holds when a trigger fires it is unreasonable to expect + // the state to be prefetched. final StateContents elementHoldFuture = elementHoldState.get(); final WatermarkStateInternal extraHoldState = context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG); @@ -374,9 +399,8 @@ public Instant read() { elementHoldState.clear(); extraHoldState.clear(); - // Reinstate the end-of-window and garbage collection holds if still required. if (!isFinal) { - addEndOfWindowOrGarbageCollectionHolds(context); + addEndOfWindowOrGarbageCollectionHolds(context, willStillBeActive); } return hold; @@ -384,13 +408,22 @@ public Instant read() { }; } - /** Clear any remaining holds. */ - public void clear(ReduceFn.Context context) { + /** + * Clear any remaining holds. If {@code isActive} then we assume holds could be placed in any + * of the target windows for this window. Otherwise we assume only this window has any + * end-of-window or garbage collection holds. + */ + public void clearHolds(ReduceFn.Context context, boolean isActive) { WindowTracing.debug( - "WatermarkHold.clear: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}", - context.key(), context.window(), timerInternals.currentInputWatermarkTime(), - timerInternals.currentOutputWatermarkTime()); - context.state().accessAcrossMergedWindows(elementHoldTag).clear(); - context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).clear(); + "WatermarkHold.clearHolds: For key:{}; %s window:{}; " + + "inputWatermark:{}; outputWatermark:{}", + context.key(), isActive ? "active" : "inactive", context.window(), + timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); + if (isActive) { + context.state().accessAcrossMergedWindows(elementHoldTag).clear(); + context.state().accessAcrossMergedWindows(EXTRA_HOLD_TAG).clear(); + } else { + context.state().access(EXTRA_HOLD_TAG).clear(); + } } } diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java index 11a73ad7d2ea9..f47c9ddbcf5d2 100644 --- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateInternals.java @@ -50,14 +50,10 @@ public ValueState bindValue(StateTag> address, Coder cod public BagState bindBag(StateTag> address, Coder elemCoder) { List> sources = new ArrayList<>(); for (StateNamespace sourceNamespace : sourceNamespaces) { - // Skip adding the result namespace for now. 
- if (!sourceNamespace.equals(resultNamespace)) { - sources.add(state(sourceNamespace, address)); - } + sources.add(state(sourceNamespace, address)); } BagState results = state(resultNamespace, address); - sources.add(results); return new MergedBag<>(sources, results); } @@ -68,14 +64,10 @@ CombiningValueStateInternal bindCombiningValue( Coder accumCoder, CombineFn combineFn) { List> sources = new ArrayList<>(); for (StateNamespace sourceNamespace : sourceNamespaces) { - // Skip adding the result namespace for now. - if (!sourceNamespace.equals(resultNamespace)) { - sources.add(state(sourceNamespace, address)); - } + sources.add(state(sourceNamespace, address)); } CombiningValueStateInternal result = state(resultNamespace, address); - sources.add(result); return new MergedCombiningValue<>(sources, result, combineFn); } @@ -85,13 +77,9 @@ public WatermarkStateInternal bindWatermark( OutputTimeFn outputTimeFn) { List sources = new ArrayList<>(); for (StateNamespace sourceNamespace : sourceNamespaces) { - // Skip adding the result namespace for now. - if (!sourceNamespace.equals(resultNamespace)) { - sources.add(state(sourceNamespace, address)); - } + sources.add(state(sourceNamespace, address)); } WatermarkStateInternal result = state(resultNamespace, address); - sources.add(result); // It is the responsibility of the SDK to only pass allowed result windows. @SuppressWarnings("unchecked") diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java index db041290b9e37..05aba6008fed5 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTimeTest.java @@ -123,7 +123,7 @@ public void testAfterProcessingTimeWithMergingWindow() throws Exception { TimestampedValue.of(2, new Instant(2))); // in [2, 12), timer for 16 tester.advanceProcessingTime(new Instant(16)); - // This fires, because the earliest element in [1, 12) arrived at time 10 + // This fires, because the earliest element in [1, 11) arrived at time 10 assertThat(tester.extractOutput(), Matchers.contains( WindowMatchers.isSingleWindowedValue(Matchers.containsInAnyOrder(1, 2), 1, 1, 12))); diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSetTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSetTest.java new file mode 100644 index 0000000000000..0b1a7521f40b0 --- /dev/null +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSetTest.java @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package com.google.cloud.dataflow.sdk.util; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow; +import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions; +import com.google.cloud.dataflow.sdk.util.state.InMemoryStateInternals; +import com.google.cloud.dataflow.sdk.util.state.StateInternals; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.util.Collection; + +/** + * Test NonMergingActiveWindowSet. + */ +@RunWith(JUnit4.class) +public class MergingActiveWindowSetTest { + private Sessions windowFn; + private StateInternals state; + private MergingActiveWindowSet set; + + @Before + public void before() { + windowFn = Sessions.withGapDuration(Duration.millis(10)); + state = new InMemoryStateInternals(); + set = new MergingActiveWindowSet<>(windowFn, state); + } + + @After + public void after() { + set = null; + state = null; + windowFn = null; + } + + private void add(final long instant) { + System.out.println("ADD " + instant); + final Object element = new Long(instant); + Sessions.AssignContext context = windowFn.new AssignContext() { + @Override + public Object element() { + return element; + } + + @Override + public Instant timestamp() { + return new Instant(instant); + } + + @Override + public Collection windows() { + return ImmutableList.of(); + } + }; + + for (IntervalWindow window : windowFn.assignWindows(context)) { + set.addNew(window); + } + } + + private void merge(ActiveWindowSet.MergeCallback callback) throws Exception { + System.out.println("MERGE"); + set.merge(callback); + set.checkInvariants(); + System.out.println(set); + } + + private void pruneAndPersist() { + System.out.println("PRUNE"); + set.removeEphemeralWindows(); + set.checkInvariants(); + System.out.println(set); + set.persist(); + } + + private IntervalWindow window(long start, long size) { + return new IntervalWindow(new Instant(start), new Duration(size)); + } + + @Test + public void test() throws Exception { + @SuppressWarnings("unchecked") + ActiveWindowSet.MergeCallback callback = + mock(ActiveWindowSet.MergeCallback.class); + + // NEW 1+10 + // NEW 2+10 + // NEW 15+10 + // => + // ACTIVE 1+11 (target 1+11) + // EPHEMERAL 1+10 -> 1+11 + // EPHEMERAL 2+10 -> 1+11 + // ACTIVE 15+10 (target 15+10) + add(1); + add(2); + add(15); + merge(callback); + verify(callback).onMerge(ImmutableList.of(window(1, 10), window(2, 10)), + ImmutableList.of(), window(1, 11)); + assertEquals(ImmutableSet.of(window(1, 11), window(15, 10)), set.getActiveWindows()); + assertEquals(window(1, 11), set.representative(window(1, 10))); + assertEquals(window(1, 11), set.representative(window(2, 10))); + assertEquals(window(1, 11), set.representative(window(1, 11))); + assertEquals(window(15, 10), set.representative(window(15, 10))); + assertEquals( + ImmutableSet.of(window(1, 11)), set.readStateAddresses(window(1, 11))); + assertEquals( + ImmutableSet.of(window(15, 10)), set.readStateAddresses(window(15, 10))); + + // NEW 3+10 + // => + // ACTIVE 1+12 (target 1+11) + // EPHEMERAL 3+10 -> 1+12 + // ACTIVE 15+10 (target 15+10) + 
add(3); + merge(callback); + verify(callback).onMerge(ImmutableList.of(window(1, 11), window(3, 10)), + ImmutableList.of(window(1, 11)), window(1, 12)); + assertEquals(ImmutableSet.of(window(1, 12), window(15, 10)), set.getActiveWindows()); + assertEquals(window(1, 12), set.representative(window(3, 10))); + + // NEW 8+10 + // => + // ACTIVE 1+24 (target 1+11, 15+10) + // MERGED 1+11 -> 1+24 + // MERGED 15+10 -> 1+24 + // EPHEMERAL 1+12 -> 1+24 + add(8); + merge(callback); + verify(callback).onMerge(ImmutableList.of(window(1, 12), window(8, 10), window(15, 10)), + ImmutableList.of(window(1, 12), window(15, 10)), window(1, 24)); + assertEquals(ImmutableSet.of(window(1, 24)), set.getActiveWindows()); + assertEquals(window(1, 24), set.representative(window(1, 12))); + assertEquals(window(1, 24), set.representative(window(1, 11))); + assertEquals(window(1, 24), set.representative(window(15, 10))); + + // NEW 9+10 + // => + // ACTIVE 1+24 (target 1+11, 15+10) + add(9); + merge(callback); + verify(callback).onMerge(ImmutableList.of(window(1, 24), window(9, 10)), + ImmutableList.of(window(1, 24)), window(1, 24)); + + pruneAndPersist(); + } +} diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java index b789d6bd33613..8327359277b44 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/ReduceFnTester.java @@ -66,6 +66,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -218,7 +219,7 @@ private void assertHasOnlyGlobalAndAllowedTags( for (W expectedWindow : expectedWindows) { expectedWindowsSet.add(windowNamespace(expectedWindow)); } - Set actualWindows = new HashSet<>(); + Map>> actualWindows = new HashMap<>(); for (StateNamespace namespace : stateInternals.getNamespacesInUse()) { if (namespace instanceof StateNamespaces.GlobalNamespace) { @@ -228,7 +229,7 @@ private void assertHasOnlyGlobalAndAllowedTags( if (tagsInUse.isEmpty()) { continue; } - actualWindows.add(namespace); + actualWindows.put(namespace, tagsInUse); Set> unexpected = Sets.difference(tagsInUse, allowedTags); if (unexpected.isEmpty()) { continue; @@ -243,7 +244,8 @@ private void assertHasOnlyGlobalAndAllowedTags( } } - assertEquals(expectedWindowsSet, actualWindows); + assertEquals("Still in use: " + actualWindows.toString(), expectedWindowsSet, + actualWindows.keySet()); } private StateNamespace windowNamespace(W window) { diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java index ee09e08c37ebd..bf9c6f7bb8dce 100644 --- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java +++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternalsTest.java @@ -154,7 +154,8 @@ public void testMergeBagIntoNewNamespace() throws Exception { bag1.add("!"); BagState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, STRING_BAG_ADDR, WINDOW_3); + Arrays.asList(NAMESPACE_1, NAMESPACE_2, NAMESPACE_3), + NAMESPACE_3, STRING_BAG_ADDR, WINDOW_3); // Reading the merged bag gets both the contents assertThat(merged.get().read(), Matchers.containsInAnyOrder("Hello", "World", "!")); @@ -245,7 
+246,8 @@ public void testMergeCombiningValueIntoNewNamespace() throws Exception { assertThat(value2.get().read(), Matchers.equalTo(10)); CombiningValueState merged = underTest.mergedState( - Arrays.asList(NAMESPACE_1, NAMESPACE_2), NAMESPACE_3, SUM_INTEGER_ADDR, WINDOW_3); + Arrays.asList(NAMESPACE_1, NAMESPACE_2, NAMESPACE_3), + NAMESPACE_3, SUM_INTEGER_ADDR, WINDOW_3); assertThat(value1.get().read(), Matchers.equalTo(11)); assertThat(value2.get().read(), Matchers.equalTo(10)); From c98f0438350f2fe4f00e1276605349594c60d5f2 Mon Sep 17 00:00:00 2001 From: lcwik Date: Tue, 5 Jan 2016 15:17:48 -0800 Subject: [PATCH 1268/1541] Native reader/sink for sorted key/value data ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=111456179 --- .../sdk/runners/worker/IsmFormat.java | 276 +++++++++++++++ .../sdk/runners/worker/IsmReader.java | 316 ++++++++++++++++++ .../dataflow/sdk/runners/worker/IsmSink.java | 188 +++++++++++ .../dataflow/sdk/util/RandomAccessData.java | 266 +++++++++++++++ .../sdk/runners/worker/IsmFormatTest.java | 86 +++++ .../sdk/runners/worker/IsmReaderTest.java | 266 +++++++++++++++ .../sdk/runners/worker/IsmSinkTest.java | 90 +++++ .../sdk/util/RandomAccessDataTest.java | 158 +++++++++ 8 files changed, 1646 insertions(+) create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmReader.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmSink.java create mode 100644 sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RandomAccessData.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormatTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/IsmReaderTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/worker/IsmSinkTest.java create mode 100644 sdk/src/test/java/com/google/cloud/dataflow/sdk/util/RandomAccessDataTest.java diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java new file mode 100644 index 0000000000000..517b0e1a115b8 --- /dev/null +++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java @@ -0,0 +1,276 @@ +/* + * Copyright (C) 2015 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.google.cloud.dataflow.sdk.runners.worker; + +import com.google.cloud.dataflow.sdk.coders.AtomicCoder; +import com.google.cloud.dataflow.sdk.coders.Coder; +import com.google.cloud.dataflow.sdk.coders.CoderException; +import com.google.cloud.dataflow.sdk.util.ScalableBloomFilter; +import com.google.cloud.dataflow.sdk.util.VarInt; +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Objects; + +/** + * An Ism file is a prefix encoded key value file with a bloom filter and an index to + * enable lookups. + * + *

+ * <p>An Ism file is composed of these high level sections (in order):
+ * <ul>
+ *   <li>data block</li>
+ *   <li>bloom filter (See {@link ScalableBloomFilter} for details on encoding format)</li>
+ *   <li>index</li>
+ *   <li>footer (See {@link Footer} for details on encoding format)</li>
+ * </ul>
+ *
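+ * <p>In file byte order that is:
+ * <pre>
+ * | data block | bloom filter | index | footer |
+ * </pre>
+ * Since the footer is fixed length (see {@link Footer}), a reader can, for example, locate
+ * the bloom filter and the index by first reading the footer from the end of the file.
+ *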

+ * <p>The data block is composed of multiple copies of the following:
+ * <ul>
+ *   <li>key prefix (See {@link KeyPrefix} for details on encoding format)</li>
+ *   <li>unshared key bytes</li>
+ *   <li>value bytes</li>
+ * </ul>
+ *
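+ * <p>For example (purely illustrative values), storing the key {@code "apple"} followed by
+ * the key {@code "apricot"} could produce the entries
+ * {@code (sharedKeySize=0, unsharedKeySize=5, "apple", value1)} and
+ * {@code (sharedKeySize=2, unsharedKeySize=5, "ricot", value2)}: the second entry only
+ * stores the bytes that differ from the previous key.
+ *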

+ * <p>The index is composed of {@code N} copies of the following:
+ * <ul>
+ *   <li>key prefix (See {@link KeyPrefix} for details on encoding format)</li>
+ *   <li>unshared key bytes</li>
+ *   <li>byte offset to key prefix in data block (variable length long coding)</li>
+ * </ul>
+ */ +class IsmFormat { + /** + * The prefix used before each key which contains the number of shared and unshared + * bytes from the previous key that was read. The key prefix along with the previous key + * and the unshared key bytes allows one to construct the current key by doing the following + * {@code currentKey = previousKey[0 : sharedBytes] + read(unsharedBytes)}. + * + *

+ * <p>The key prefix is encoded as:
+ * <ul>
+ *   <li>number of shared key bytes (variable length integer coding)</li>
+ *   <li>number of unshared key bytes (variable length integer coding)</li>
+ * </ul>
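+ *
+ * <p>As a sketch of that reconstruction (assuming {@code previousKey} holds the previously
+ * decoded key, {@code prefix} is the decoded {@link KeyPrefix}, and {@code inStream} is a
+ * {@link java.io.DataInputStream} positioned at the unshared key bytes):
+ * <pre>{@code
+ * byte[] currentKey = new byte[prefix.getSharedKeySize() + prefix.getUnsharedKeySize()];
+ * // Reuse the shared bytes from the previous key, then read the unshared bytes.
+ * System.arraycopy(previousKey, 0, currentKey, 0, prefix.getSharedKeySize());
+ * inStream.readFully(currentKey, prefix.getSharedKeySize(), prefix.getUnsharedKeySize());
+ * }</pre>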
+ */ + static class KeyPrefix { + private final int sharedKeySize; + private final int unsharedKeySize; + + KeyPrefix(int sharedBytes, int unsharedBytes) { + this.sharedKeySize = sharedBytes; + this.unsharedKeySize = unsharedBytes; + } + + public int getSharedKeySize() { + return sharedKeySize; + } + + public int getUnsharedKeySize() { + return unsharedKeySize; + } + + @Override + public int hashCode() { + return Objects.hash(sharedKeySize, unsharedKeySize); + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (!(other instanceof KeyPrefix)) { + return false; + } + KeyPrefix keyPrefix = (KeyPrefix) other; + return sharedKeySize == keyPrefix.sharedKeySize + && unsharedKeySize == keyPrefix.unsharedKeySize; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("sharedKeySize", sharedKeySize) + .add("unsharedKeySize", unsharedKeySize) + .toString(); + } + } + + /** A {@link Coder} for {@link KeyPrefix}. */ + static final class KeyPrefixCoder extends AtomicCoder { + private static final KeyPrefixCoder INSTANCE = new KeyPrefixCoder(); + + @JsonCreator + public static KeyPrefixCoder of() { + return INSTANCE; + } + + @Override + public void encode(KeyPrefix value, OutputStream outStream, Coder.Context context) + throws CoderException, IOException { + VarInt.encode(value.sharedKeySize, outStream); + VarInt.encode(value.unsharedKeySize, outStream); + } + + @Override + public KeyPrefix decode(InputStream inStream, Coder.Context context) + throws CoderException, IOException { + return new KeyPrefix(VarInt.decodeInt(inStream), VarInt.decodeInt(inStream)); + } + + @Override + public boolean consistentWithEquals() { + return true; + } + + @Override + public boolean isRegisterByteSizeObserverCheap(KeyPrefix value, Coder.Context context) { + return true; + } + + @Override + protected long getEncodedElementByteSize(KeyPrefix value, Coder.Context context) + throws Exception { + Preconditions.checkNotNull(value); + return VarInt.getLength(value.sharedKeySize) + VarInt.getLength(value.unsharedKeySize); + } + } + + /** + * The footer stores the relevant information required to locate the index and bloom filter. + * It also stores a version byte and the number of keys stored. + * + *

+ * <p>The footer is encoded as the value containing:
+ * <ul>
+ *   <li>start of bloom filter offset (big endian long coding)</li>
+ *   <li>start of index position offset (big endian long coding)</li>
+ *   <li>number of keys in file (big endian long coding)</li>
+ *   <li>0x01 (version key as a single byte)</li>
+ * </ul>
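+ *
+ * <p>A minimal decoding sketch, assuming the field order documented above (the actual
+ * {@link FooterCoder} is authoritative) and a {@link java.io.DataInputStream} positioned at
+ * the first footer byte:
+ * <pre>{@code
+ * long bloomFilterPosition = inStream.readLong();  // big endian
+ * long indexPosition = inStream.readLong();        // big endian
+ * long numberOfKeys = inStream.readLong();         // big endian
+ * byte version = inStream.readByte();              // expected to equal Footer.VERSION
+ * Footer footer = new Footer(indexPosition, bloomFilterPosition, numberOfKeys);
+ * }</pre>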
+ */ + static class Footer { + static final int LONG_BYTES = 8; + static final long FIXED_LENGTH = 3 * LONG_BYTES + 1; + static final byte VERSION = 1; + + private final long indexPosition; + private final long bloomFilterPosition; + private final long numberOfKeys; + + Footer(long indexPosition, long bloomFilterPosition, long numberOfKeys) { + this.indexPosition = indexPosition; + this.bloomFilterPosition = bloomFilterPosition; + this.numberOfKeys = numberOfKeys; + } + + public long getIndexPosition() { + return indexPosition; + } + + public long getBloomFilterPosition() { + return bloomFilterPosition; + } + + public long getNumberOfKeys() { + return numberOfKeys; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (!(other instanceof Footer)) { + return false; + } + Footer footer = (Footer) other; + return indexPosition == footer.indexPosition + && bloomFilterPosition == footer.bloomFilterPosition + && numberOfKeys == footer.numberOfKeys; + } + + @Override + public int hashCode() { + return Objects.hash(indexPosition, bloomFilterPosition, numberOfKeys); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("version", 1) + .add("indexPosition", indexPosition) + .add("bloomFilterPosition", bloomFilterPosition) + .add("numberOfKeys", numberOfKeys) + .toString(); + } + } + + /** A {@link Coder} for {@link Footer}. */ + static final class FooterCoder extends AtomicCoder